# Exploratory Data Analysis (EDA) of Transport Dataset - Transport

![image-2.png](attachment:image-2.png)

In [1]:
# Import required libraries
import pandas as pd
import os
from dotenv import load_dotenv

# Load environment variables (load_dotenv reads from .env by default)
load_dotenv()

# Base directory of the project, taken from the BASE_PATH environment variable
# (adjust the value in .env if the notebook lives in a different location).
BASE_PATH = os.getenv('BASE_PATH')

# Fail fast when BASE_PATH is unset/empty or does not point to a directory.
# isdir (rather than exists) also rejects a BASE_PATH that names a regular
# file, which cannot act as the root passed to os.path.join when loading data.
if not BASE_PATH or not os.path.isdir(BASE_PATH):
    raise ValueError(f"Invalid BASE_PATH: {BASE_PATH}. Check your .env file and directory structure")

In [2]:
# Read the transport parquet file located under BASE_PATH into a DataFrame.
raw = pd.read_parquet(
    path=os.path.join(BASE_PATH, 'data/1_Bronze/transport.parquet'),
)

In [3]:
# Work on an independent (deep) copy so that `raw` keeps the data exactly as loaded.
df = raw.copy(deep=True)

In [4]:
# Preview the first five rows of the working copy.
df.head(n=5)

Unnamed: 0,Línea,Estación,Acceso de Estación,MES,INTERVALO,DÍA 01,DÍA 02,DÍA 03,DÍA 04,DÍA 05,...,DÍA 25,DÍA 26,DÍA 27,DÍA 28,DÍA 29,DÍA 30,DÍA 31,Total general,Unnamed: 37,Unnamed: 38
0,(11) Zona K Calle 26,(06000) Portal Eldorado,(01) PLAT2 ALIM-DESAL FONTIBÓN/FONTIBÓN CENTRO...,NOVIEMBRE,00:00,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,X,0.0,,
1,(11) Zona K Calle 26,(06000) Portal Eldorado,(01) PLAT2 ALIM-DESAL FONTIBÓN/FONTIBÓN CENTRO...,NOVIEMBRE,00:15,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,X,0.0,,
2,(11) Zona K Calle 26,(06000) Portal Eldorado,(01) PLAT2 ALIM-DESAL FONTIBÓN/FONTIBÓN CENTRO...,NOVIEMBRE,00:30,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,X,0.0,,
3,(11) Zona K Calle 26,(06000) Portal Eldorado,(01) PLAT2 ALIM-DESAL FONTIBÓN/FONTIBÓN CENTRO...,NOVIEMBRE,00:45,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,X,0.0,,
4,(11) Zona K Calle 26,(06000) Portal Eldorado,(01) PLAT2 ALIM-DESAL FONTIBÓN/FONTIBÓN CENTRO...,NOVIEMBRE,01:00,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,X,0.0,,


In [5]:
# Distinct values of the 'Línea' column, in order of first appearance
df.loc[:, 'Línea'].unique()

array(['(11) Zona K Calle 26', '(12) Zona L Carrera 10',
       '(30) Zona G NQS Sur', '(31) Zona F Av. Américas',
       '(32) Zona C Av. Suba', '(33) Zona B AutoNorte',
       '(34) Zona H Caracas Sur', '(35) Zona D Calle 80',
       '(36) Zona A Caracas', '(37) Zona J Eje Ambiental',
       '(38) Zona E NQS Central', '(39) Zona F Calle 13',
       '(40) Zona T Ciudad Bolívar', None], dtype=object)

In [6]:
# Distinct values of the 'Estación' column, in order of first appearance
df.loc[:, 'Estación'].unique()

array(['(06000) Portal Eldorado', '(06001) Modelia', '(06002) Normandía',
       '(06100) Av. Rojas \x96 UNISALESIANA',
       '(06101) El Tiempo - Camara de Comercio de Bogota',
       '(06102) Salitre El Greco', '(06103) CAN', '(06104) Gobernación',
       '(06105) Quinta Paredes', '(06106) Recinto Ferial',
       '(06107) Ciudad Universitaria - Loteria de Bogota',
       '(06108) Concejo de Bogotá', '(06109) Centro Memoria',
       '(06111) Universidades \x96 CityU', '(50008) Corral Portal Dorado',
       '(10000) Portal 20 de Julio', '(10001) Country Sur',
       '(10002) Av. Primero de Mayo', '(10003) Ciudad Jardín',
       '(10004) Policarpa', '(10005) Bicentenario',
       '(10006) SAN VICTORINO - NEOS CENTRO', '(10007) Las Nieves',
       '(10008) San Diego', '(10009) Museo Nacional',
       '(10010) San Bernardo', '(07000) Portal Sur JFK Coop. Financiera',
       '(07001) PERDOMO', '(07002) MADELENA', '(07003) SEVILLANA',
       '(07004) VENECIA', '(07005) ALQUERIA', '(07006) 

# (END)