# Agregar dados de demanda de transporte sobre trilhos
Este Jupyter notebook agrega os dados de demanda obtidos das operadoras de transporte sobre trilhos de São Paulo.

In [2]:
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
import tabula

In [3]:
# Maps the three-letter Portuguese month abbreviation (upper case) to its
# zero-padded two-digit month number, e.g. 'FEV' -> '02'.
nome_meses = {
    sigla: f'{numero:02d}'
    for numero, sigla in enumerate(
        ['JAN', 'FEV', 'MAR', 'ABR', 'MAI', 'JUN',
         'JUL', 'AGO', 'SET', 'OUT', 'NOV', 'DEZ'],
        start=1,
    )
}

### Metrô

In [4]:
# Empty placeholder for the Metrô demand table; the loading cell below
# produces the actual data.
df_metro = pd.DataFrame()

In [5]:
# Monthly frames are collected in a list and concatenated once at the end.
# This avoids growing a DataFrame inside the loop and makes the cell
# idempotent: re-running it rebuilds df_metro instead of appending the same
# months a second time.
frames_metro = []

# 2019-2020: one CSV per month. The first eight columns are read as four
# (station, demand) pairs, one pair per line; the line number is taken from
# the digits in the header of each pair's first column.
for n_ano in ['2019', '2020']:
    files = [f'metro/{n_ano}/{i}' for i in os.listdir(f'metro/{n_ano}')]
    for f in files:
        # The month abbreviation is the first three characters after the last '-'.
        n_mes = nome_meses[f.split('-')[-1].strip()[:3].upper()]
        df = pd.read_csv(f, sep=';', encoding='latin-1', skiprows=4)
        df = df.iloc[:, 0:8].dropna()

        linhas = []
        for n in [0, 2, 4, 6]:
            # .copy() gives an independent frame, so renaming and adding columns
            # does not act on a view of df. Direct column assignment replaces
            # set_axis(..., inplace=True), which was removed in pandas 2.0.
            dflinha = df.iloc[1:-1, n:n+2].copy()
            n_linha = int(''.join([x for x in dflinha.columns[0] if x.isdigit()]))
            dflinha.columns = ['estacao', 'demanda']
            dflinha['linha'] = n_linha
            linhas.append(dflinha)
        dfmes = pd.concat(linhas)

        dfmes['data'] = f'{n_ano}-{n_mes}'
        # Drop every digit character from the station name.
        dfmes['estacao'] = dfmes['estacao'].apply(
            lambda x: ''.join([t for t in x.strip() if not t.isdigit()])
        )
        # Rows whose station is just '-' are discarded.
        dfmes = dfmes.query('estacao != "-"').copy()

        # Demand is multiplied by 1000 below; '-' is treated as zero.
        # Values go through float() because the decimal comma is turned into
        # a dot first, and int('1.5') would raise ValueError. round() keeps
        # the result an integer, as before.
        dfmes['demanda'] = (
            dfmes['demanda']
            .apply(lambda x: x.strip())
            .replace('-', 0)
            .apply(lambda x: x.replace(',', '.') if isinstance(x, str) else x)
            .apply(lambda x: round(1000 * float(x)))
        )
        frames_metro.append(dfmes)

# 2021-2022: the CSV has 'linha' and 'estacao' columns; every remaining column
# is stacked into a ('data', 'demanda') pair.
# NOTE(review): presumably those column headers are already 'YYYY-MM' strings,
# matching the 'data' values built above. Confirm against the files.
for n_ano in ['2021', '2022']:
    files = [f'metro/{n_ano}/{i}' for i in os.listdir(f'metro/{n_ano}')]
    for f in files:
        df = pd.read_csv(f, sep=';', encoding='latin-1')
        df['estacao'] = df['estacao'].apply(
            lambda x: ''.join([t for t in x.strip() if not t.isdigit()])
        )
        df = (
            df.set_index(['linha', 'estacao'])
            .stack()
            .reset_index()
            .rename(columns={'level_2': 'data', 0: 'demanda'})
        )
        df['demanda'] = df['demanda'].apply(lambda x: 1000*int(x))
        frames_metro.append(df)

df_metro = pd.concat(frames_metro).sort_values(by='data')

### ViaMobilidade

In [6]:
# One frame per monthly PDF, concatenated once after the loop instead of
# growing df_viamob inside it.
frames_viamob = []

for n_ano in ['2019', '2020', '2021', '2022']:
    files = [f'viamobilidade/{n_ano}/{i}' for i in os.listdir(f'viamobilidade/{n_ano}')]

    for f in files:
        # The month abbreviation is the first three characters after the last
        # '-' or '_' of the path.
        n_mes = nome_meses[f.replace('-', '_').split('_')[-1].strip()[:3].upper()]
        # Only the first table tabula extracts from the PDF is used.
        df = tabula.read_pdf(f, pages='all')[0]
        df = df.iloc[:, 0:8].dropna()

        # Only the first (station, demand) column pair is read. The line number
        # comes from the digits in the header of the first column.
        # .copy() plus direct column assignment replaces
        # set_axis(..., inplace=True), which was removed in pandas 2.0 and was
        # applied to a slice of df.
        dfmes = df.iloc[1:-1, 0:2].copy()
        n_linha = int(''.join([x for x in dfmes.columns[0] if x.isdigit()]))
        dfmes.columns = ['estacao', 'demanda']
        dfmes['linha'] = n_linha

        dfmes['data'] = f'{n_ano}-{n_mes}'
        # Drop every digit character from the station name.
        dfmes['estacao'] = dfmes['estacao'].apply(
            lambda x: ''.join([t for t in x.strip() if not t.isdigit()])
        )
        # Rows whose station is just '-' are discarded.
        dfmes = dfmes.query('estacao != "-"').copy()

        # '-' is treated as zero, the decimal comma becomes a dot, and the
        # value is multiplied by 1000.
        dfmes['demanda'] = (
            dfmes['demanda']
            .replace('-', 0)
            .apply(lambda x: x.replace(',', '.') if isinstance(x, str) else x)
            .apply(lambda x: 1000*float(x))
        )
        frames_viamob.append(dfmes)

df_viamob = pd.concat(frames_viamob).sort_values(by='data')

### ViaQuatro

In [7]:
# One frame per monthly PDF, concatenated once after the loop instead of
# growing df_viaquatro inside it.
frames_viaquatro = []

for n_ano in ['2019', '2020', '2021', '2022']:
    files = [f'viaquatro/{n_ano}/{i}' for i in os.listdir(f'viaquatro/{n_ano}')]

    for f in files:
        # The month abbreviation is the first three characters after the last '-'.
        n_mes = nome_meses[f.split('-')[-1].strip()[:3].upper()]
        # Only the first table tabula extracts from the PDF is used.
        df = tabula.read_pdf(f, pages='all')[0]
        df = df.iloc[:, 0:8].dropna()

        # Only the first (station, demand) column pair is read. The line number
        # comes from the digits in the header of the first column.
        # .copy() plus direct column assignment replaces
        # set_axis(..., inplace=True), which was removed in pandas 2.0 and was
        # applied to a slice of df.
        dfmes = df.iloc[1:-1, 0:2].copy()
        n_linha = int(''.join([x for x in dfmes.columns[0] if x.isdigit()]))
        dfmes.columns = ['estacao', 'demanda']
        dfmes['linha'] = n_linha

        dfmes['data'] = f'{n_ano}-{n_mes}'
        # Drop every digit character from the station name.
        dfmes['estacao'] = dfmes['estacao'].apply(
            lambda x: ''.join([t for t in x.strip() if not t.isdigit()])
        )
        # Rows whose station is just '-' are discarded.
        dfmes = dfmes.query('estacao != "-"').copy()

        # '-' is treated as zero, which leaves an integer 0 among the strings.
        # The isinstance guard is therefore required: calling str.replace on
        # that 0 raised AttributeError whenever a month contained a '-'.
        dfmes['demanda'] = (
            dfmes['demanda']
            .replace('-', 0)
            .apply(lambda x: x.replace(',', '.') if isinstance(x, str) else x)
            .apply(lambda x: 1000*float(x))
        )
        frames_viaquatro.append(dfmes)

df_viaquatro = pd.concat(frames_viaquatro).sort_values(by='data')

### Junção

In [12]:
# Stack the three operators' tables, order by line and month, and rename
# 'Japão-Liberdade' to 'Liberdade' so the station appears under one name.
df_demanda = (
    pd.concat([df_metro, df_viamob, df_viaquatro])
    .sort_values(by=['linha', 'data'])
    .assign(estacao=lambda tabela: tabela['estacao'].replace('Japão-Liberdade', 'Liberdade'))
)
df_demanda

Unnamed: 0,estacao,demanda,linha,data
23,Tucuruvi,62000.0,1,2019-01
22,Parada Inglesa,14000.0,1,2019-01
21,Jardim São Paulo-Ayrton Senna,12000.0,1,2019-01
20,Santana,57000.0,1,2019-01
19,Carandiru,13000.0,1,2019-01
...,...,...,...,...
589,Camilo Haddad,3000.0,15,2022-10
579,São Lucas,5000.0,15,2022-10
569,Oratório,5000.0,15,2022-10
559,Vila Prudente,52000.0,15,2022-10


In [17]:
# Wide table: one row per station name, one column per month, with demand
# summed over all rows sharing the same station name and month.
df_agg = (
    pd.pivot_table(
        df_demanda,
        values='demanda',
        index='estacao',
        columns='data',
        aggfunc='sum',
    )
    .reset_index()
)
# Written tab-separated, without the positional index.
df_agg.to_csv('agregado estações.csv', sep='\t', index=False)