# Demanda SPTrans
Este Jupyter Notebook baixa os dados diários de demanda (passageiros transportados) das linhas de ônibus da SPTrans.

In [29]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import io
import os

In [10]:
TIME_STEP = 15  # length, in days, of each sequential time bucket

def getTimeRangeTags(datestring, dateformat='%Y-%m-%d', zerodate = datetime(2020,1,1)):
    '''
    Return the sequential number and the start date of the TIME_STEP-day
    period that contains a given date.

    Periods are counted from `zerodate`: the first TIME_STEP days form
    period 1, the next TIME_STEP days form period 2, and so on.

    Parameters
    ----------
    datestring : str or datetime
        Target date. Strings are parsed with `dateformat`; datetime
        objects are used as they are.
    dateformat : str
        `strptime` format used to parse `datestring`.
    zerodate : datetime
        First day of period 1.

    Returns
    -------
    dict
        'text'   -> zero-padded period number followed by the target year,
                    e.g. '002 (2020)'.
        'refday' -> datetime of the first day of the period.

    Raises
    ------
    ValueError
        If `datestring` is a string that does not match `dateformat`.
    '''
    # The original body read `datetarget` without ever defining it; build it
    # from the arguments so the function can actually be called.
    if isinstance(datestring, datetime):
        datetarget = datestring
    else:
        datetarget = datetime.strptime(datestring, dateformat)

    deltadays = (datetarget - zerodate).days
    timetag = (deltadays // TIME_STEP) + 1
    refday = zerodate + timedelta(days=(timetag - 1) * TIME_STEP)

    # `:03d` pads like the previous slice did for 1..999 but never drops digits.
    return {'text': f'{timetag:03d} ({datetarget.year})',
            'refday': refday}

def is_business_day(date):
    '''
    Tell whether `date` is a business day.

    A business-day range that starts and ends on `date` is built with
    `pd.bdate_range`; it holds one entry when the day qualifies and none
    otherwise. No holiday calendar is passed, so only pandas' default
    business-day rule applies.
    '''
    matching_days = pd.bdate_range(start=date, end=date)
    return len(matching_days) > 0

In [3]:
# URL template of one daily spreadsheet; `{0}` is filled in with a date tag
# through `url.format(...)` when the files are downloaded.
url = 'https://www.prefeitura.sp.gov.br/cidade/secretarias/upload/{0}.xls'

In [31]:
# Month abbreviations used in the date tags, keyed by month number (1-12).
meses = dict(enumerate(
    ['JAN', 'FEV', 'MAR', 'ABR', 'MAI', 'JUN',
     'JUL', 'AGO', 'SET', 'OUT', 'NOV', 'DEZ'],
    start=1,
))

sdate = datetime(2019, 1, 1)    # start date (included)
edate = datetime(2022, 10, 31)  # end date (excluded: the range below stops one day earlier)

# One tag per day in the form DDMMMYYYY, e.g. '01JAN2019'.
target_dates = [
    f'{day.day:02d}{meses[day.month]}{day.year}'
    for day in pd.date_range(sdate, edate - timedelta(days=1), freq='d')
]

In [32]:
### Download files

# Create the target folder up front; without it every open() below fails.
os.makedirs('sptrans', exist_ok=True)

for date in target_dates:
    filename = f'sptrans/{date}.xls'
    if os.path.isfile(filename):
        continue  # already on disk from a previous run

    try:
        # A timeout keeps one stalled connection from blocking the whole loop.
        r = requests.get(url.format(date), timeout=60)
        # Fail on 4xx/5xx so an HTML error page is never stored as a .xls file
        # (a stored file would also be skipped forever by the isfile check).
        r.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(r.content)
    except (requests.RequestException, OSError) as exc:
        # Best effort: report the failing date and move on to the next one.
        print(f'Erro em {date}: {exc}')

In [35]:
# Read every daily spreadsheet and stack them into a single frame.
# Frames are collected in a list and concatenated once at the end; calling
# pd.concat inside the loop re-copies all previous rows on every iteration.
frames = []

for date in target_dates:
    filename = f'sptrans/{date}.xls'
    try:
        # The first two rows of each sheet are skipped before the header row.
        df_temp = pd.read_excel(filename, skiprows=2)
        frames.append(df_temp)
    except Exception:
        # Missing or unreadable file: print the date tag and keep going.
        # `Exception` (not a bare except) lets Ctrl-C still interrupt the loop.
        print(date)

# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(frames) if frames else pd.DataFrame()

01JAN2019
02JAN2019
03JAN2019
04JAN2019
05JAN2019
06JAN2019
07JAN2019
08JAN2019
09JAN2019
10JAN2019
11JAN2019
12JAN2019
13JAN2019
14JAN2019
15JAN2019
16JAN2019
17JAN2019
18JAN2019
19JAN2019
20JAN2019
21JAN2019
22JAN2019
23JAN2019
24JAN2019
25JAN2019
26JAN2019
27JAN2019
28JAN2019
29JAN2019
30JAN2019
31JAN2019
01FEV2019
02FEV2019
03FEV2019
04FEV2019
05FEV2019
06FEV2019
07FEV2019
08FEV2019
09FEV2019
10FEV2019
11FEV2019
12FEV2019
13FEV2019
14FEV2019
15FEV2019
16FEV2019
17FEV2019
18FEV2019
19FEV2019
20FEV2019
21FEV2019
22FEV2019
23FEV2019
24FEV2019
25FEV2019
26FEV2019
27FEV2019
28FEV2019


In [36]:
# 'Data' mixes Timestamps and 'dd/mm/yyyy' strings: parse only the strings.
df['Data'] = df['Data'].apply(
    lambda value: datetime.strptime(value, '%d/%m/%Y')
    if not isinstance(value, pd.Timestamp)
    else value
)

# Flag business days, one row at a time.
df['Dia_útil'] = df['Data'].apply(is_business_day)

# 'YYYY-MM' key used later to group by month.
df['ano-mes'] = df['Data'].apply(lambda day: f'{day.year}-{day.month:02d}')

# Line code = text of 'Linha' before the first space.
df['CodLinha'] = df['Linha'].apply(lambda linha: linha.partition(' ')[0])

df_sptrans = df.drop_duplicates()
df_sptrans.head()

Unnamed: 0,Data,Tipo,Area,Empresa,Linha,Passageiros Pagtes Em Dinheiro,Passageiros Pagtes Comum,Passageiros Pgts Bu Comum M,Passageiros Pagtes Estudante,Passageiros Pgts Bu Est Mensal,...,Passageiros Int Ônibus->Ônibus,Passageiros Com Gratuidade,Passageiros Com Gratuidade Est,Tot Passageiros Transportados,Passageiros Comum e VT,Grupo,Lote,Dia_útil,ano-mes,CodLinha
0,2019-03-01,CONCESSAO,AREA 1,GATO PRETO,N10511 - TERM CACHOEIRINHA/TERM LAPA,14,47.0,7,1,0,...,122,23,6,269,,,,True,2019-03,N10511
1,2019-03-01,CONCESSAO,AREA 1,GATO PRETO,N14311 - METRO BARRA FUNDA/MORRO GRANDE,4,8.0,4,3,0,...,13,5,1,74,,,,True,2019-03,N14311
2,2019-03-01,CONCESSAO,AREA 1,GATO PRETO,800001 - TERM LAPA/PCA RAMOS DE AZEVEDO,408,1217.0,193,142,0,...,2140,1535,301,7271,,,,True,2019-03,800001
3,2019-03-01,CONCESSAO,AREA 1,GATO PRETO,830001 - TERM PIRITUBA/TERM LAPA,43,159.0,21,12,0,...,330,155,53,1014,,,,True,2019-03,830001
4,2019-03-01,CONCESSAO,AREA 1,GATO PRETO,853810 - VILA IARA/PAISSANDU,130,372.0,34,43,0,...,322,323,87,1727,,,,True,2019-03,853810


In [37]:
# Mean of 'Tot Passageiros Transportados' per line code (rows) and month
# (columns), business days only. Missing values are set to 0 both before the
# aggregation and in the resulting table.
df_mean = (
    df_sptrans
    .query('Dia_útil == True')
    .sort_values(by='ano-mes')
    .fillna(0)
    .pivot_table(
        index='CodLinha',
        columns='ano-mes',
        values='Tot Passageiros Transportados',
        aggfunc='mean',
    )
    .fillna(0)
    .reset_index()
)
df_mean.to_csv('agregado ônibus.csv', index=False)

In [44]:
# Two-step aggregation over business days:
#   1) sum 'Tot Passageiros Transportados' per day,
#   2) average those daily totals within each month.
df_all = (
    df_sptrans
    .query('Dia_útil == True')
    .sort_values(by='ano-mes')
    .fillna(0)
    .pivot_table(
        index=['Data', 'ano-mes'],
        values='Tot Passageiros Transportados',
        aggfunc='sum',
    )
    .reset_index()
    .pivot_table(
        index='ano-mes',
        values='Tot Passageiros Transportados',
        aggfunc='mean',
    )
    .fillna(0)
    .reset_index()
)
df_all

Unnamed: 0,ano-mes,Tot Passageiros Transportados
0,2019-03,8327065.0
1,2019-04,8564868.0
2,2019-05,8990493.0
3,2019-06,8322663.0
4,2019-07,7725656.0
5,2019-08,8518773.0
6,2019-09,8749808.0
7,2019-10,8961552.0
8,2019-11,8281881.0
9,2019-12,7453863.0
