In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import numpy as np

GTFS Data

In [110]:
# Load the GTFS stops table. Forward slashes keep the relative path portable
# across operating systems and stop backslash sequences in the literal from
# being read as string escapes.
stops = pd.read_csv('dados/sao_paulo/stops.txt', sep=',')
print(stops.describe())


            stop_id      stop_lat      stop_lon
count  2.092800e+04  20928.000000  20928.000000
mean   3.444969e+08    -23.570825    -46.614287
std    3.151575e+08      0.085744      0.112475
min    1.884800e+04    -23.911109    -46.983928
25%    7.805227e+06    -23.618165    -46.704803
50%    2.900014e+08    -23.558349    -46.635081
75%    6.300150e+08    -23.507502    -46.522022
max    9.600115e+08    -23.195643    -46.184930


In [3]:
# Load the stop-times table. The path is written with forward slashes: with
# backslashes, the "\n" in "\new_stop_times.txt" is read as a newline
# character, which corrupts the file name.
stop_times = pd.read_csv('dados/sao_paulo/new_stop_times.txt', sep=',')

# Parse arrival times so the hour of day can be extracted for later filtering.
# NOTE(review): raw GTFS feeds may hold times of 24:00:00 or later, which
# to_datetime cannot parse; presumably this file was normalized beforehand
# (confirm).
stop_times['arrival_time'] = pd.to_datetime(stop_times['arrival_time'])
stop_times['hour'] = stop_times['arrival_time'].dt.hour
print(stop_times.dtypes)


trip_id                   object
arrival_time      datetime64[ns]
departure_time            object
stop_id                    int64
stop_sequence              int64
hour                       int64
dtype: object


In [112]:
# Attach stop attributes to every stop-time row. The join is on 'stop_id'
# with the default (inner) merge, so rows without a matching stop are dropped.
events = stop_times.merge(stops, on='stop_id')
print(events.describe())

            stop_id  stop_sequence          hour      stop_lat      stop_lon
count  9.459800e+04   94598.000000  94598.000000  94598.000000  94598.000000
mean   3.803429e+08      26.649739     12.543003    -23.572109    -46.618723
std    3.112520e+08      19.361359      5.927964      0.081352      0.107793
min    1.884800e+04       1.000000      0.000000    -23.911109    -46.983928
25%    5.000483e+07      11.000000      7.000000    -23.620532    -46.703024
50%    3.500024e+08      23.000000     16.000000    -23.560734    -46.641499
75%    6.700098e+08      38.000000     17.000000    -23.511883    -46.535254
max    9.600115e+08     141.000000     23.000000    -23.195643    -46.184930


In [None]:
# Load the GTFS frequencies table.
frequencies = pd.read_csv('dados/sao_paulo/frequencies.txt', sep=',')

# Parse both window boundaries and derive departures per hour in one step.
# 3600 seconds divided by the headway gives trips per hour; astype(int)
# truncates the fractional part.
frequencies = frequencies.assign(
    start_time=pd.to_datetime(frequencies['start_time']),
    end_time=pd.to_datetime(frequencies['end_time']),
    bus_per_hour=(3600 / frequencies['headway_secs']).astype(int),
)

print(frequencies['bus_per_hour'].describe())

Filters

In [117]:
# Count, per stop, the stop-time rows (non-null trip_id) whose arrival hour
# is 7 or 8, and expose that count as 'qtd_onibus'.
filter_morning = (
    events[(events['hour'] >= 7) & (events['hour'] < 9)]
    .groupby('stop_id', as_index=False)['trip_id']
    .count()
    .rename(columns={'trip_id': 'qtd_onibus'})
)

# Bring the stop attributes back in; the inner merge keeps only stops that
# have at least one row in the morning window.
filter_morning = stops.merge(filter_morning, on='stop_id')

# First the stops with 10 or more morning rows, then all stops in the window.
print(filter_morning[filter_morning['qtd_onibus'] >= 10].count())
print(filter_morning.count())


stop_id       212
stop_name     212
stop_desc     211
stop_lat      212
stop_lon      212
qtd_onibus    212
dtype: int64
stop_id       6450
stop_name     6450
stop_desc     6120
stop_lat      6450
stop_lon      6450
qtd_onibus    6450
dtype: int64


In [119]:
# Base map: one marker per row of filter_morning on an OpenStreetMap tile
# layer. (The earlier empty go.Figure() was overwritten immediately, so it is
# not created any more.)
fig = px.scatter_mapbox(filter_morning, lat="stop_lat", lon="stop_lon", zoom=9, width=600, height=500)

# Use the open-street-map style and remove all outer margins so the map
# fills the figure area.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r":0,"t":0,"l":0,"b":0},
)

def SetColor(df, threshold=10, high_color="red", low_color="blue"):
    """Return one marker colour per row of ``df`` based on 'qtd_onibus'.

    Parameters
    ----------
    df : DataFrame
        Must contain a 'qtd_onibus' column.
    threshold : number, default 10
        Rows whose 'qtd_onibus' is greater than or equal to this value get
        ``high_color``.
    high_color, low_color : str, default "red" / "blue"
        Colours for rows at/above and below the threshold respectively.

    Returns
    -------
    list of str
        Colours in the same order as the rows of ``df``; empty when ``df``
        has no rows.
    """
    return [
        high_color if count >= threshold else low_color
        for count in df['qtd_onibus'].tolist()
    ]

def SetSize(df, threshold=10, high_size=10, low_size=4):
    """Return one marker size per row of ``df`` based on 'qtd_onibus'.

    Parameters
    ----------
    df : DataFrame
        Must contain a 'qtd_onibus' column.
    threshold : number, default 10
        Rows whose 'qtd_onibus' is greater than or equal to this value get
        ``high_size``.
    high_size, low_size : number, default 10 / 4
        Marker sizes for rows at/above and below the threshold respectively.

    Returns
    -------
    list
        Sizes in the same order as the rows of ``df``; empty when ``df`` has
        no rows.
    """
    return [
        high_size if count >= threshold else low_size
        for count in df['qtd_onibus'].tolist()
    ]

# Overlay a marker trace whose size and colour are computed per stop from
# 'qtd_onibus' by SetSize / SetColor (each returns a list with one entry per row).
# NOTE(review): the base figure already plots every row of filter_morning, so
# these markers sit on top of an identical set of points; confirm that the
# overlap is intended.
fig.add_trace(
    go.Scattermapbox(
        mode='markers',
        lat=filter_morning['stop_lat'].tolist(),
        lon=filter_morning['stop_lon'].tolist(),
        marker=go.scattermapbox.Marker(
            color=SetColor(filter_morning),
            size=SetSize(filter_morning),
        ),
    )
)

fig.show()