In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import numpy as np

In [2]:
# Load the stop_times table and derive the hour of each arrival.
# Forward slashes are used in the path: the previous backslash form relied on
# the invalid escape sequences "\p" and "\s" and only resolved on Windows.
stop_times = pd.read_csv('dados/paradas/stop_times.txt', sep=',')
# NOTE(review): if this is a GTFS feed, arrival_time may legally exceed
# 24:00:00 for trips running past midnight, which pd.to_datetime would
# reject -- confirm the feed contains no such values.
stop_times['arrival_time'] = pd.to_datetime(stop_times['arrival_time'])
stop_times['hour'] = stop_times['arrival_time'].dt.hour

In [3]:
# Load the stops table. Forward slashes keep the path portable and avoid the
# invalid "\p" / "\s" escape sequences of the previous backslash form.
stops = pd.read_csv('dados/paradas/stops.txt', sep=',')

In [4]:
# Attach each stop's attributes to its stop_time rows (inner join on stop_id),
# then print the non-null count of every column as a quick sanity check.
events = stop_times.merge(stops, on='stop_id')
print(events.count())

trip_id           95731
arrival_time      95731
departure_time    95731
stop_id           95731
stop_sequence     95731
hour              95731
stop_name         95731
stop_desc         92027
stop_lat          95731
stop_lon          95731
dtype: int64


In [5]:
# Find events at stops whose name or description mentions "Av. Paulista".
# regex=False makes the match literal: with the default regex=True the "."
# would match any character, not just a dot. case=False keeps the search
# case-insensitive and na=False treats missing text as "no match".
av_paulista1 = events[events['stop_name'].str.contains("Av. Paulista", case=False, na=False, regex=False)]
print(av_paulista1.describe())
av_paulista2 = events[events['stop_desc'].str.contains("Av. Paulista", case=False, na=False, regex=False)]
print(av_paulista2.describe())

# Combine both selections, dropping rows matched by both name and description,
# then count the remaining rows per stop_id.
av_paulista = pd.concat([av_paulista1, av_paulista2], ignore_index=True).drop_duplicates()
av_paulista = av_paulista.groupby('stop_id', as_index=False).count()
print(av_paulista.describe())

            stop_id  stop_sequence        hour    stop_lat    stop_lon
count  2.270000e+02     227.000000  227.000000  227.000000  227.000000
mean   2.476644e+08      26.594714   13.700441  -23.563351  -46.654006
std    2.582672e+08      15.886031    4.393091    0.004909    0.005827
min    7.063040e+05       3.000000    0.000000  -23.570405  -46.663271
25%    7.001678e+07      12.000000   11.000000  -23.567308  -46.659585
50%    7.001692e+07      24.000000   16.000000  -23.562931  -46.654428
75%    4.400169e+08      38.500000   17.000000  -23.558947  -46.649529
max    9.200167e+08      66.000000   18.000000  -23.555195  -46.645227
            stop_id  stop_sequence       hour   stop_lat   stop_lon
count  3.600000e+01      36.000000  36.000000  36.000000  36.000000
mean   9.939863e+07      20.583333  14.111111 -23.561787 -46.654936
std    1.117501e+08      13.472459   4.596755   0.003420   0.005153
min    7.063080e+05       8.000000   0.000000 -23.566211 -46.659175
25%    7.063080e+05  

In [6]:
# Explicit list of stop_ids to analyse.
stps = [
    18850, 18858, 18859,
    706354, 9206289,
    70015949, 70016782, 70016834, 70016916, 70016918, 70016924,
    260016780, 260016919, 260016920,
    440016921, 440016922, 440016923,
    920016411, 920016731,
]

# Keep only the events that happen at one of the listed stops
# (this rebinds av_paulista to the raw event rows for those stops).
av_paulista = events.loc[events['stop_id'].isin(stps)]



In [7]:
# Number of events per stop, joined back to the stop attributes.
# Note: the morning-peak restriction (7 <= hour < 9) is currently disabled,
# so despite the variable name the counts cover every hour of the day.
filter_es_morning = (
    av_paulista
    .groupby('stop_id', as_index=False)
    .count()
    .loc[:, ['stop_id', 'trip_id']]
    .rename(columns={'trip_id': 'qtd'})
    .merge(stops, on='stop_id')
)
print(filter_es_morning)


      stop_id  qtd                      stop_name  \
0       18850    2                     Consolação   
1       18858    2                     Brigadeiro   
2       18859    2                   Trianon-masp   
3     9206289   20  Av. Bernardino De Campos, 108   
4    70015949   20             Av. Paulista, 1754   
5    70016782   18              Av. Paulista, 200   
6    70016834   18              Av. Paulista, 500   
7    70016916   18             Av. Paulista, 1374   
8    70016918   10                  Frei Caneca 1   
9    70016924   17              Av. Paulista, 659   
10  260016780   13             Av. Paulista, 2319   
11  260016919   11                  Frei Caneca 2   
12  260016920   15             Av. Paulista, 2452   
13  440016921   21             Av. Paulista, 2027   
14  440016922   19             Av. Paulista, 1578   
15  440016923   19              Av. Paulista, 901   
16  920016411   22  Av. Bernardino De Campos, 159   
17  920016731   18              Av. Paulista, 

In [8]:
# Base map with one point per stop. The former `fig = go.Figure()` was a dead
# assignment (immediately overwritten by the line below) and has been removed.
fig = px.scatter_mapbox(filter_es_morning, lat="stop_lat", lon="stop_lon", zoom=9, width=600, height=500)
# Use OpenStreetMap tiles and strip the outer margins so the map fills the figure.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)

def SetColor(df, threshold=10):
    """Return one marker colour per row of ``df`` based on its ``qtd`` value.

    Args:
        df: DataFrame with a ``qtd`` column.
        threshold: Rows with ``qtd >= threshold`` are coloured ``"red"``,
            all other rows ``"blue"``. Defaults to 10, the value that was
            previously hard-coded.

    Returns:
        list[str]: Colour names in the same order as the rows of ``df``.
    """
    return ["red" if qtd >= threshold else "blue" for qtd in df['qtd'].tolist()]

def SetSize(df, threshold=10):
    """Return one marker size per row of ``df`` based on its ``qtd`` value.

    Args:
        df: DataFrame with a ``qtd`` column.
        threshold: Rows with ``qtd >= threshold`` get size 10, all other
            rows size 4. Defaults to 10, the value that was previously
            hard-coded.

    Returns:
        list[int]: Marker sizes in the same order as the rows of ``df``.
    """
    return [10 if qtd >= threshold else 4 for qtd in df['qtd'].tolist()]

# Overlay a second marker trace on the map whose size and colour are derived
# from each stop's `qtd` via SetSize / SetColor (one list entry per stop).
stop_marker = go.scattermapbox.Marker(
    size=SetSize(filter_es_morning),
    color=SetColor(filter_es_morning),
)
stop_trace = go.Scattermapbox(
    lat=filter_es_morning['stop_lat'].tolist(),
    lon=filter_es_morning['stop_lon'].tolist(),
    mode='markers',
    marker=stop_marker,
)
fig.add_trace(stop_trace)

fig.show()