In [1]:
# --- Project path setup -----------------------------------------------------
# The repository root is taken to be two directories above the current working
# directory (a relative path, so it depends on where the kernel was started).
# It is appended to sys.path so that the local `src` package imported further
# down can be resolved.
import os
import sys
project_dir = os.path.join(os.pardir, os.pardir)
sys.path.append(project_dir)

# --- Environment ------------------------------------------------------------
# Load the variables defined in the project-level `.env` file.
# NOTE(review): presumably these hold the database settings consumed by
# connect_database() -- confirm against src.data.processing_func.
import dotenv
dotenv_path = os.path.join(project_dir, '.env')
dotenv.load_dotenv(dotenv_path)

# --- Third-party libraries ----------------------------------------------------
# NOTE(review): several names imported below (time, MetaData, create_engine,
# extract, select, URL, or_, timezone) are not referenced in the cells visible
# in this notebook export.
import time
import numpy as np
import pandas as pd
import geopandas as gpd
from sqlalchemy import MetaData, create_engine, extract, select
from sqlalchemy.engine.url import URL
from sqlalchemy.sql import or_
import datetime
from pytz import timezone
from shapely.geometry import Point

# --- Project-local helpers (importable thanks to the sys.path change above) --
from src.data.processing_func import (get_direction, connect_database, extract_geo_sections)
from src.data.load_func import (extract_jps,
                                transf_flow_features,
                                transf_flow_labels)

# Show up to 30 columns when a DataFrame is displayed.
pd.options.display.max_columns = 30

In [2]:
# Connect to the database and pull the jam records for the study window.
meta = connect_database()

# Study window: 1 October 2017 to 25 October 2017.
date_begin = datetime.date(2017, 10, 1)
date_end = datetime.date(2017, 10, 25)

# Hour ranges handed to extract_jps.
# NOTE(review): presumably the morning and evening peaks -- confirm how
# extract_jps treats the upper bound of each pair.
periods = [(7, 9), (17, 19)]

df_jps = extract_jps(
    meta,
    date_begin,
    date_end,
    periods=periods,
    weekends=True,
    summary=False,
)

# Quick look at a few random rows.
df_jps.sample(5)

Unnamed: 0,MgrcDateStart,JpsId,SctnId,JamId,JamIndLevelOfTraffic,JamQtdLengthMeters,JamSpdMetersPerSecond,JamTimeDelayInSeconds,JamDscCoordinatesLonLat,JamSpdKmPerHour,LonDirection,LatDirection,date,hour,minute,period,minute_bin
13908,2017-10-03 18:51:00-03:00,226299.0,14009.0,481770.0,4.0,285.0,1.541667,157.0,"[{'x': -48.846533, 'y': -26.303657}, {'x': -48...",5.55,West,North,2017-10-03,18,51,1,45 a 59
6270,2017-10-02 18:31:00-03:00,277696.0,3443.0,479140.0,3.0,1349.0,5.036111,153.0,"[{'x': -48.847522, 'y': -26.337247}, {'x': -48...",18.13,East,South,2017-10-02,18,31,1,30 a 44
152489,2017-10-19 18:36:00-02:00,510550.0,7382.0,542635.0,3.0,587.0,2.469444,167.0,"[{'y': -26.289683, 'x': -48.845373}, {'y': -26...",8.89,West,South,2017-10-19,18,36,1,30 a 44
114838,2017-10-16 18:56:00-02:00,495807.0,15081.0,526635.0,3.0,1389.0,6.061111,123.0,"[{'x': -48.8289, 'y': -26.30954}, {'x': -48.82...",21.82,East,North,2017-10-16,18,56,1,45 a 59
6463,2017-10-02 18:31:00-03:00,206212.0,6098.0,479203.0,3.0,580.0,4.763889,65.0,"[{'x': -48.819743, 'y': -26.254253}, {'x': -48...",17.15,West,North,2017-10-02,18,31,1,30 a 44


In [3]:
# Remove national holidays (list of Brazilian holidays published by ANBIMA).
# `skipfooter` is the current spelling of the read_excel option; the older
# `skip_footer` keyword was deprecated and later removed by pandas, so the
# original call fails on recent versions.
holidays = pd.read_excel(
    project_dir + "/data/external/feriados_nacionais.xls",
    skipfooter=9,  # ignore the last 9 rows of the sheet
)
# Keep plain dates so they can be compared with df_jps["date"].
holidays["Data"] = holidays["Data"].dt.date
holiday_list = holidays["Data"].tolist()
df_jps = df_jps[~df_jps["date"].isin(holiday_list)]

# Aggregate the jam records per section, direction and time slot
# (hour + minute_bin): number of records plus the mean of each jam metric.
jps_per_timeslot = (
    df_jps
    .groupby(["SctnId", "hour", "minute_bin", "LonDirection", "LatDirection"])
    .agg({
        "JpsId": ["count"],
        "JamQtdLengthMeters": ["mean"],
        "JamSpdKmPerHour": ["mean"],
        "JamTimeDelayInSeconds": ["mean"],
        "JamIndLevelOfTraffic": ["mean"],
        "period": ["max"],
    })
)
# Flatten the two-level column index: ("JpsId", "count") -> "JpsIdcount".
jps_per_timeslot.columns = [
    "".join(col_name).strip() for col_name in jps_per_timeslot.columns.values
]
jps_per_timeslot.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,JpsIdcount,JamQtdLengthMetersmean,JamSpdKmPerHourmean,JamTimeDelayInSecondsmean,JamIndLevelOfTrafficmean,periodmax
SctnId,hour,minute_bin,LonDirection,LatDirection,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3071.0,7,45 a 59,West,South,8,298.625,6.675,111.5,4.0,-1
6097.0,8,45 a 59,West,North,1,977.0,20.42,85.0,2.0,-1
7404.0,18,15 a 29,East,South,34,775.029412,11.107059,191.705882,2.970588,1
15178.0,7,0 a 14,East,North,1,794.0,22.56,69.0,3.0,-1
5121.0,17,0 a 14,West,South,1,253.0,7.27,93.0,3.0,1


In [4]:
# Count, for every (hour, minute_bin) slot, how many distinct Waze timestamps
# (MgrcDateStart) were observed, then attach that total to each aggregated row.
wazesignals_per_timeslot = (
    df_jps
    .groupby(["hour", "minute_bin"])
    .agg({"MgrcDateStart": [pd.Series.nunique]})
)
# Flatten the two-level column index: ("MgrcDateStart", "nunique") -> "MgrcDateStartnunique".
wazesignals_per_timeslot.columns = [
    "".join(name_parts) for name_parts in wazesignals_per_timeslot.columns.values
]

# Leave only (hour, minute_bin) in the index so the join below aligns on the slot.
jps_per_timeslot = (
    jps_per_timeslot
    .reset_index(level=["SctnId", "LonDirection", "LatDirection"])
    .join(wazesignals_per_timeslot, how="outer")
)

# Share of the slot's Waze timestamps in which this section/direction had a jam record.
jps_per_timeslot["traffic_prob"] = jps_per_timeslot["JpsIdcount"].div(
    jps_per_timeslot["MgrcDateStartnunique"]
)

jps_per_timeslot.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,SctnId,LonDirection,LatDirection,JpsIdcount,JamQtdLengthMetersmean,JamSpdKmPerHourmean,JamTimeDelayInSecondsmean,JamIndLevelOfTrafficmean,periodmax,MgrcDateStartnunique,traffic_prob
hour,minute_bin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
18,45 a 59,13713.0,East,South,3,537.0,12.553333,74.666667,3.0,1,69,0.043478
8,15 a 29,3743.0,West,North,6,520.0,11.476667,114.166667,3.166667,-1,69,0.086957
18,15 a 29,11815.0,West,North,2,568.5,14.32,77.5,2.5,1,69,0.028986
7,30 a 44,2165.0,East,South,5,472.0,10.286,130.0,3.4,-1,69,0.072464
17,0 a 14,15732.0,East,South,5,1317.8,17.71,179.6,2.6,1,69,0.072464


In [5]:
# Merge the per-slot statistics with the official street sections from the Municipality.
geo_sections = extract_geo_sections(meta, buffer=10)

# reset_index() without inplace returns a new frame, so jps_per_timeslot is not
# modified here. The in-place version changed the frame on every execution,
# which made re-running this cell accumulate stray index columns.
geo_jps_per_timeslot = geo_sections.merge(
    jps_per_timeslot.reset_index(), how="inner", on="SctnId"
)
geo_jps_per_timeslot = geo_jps_per_timeslot.set_index(
    ["SctnId", "SctnDscNome", "LonDirection", "LatDirection", "hour", "minute_bin"]
)

# Technical column name -> display name (Portuguese labels used downstream).
columns = {"MgrcDateStartnunique": "Total de sinais do Waze",
           "JpsIdcount": "Engarrafamentos registrados",
           "traffic_prob": "traffic_prob",
           "JamSpdKmPerHourmean": "Velocidade Média (km/h)",
           "JamQtdLengthMetersmean": "Fila média (m)",
           "JamTimeDelayInSecondsmean": "Atraso médio (s)",
           "JamIndLevelOfTrafficmean": "Nível médio de congestionamento (0 a 5)",
           "periodmax": "period",
           }
geo_jps_per_timeslot = geo_jps_per_timeslot.rename(columns=columns)

# Keep only the renamed indicator columns plus the section geometry column.
col_list = list(columns.values()) + ["section_LineString"]
geo_jps_per_timeslot = geo_jps_per_timeslot[col_list]
geo_jps_per_timeslot.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Total de sinais do Waze,Engarrafamentos registrados,traffic_prob,Velocidade Média (km/h),Fila média (m),Atraso médio (s),Nível médio de congestionamento (0 a 5),period,section_LineString
SctnId,SctnDscNome,LonDirection,LatDirection,hour,minute_bin,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
5321,OTTO NASS,West,South,8,15 a 29,69,1,0.014493,6.58,227.0,85.0,3.0,-1,"POLYGON ((-48.851433290526 -26.26074663231489,..."
14709,ADOLFO DA VEIGA,West,North,18,45 a 59,69,2,0.028986,17.375,882.0,101.5,2.5,1,POLYGON ((-48.82888350659901 -26.3676618853761...
15908,PREFEITO HELMUT FALLGATTER,West,South,7,30 a 44,69,4,0.057971,11.5375,1405.25,309.75,3.25,-1,POLYGON ((-48.8189479701395 -26.29593131760885...
9659,BIGUACU,West,North,18,30 a 44,69,2,0.028986,5.22,220.0,110.0,3.0,1,POLYGON ((-48.83886071808701 -26.2933072121553...
6619,AVENIDA DOUTOR ALBANO SCHULZ,East,South,17,0 a 14,69,2,0.028986,11.565,653.0,139.0,3.0,1,POLYGON ((-48.84302063675746 -26.2951634743250...


In [6]:
# Show probability and criticality indicators only for the sections of interest.
# The CSV is read unconditionally: the former `'sections_interest' not in globals()`
# guard made the result depend on whatever happened to be left in the kernel.
sections_interest = pd.read_csv(project_dir + "/data/external/vias_estudo.csv", index_col=0, decimal=',')

# Header cells may carry surrounding whitespace; normalise before using the names.
sections_interest.columns = sections_interest.columns.str.strip()

# One point per study location, built as (x=Longitude, y=Latitude).
sections_interest["geometry"] = [
    Point(lon, lat)
    for lon, lat in zip(sections_interest["Longitude"], sections_interest["Latitude"])
]

# Use the same CRS as the section geometries so the spatial join is consistent.
crs = geo_jps_per_timeslot.crs
geo_sections_interest = gpd.GeoDataFrame(sections_interest, crs=crs, geometry="geometry")

# Left spatial join with the "within" test: every study point is kept, and points
# that fall inside no section geometry get NaN in the section columns.
# `how` and the spatial test are passed positionally because geopandas renamed
# the keyword of the latter from `op` to `predicate`; the positional form is
# accepted under both spellings.
prob_matrix = gpd.sjoin(geo_sections_interest, geo_jps_per_timeslot, "left", "within")
prob_matrix.sample(5)

Unnamed: 0,Latitude,Longitude,Endereço,Sentido,geometry,SctnId,SctnDscNome,LonDirection,LatDirection,hour,minute_bin,Total de sinais do Waze,Engarrafamentos registrados,traffic_prob,Velocidade Média (km/h),Fila média (m),Atraso médio (s),Nível médio de congestionamento (0 a 5),period
42,-26.32169,-48.83925,RUA FLORIANÓPOLIS,N/S,POINT (-48.83925 -26.32169),13990.0,FLORIANOPOLIS,East,South,17.0,30 a 44,69.0,37.0,0.536232,9.550811,526.243243,147.243243,3.162162,1.0
25,-26.30549,-48.84107,AV. CEL. PROCÓPIO GOMES,N/S,POINT (-48.84107 -26.30549),6271.0,AVENIDA DOUTOR PAULO MEDEIROS,East,South,8.0,45 a 59,71.0,1.0,0.014085,9.26,444.0,111.0,3.0,-1.0
43,-26.31261,-26.31261,AV. GETÚLIO VARGAS,N/S,POINT (-26.31261 -26.31261),,,,,,,,,,,,,,
17,-26.2966,-48.84255,AV. DR. ALBANO SCHULZ,N/S,POINT (-48.84255 -26.2966),665.0,AVENIDA DOUTOR ALBANO SCHULZ,West,North,17.0,0 a 14,69.0,4.0,0.057971,16.205,685.0,96.25,2.75,1.0
16,-26.29591,-48.84206,AV. HERMANN AUGUST LEPPER,S/N,POINT (-48.84206 -26.29591),13245.0,AVENIDA HERMANN AUGUST LEPPER,West,North,17.0,45 a 59,69.0,21.0,0.304348,11.864286,640.857143,151.238095,2.571429,1.0


In [7]:
# Collapse the 15-minute slots of each period into one row per study location,
# section and direction.
#   * "Probabilidade de Trânsito" = 1 - prod(1 - traffic_prob) over the slots,
#     i.e. the chance of at least one slot with traffic if the slots are
#     treated as independent.
#   * Each metric is the traffic_prob-weighted average of its slot values:
#     sum(traffic_prob * metric) / sum(traffic_prob).
metric_cols = [
    "Velocidade Média (km/h)",
    "Fila média (m)",
    "Atraso médio (s)",
    "Nível médio de congestionamento (0 a 5)",
]

prob_matrix["notraffic_prob"] = 1 - prob_matrix["traffic_prob"]
for metric in metric_cols:
    prob_matrix["weighted_" + metric] = prob_matrix["traffic_prob"] * prob_matrix[metric]

# String aggregation names are used instead of np.prod / np.sum: pandas resolves
# both to the same group-wise reductions, and recent versions warn when numpy
# callables are passed to agg.
agg_spec = {"notraffic_prob": "prod", "traffic_prob": "sum"}
for metric in metric_cols:
    agg_spec["weighted_" + metric] = "sum"

# Rows whose grouping keys are NaN (study points that matched no section) do
# not form a group and therefore do not appear in the result.
traffic_indicators = prob_matrix.groupby(["SctnId", "SctnDscNome",
                                          "Longitude", "Latitude",
                                          "LonDirection", "LatDirection",
                                          "period"]).agg(agg_spec)

traffic_indicators["Probabilidade de Trânsito"] = 1 - traffic_indicators["notraffic_prob"]
for metric in metric_cols:
    traffic_indicators[metric] = (
        traffic_indicators["weighted_" + metric] / traffic_indicators["traffic_prob"]
    )

# Keep only the final indicators, in presentation order.
traffic_indicators = traffic_indicators[["Probabilidade de Trânsito"] + metric_cols]

traffic_indicators.sort_values(by="Probabilidade de Trânsito", ascending=False).head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Probabilidade de Trânsito,Velocidade Média (km/h),Fila média (m),Atraso médio (s),Nível médio de congestionamento (0 a 5)
SctnId,SctnDscNome,Longitude,Latitude,LonDirection,LatDirection,period,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
13990.0,FLORIANOPOLIS,-48.83925,-26.32169,East,South,1.0,0.999042,9.502762,542.434921,154.028571,3.184127
13990.0,FLORIANOPOLIS,-48.8392,-26.32173,East,South,1.0,0.999042,9.502762,542.434921,154.028571,3.184127
7480.0,ITAIOPOLIS,-48.84354,-26.28744,East,South,1.0,0.996085,9.082803,566.382166,144.458599,3.044586
5515.0,NOVE DE MARCO,-48.84205,-26.30177,East,South,1.0,0.990553,9.177833,593.466667,142.345833,2.841667
8207.0,GETULIO VARGAS,-48.84529,-26.31577,East,South,1.0,0.986876,10.895223,1014.245536,228.241071,3.308036
5438.0,DOUTOR PLACIDO OLIMPIO DE OLIVEIRA,-48.84112,-26.3111,East,North,1.0,0.981805,6.776986,590.607656,273.909091,3.100478
2717.0,DOUTOR PLACIDO OLIMPIO DE OLIVEIRA,-48.8404,-26.31108,East,North,1.0,0.94681,6.870062,605.771605,269.993827,3.061728
11433.0,NOVE DE MARCO,-48.84205,-26.30177,East,South,1.0,0.934232,8.860774,614.593548,157.451613,2.916129
5515.0,NOVE DE MARCO,-48.84205,-26.30177,East,South,-1.0,0.908037,10.493589,649.532801,121.930526,2.74578
13245.0,AVENIDA HERMANN AUGUST LEPPER,-48.84206,-26.29591,West,North,1.0,0.89112,11.452422,652.648437,151.945312,2.757812


In [9]:
# Write the indicator table to the processed-data folder as CSV.
traffic_indicators_csv = project_dir + "/data/processed/traffic_indicators.csv"
traffic_indicators.to_csv(traffic_indicators_csv)