In [1]:
import os
import sys
# Point at the directory two levels above the working directory and add it to
# sys.path so that the `src.*` imports further down can be resolved.
project_dir = os.path.join(os.pardir, os.pardir)
sys.path.append(project_dir)

import dotenv
# Load the variables declared in the `.env` file of that directory.
# NOTE(review): presumably these are the database settings consumed by
# connect_database() — confirm against src.data.processing_func.
dotenv_path = os.path.join(project_dir, '.env')
dotenv.load_dotenv(dotenv_path)

import time
import numpy as np
import pandas as pd
import geopandas as gpd
from sqlalchemy import MetaData, create_engine, extract, select
from sqlalchemy.engine.url import URL
from sqlalchemy.sql import or_
import datetime
from pytz import timezone
from shapely.geometry import Point

from src.data.processing_func import (get_direction, connect_database, extract_geo_sections)
from src.data.load_func import (extract_jps,
                                transf_flow_features,
                                transf_flow_labels)

# Let rendered DataFrames show up to 30 columns before pandas truncates them.
pd.set_option("display.max_columns", 30)

In [2]:
# Open the database connection and define what will be extracted.
meta = connect_database()

# Extraction window: 12 October 2017 to 18 October 2017.
date_begin = datetime.date(2017, 10, 12)
date_end = datetime.date(2017, 10, 18)

# Pairs of hours handed to extract_jps.
# NOTE(review): presumably the morning and evening peak windows — confirm how
# extract_jps interprets the bounds.
periods = [(7, 9), (17, 19)]

df_jps = extract_jps(
    meta,
    date_begin,
    date_end,
    periods=periods,
    weekends=True,
    summary=False,
)
df_jps.sample(5)

Unnamed: 0,MgrcDateStart,JpsId,SctnId,JamId,JamIndLevelOfTraffic,JamQtdLengthMeters,JamSpdMetersPerSecond,JamTimeDelayInSeconds,JamDscCoordinatesLonLat,JamSpdKmPerHour,LonDirection,LatDirection,date,hour,minute,period,minute_bin
35374,2017-10-17 18:41:00-02:00,482111.0,5127.0,532254.0,3.0,1143.0,2.786111,306.0,"[{'y': -26.334296, 'x': -48.831079}, {'y': -26...",10.03,East,South,2017-10-17,18,41,1,30 a 44
14604,2017-10-16 17:51:00-02:00,495160.0,15674.0,525445.0,3.0,1917.0,5.572222,175.0,"[{'y': -26.331778, 'x': -48.84696}, {'y': -26....",20.06,East,South,2017-10-16,17,51,1,45 a 59
4245,2017-10-13 18:01:00-03:00,436734.0,3127.0,518033.0,2.0,1198.0,5.777778,93.0,"[{'y': -26.324184, 'x': -48.817529}, {'y': -26...",20.8,East,South,2017-10-13,18,1,1,0 a 14
16377,2017-10-16 18:06:00-02:00,488236.0,8128.0,525713.0,3.0,853.0,3.744444,161.0,"[{'x': -48.848246, 'y': -26.292419}, {'x': -48...",13.48,West,North,2017-10-16,18,6,1,0 a 14
32891,2017-10-17 18:21:00-02:00,482106.0,5127.0,531869.0,2.0,1027.0,6.294444,72.0,"[{'x': -48.821581, 'y': -26.340999}, {'x': -48...",22.66,West,North,2017-10-17,18,21,1,15 a 29


In [5]:
# Remove national holidays (ANBIMA list of Brazilian holidays) from the sample.
# `skipfooter` is used instead of `skip_footer`, which current pandas releases
# no longer accept; the last 9 rows of the sheet are not parsed.
holidays = pd.read_excel(
    os.path.join(project_dir, "data", "external", "feriados_nacionais.xls"),
    skipfooter=9,
)
holidays["Data"] = holidays["Data"].dt.date
holiday_list = holidays["Data"].tolist()
df_jps = df_jps[~df_jps["date"].isin(holiday_list)]

# Aggregate the jam records per street section, direction and 15-minute slot.
slot_keys = ["SctnId", "hour", "minute_bin", "LonDirection", "LatDirection"]
slot_aggregations = {
    "JpsId": ["count"],                  # number of jam records in the slot
    "JamQtdLengthMeters": ["mean"],
    "JamSpdKmPerHour": ["mean"],
    "JamTimeDelayInSeconds": ["mean"],
    "JamIndLevelOfTraffic": ["mean"],
    "period": ["max"],
}
jps_per_timeslot = df_jps.groupby(slot_keys).agg(slot_aggregations)

# Flatten the (column, function) pairs into single names such as "JpsIdcount";
# the following cells refer to the columns by these flattened names.
jps_per_timeslot.columns = [
    "".join(name_parts).strip() for name_parts in jps_per_timeslot.columns
]
jps_per_timeslot.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,JpsIdcount,JamQtdLengthMetersmean,JamSpdKmPerHourmean,JamTimeDelayInSecondsmean,JamIndLevelOfTrafficmean,periodmax
SctnId,hour,minute_bin,LonDirection,LatDirection,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
12814.0,7,15 a 29,West,North,4,1080.0,8.125,386.25,3.25,-1
5483.0,7,30 a 44,East,South,1,423.0,10.08,80.0,3.0,-1
11267.0,7,45 a 59,West,North,2,178.0,4.39,119.5,3.0,-1
14373.0,7,15 a 29,West,South,1,1011.0,17.47,71.0,2.0,-1
9471.0,18,45 a 59,East,North,3,10149.333333,22.91,1239.333333,3.0,1


In [6]:
# Number of distinct Waze timestamps seen in each (hour, minute_bin) slot; this
# is the denominator of the jam probability computed below.
wazesignals_per_timeslot = (
    df_jps.groupby(["hour", "minute_bin"])["MgrcDateStart"]
    .nunique()
    .rename("MgrcDateStartnunique")
    .to_frame()
)

# Keep only (hour, minute_bin) in the index so the join aligns on the slot.
jps_per_timeslot = (
    jps_per_timeslot
    .reset_index(level=["SctnId", "LonDirection", "LatDirection"])
    .join(wazesignals_per_timeslot, how="outer")
)

# Share of the slot's Waze signals in which the section was reported as jammed.
jps_per_timeslot["traffic_prob"] = jps_per_timeslot["JpsIdcount"].div(
    jps_per_timeslot["MgrcDateStartnunique"]
)

jps_per_timeslot.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,SctnId,LonDirection,LatDirection,JpsIdcount,JamQtdLengthMetersmean,JamSpdKmPerHourmean,JamTimeDelayInSecondsmean,JamIndLevelOfTrafficmean,periodmax,MgrcDateStartnunique,traffic_prob
hour,minute_bin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
18,30 a 44,15124.0,East,North,3,1684.666667,16.57,235.0,3.0,1,15,0.2
17,15 a 29,6266.0,West,North,5,542.0,9.98,124.0,3.0,1,15,0.333333
18,30 a 44,6757.0,West,South,4,1696.0,15.3375,241.75,3.0,1,15,0.266667
18,45 a 59,15078.0,East,North,6,1382.833333,21.758333,127.5,2.666667,1,15,0.4
7,45 a 59,15073.0,West,North,5,753.2,17.45,93.6,3.0,-1,15,0.333333


In [7]:
# Attach the aggregated jam statistics to the official street sections from
# the Municipality.
# NOTE(review): buffer=10 presumably widens each section line into a polygon
# (the geometry column renders as POLYGON) — confirm units in extract_geo_sections.
geo_sections = extract_geo_sections(meta, buffer=10)
jps_per_timeslot = jps_per_timeslot.reset_index()

# Display labels for the aggregated columns; the insertion order also defines
# the column order of the resulting frame.
columns = {
    "MgrcDateStartnunique": "Total de sinais do Waze",
    "JpsIdcount": "Engarrafamentos registrados",
    "traffic_prob": "traffic_prob",
    "JamSpdKmPerHourmean": "Velocidade Média (km/h)",
    "JamQtdLengthMetersmean": "Fila média (m)",
    "JamTimeDelayInSecondsmean": "Atraso médio (s)",
    "JamIndLevelOfTrafficmean": "Nível médio de congestionamento (0 a 5)",
    "periodmax": "period",
}
col_list = list(columns.values()) + ["section_LineString"]

section_slot_index = ["SctnId", "SctnDscNome", "LonDirection", "LatDirection", "hour", "minute_bin"]
geo_jps_per_timeslot = (
    geo_sections
    .merge(jps_per_timeslot, how="inner", on="SctnId")
    .set_index(section_slot_index)
    .rename(columns=columns)
)[col_list]
geo_jps_per_timeslot.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Total de sinais do Waze,Engarrafamentos registrados,traffic_prob,Velocidade Média (km/h),Fila média (m),Atraso médio (s),Nível médio de congestionamento (0 a 5),period,section_LineString
SctnId,SctnDscNome,LonDirection,LatDirection,hour,minute_bin,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
8483,OTTO PFUETZENREUTER,East,South,18,0 a 14,15,2,0.133333,15.925,549.0,76.5,3.0,1,POLYGON ((-48.87215306745956 -26.2697146342054...
8781,DONA FRANCISCA,East,South,17,45 a 59,15,1,0.066667,27.21,1283.0,74.0,2.0,1,POLYGON ((-48.86717144763441 -26.2586790074454...
5211,RIO BRANCO,East,North,18,45 a 59,15,2,0.133333,4.81,225.0,123.0,3.5,1,POLYGON ((-48.84385303163265 -26.3035136218178...
6709,SANTA CATARINA,East,South,18,45 a 59,15,5,0.333333,19.926,1067.4,106.8,2.6,1,POLYGON ((-48.84738250871619 -26.3382822562144...
15901,PREFEITO HELMUT FALLGATTER,West,South,7,45 a 59,15,6,0.4,13.836667,1950.833333,415.333333,3.166667,-1,POLYGON ((-48.82441906245155 -26.3010532143063...


In [None]:
# Show probability and criticality indicators only for the points of interest.
# The CSV is read only when `sections_interest` is not already defined in the
# notebook namespace.
if 'sections_interest' not in globals():
    sections_interest = pd.read_csv(
        os.path.join(project_dir, "data", "external", "vias_estudo.csv"),
        index_col=0,
        decimal=',',
    )

# Column headers may carry stray whitespace; normalise before looking them up.
sections_interest.columns = sections_interest.columns.str.strip()

# Build one point per row from the Longitude/Latitude columns (vectorised,
# instead of constructing a Point row by row).
sections_interest["geometry"] = gpd.points_from_xy(
    sections_interest["Longitude"], sections_interest["Latitude"]
)

# Reuse the CRS of the section geometries so both sides of the join agree.
crs = geo_jps_per_timeslot.crs
geo_sections_interest = gpd.GeoDataFrame(sections_interest, crs=crs, geometry="geometry")

# Flatten the six-level index of the section frame before joining so that
# SctnId, SctnDscNome, the directions, hour and minute_bin arrive as plain
# columns in the result; the next cell groups by them.
# `predicate` replaces the `op` keyword, which current GeoPandas releases no
# longer accept.
prob_matrix = gpd.sjoin(
    geo_sections_interest,
    geo_jps_per_timeslot.reset_index(),
    how="left",
    predicate="within",
)
prob_matrix.sample(5)

In [None]:
# Metrics that are averaged over the slots of a period, each slot weighted by
# its jam probability.
metric_cols = [
    "Velocidade Média (km/h)",
    "Fila média (m)",
    "Atraso médio (s)",
    "Nível médio de congestionamento (0 a 5)",
]

# Per-slot ingredients: the probability of no jam and probability-weighted metrics.
prob_matrix["notraffic_prob"] = 1 - prob_matrix["traffic_prob"]
for metric in metric_cols:
    prob_matrix["weighted_" + metric] = prob_matrix["traffic_prob"] * prob_matrix[metric]

# Collapse the slots of each point / section / direction / period.
group_keys = ["SctnId", "SctnDscNome", "Longitude", "Latitude", "LonDirection", "LatDirection", "period"]
aggregations = {"notraffic_prob": np.prod, "traffic_prob": np.sum}
for metric in metric_cols:
    aggregations["weighted_" + metric] = np.sum
traffic_indicators = prob_matrix.groupby(group_keys).agg(aggregations)

# 1 - product of the per-slot "no jam" probabilities, i.e. the chance of a jam
# in at least one slot when the slots are treated as independent.
traffic_indicators["Probabilidade de Trânsito"] = 1 - traffic_indicators["notraffic_prob"]

# Turn the weighted sums back into weighted means.
for metric in metric_cols:
    traffic_indicators[metric] = (
        traffic_indicators["weighted_" + metric] / traffic_indicators["traffic_prob"]
    )

traffic_indicators = traffic_indicators[["Probabilidade de Trânsito"] + metric_cols]

traffic_indicators.sort_values(by="Probabilidade de Trânsito", ascending=False).head(20)

In [None]:
# Persist the indicators table as CSV under data/processed.
traffic_indicators.to_csv(
    path_or_buf=project_dir + "/data/processed/traffic_indicators.csv"
)