In [1]:
# Make the directory two levels above the working directory importable,
# so the `src.*` imports further down can be resolved.
import os
import sys
project_dir = os.path.join(os.pardir, os.pardir)
sys.path.append(project_dir)

# Load environment variables from the `.env` file in the project directory
# (presumably the settings connect_database relies on -- TODO confirm).
import dotenv
dotenv_path = os.path.join(project_dir, '.env')
dotenv.load_dotenv(dotenv_path)

import time
import numpy as np
import pandas as pd
import geopandas as gpd
from sqlalchemy import MetaData, create_engine, extract, select
from sqlalchemy.engine.url import URL
from sqlalchemy.sql import or_
import datetime
from pytz import timezone
from shapely.geometry import Point

# Project helpers for database access and data extraction.
from src.data.processing_func import (get_direction, connect_database, extract_geo_sections)
from src.data.load_func import (extract_jps,
                                transf_flow_features,
                                transf_flow_labels)

# Render up to 30 columns when a DataFrame is displayed.
pd.options.display.max_columns = 30

In [2]:
# Open the database connection and extract the jam-per-section records
# for the study window.
meta = connect_database()

date_begin = datetime.date(2017, 10, 1)
date_end = datetime.date(2017, 10, 21)

# Hour ranges handed to extract_jps (presumably the morning and evening
# peaks -- TODO confirm against extract_jps).
periods = [(7, 9), (17, 19)]

df_jps = extract_jps(
    meta,
    date_begin,
    date_end,
    periods=periods,
    weekends=True,
    summary=False,
)
print(df_jps.shape)
df_jps.sample(5)

(169563, 17)


Unnamed: 0,MgrcDateStart,JpsId,SctnId,JamId,JamIndLevelOfTraffic,JamQtdLengthMeters,JamSpdMetersPerSecond,JamTimeDelayInSeconds,JamDscCoordinatesLonLat,JamSpdKmPerHour,LonDirection,LatDirection,date,hour,minute,period,minute_bin
75575,2017-10-11 08:46:00-03:00,315704.0,5149.0,506734.0,3.0,315.0,1.886111,116.0,"[{'y': -26.340822, 'x': -48.818881}, {'y': -26...",6.79,East,North,2017-10-11,8,46,-1,45 a 59
119758,2017-10-17 17:21:00-02:00,419500.0,2388.0,530936.0,4.0,1018.0,1.975,408.0,"[{'y': -26.310942, 'x': -48.85579}, {'y': -26....",7.11,West,South,2017-10-17,17,21,1,15 a 29
115077,2017-10-17 07:16:00-02:00,482515.0,11178.0,527702.0,3.0,345.0,2.552778,105.0,"[{'x': -48.826485, 'y': -26.319467}, {'x': -48...",9.19,West,North,2017-10-17,7,16,-1,15 a 29
60744,2017-10-10 17:46:00-03:00,379732.0,6713.0,503090.0,3.0,1184.0,3.455556,233.0,"[{'y': -26.331106, 'x': -48.846813}, {'y': -26...",12.44,West,South,2017-10-10,17,46,1,45 a 59
18229,2017-10-04 18:36:00-03:00,268874.0,14000.0,483688.0,3.0,314.0,2.213889,117.0,"[{'y': -26.315747, 'x': -48.832565}, {'y': -26...",7.97,East,South,2017-10-04,18,36,1,30 a 44


In [3]:
# Filter out Brazilian national holidays, using the holiday list from ANBIMA.
# The last 9 rows of the spreadsheet are skipped as footer. `skipfooter` is the
# current spelling of that option: the former `skip_footer` keyword was
# deprecated in pandas 0.23 and removed in pandas 1.0, where it raises TypeError.
holidays = pd.read_excel(project_dir + "/data/external/feriados_nacionais.xls", skipfooter=9)
holidays["Data"] = holidays["Data"].dt.date
holiday_list = holidays["Data"].tolist()
df_jps = df_jps[~df_jps["date"].isin(holiday_list)]

# Aggregate traffic in slots of 15 minutes: one row per
# (section, hour, minute bin, longitudinal direction, latitudinal direction).
jps_per_timeslot = df_jps.groupby(["SctnId", "hour",
                                   "minute_bin", "LonDirection", "LatDirection"]) \
                         .agg({"JpsId": ["count"],
                               "JamQtdLengthMeters": ["mean"],
                               "JamSpdKmPerHour": ["mean"],
                               "JamTimeDelayInSeconds": ["mean"],
                               "JamIndLevelOfTraffic": ["mean"],
                               "period": ["max"],
                               })

# Flatten the (column, function) MultiIndex into single names such as
# "JpsIdcount" or "JamSpdKmPerHourmean"; later cells rely on these names.
jps_per_timeslot.columns = ["".join(col_name).strip() for col_name in jps_per_timeslot.columns.values]
print(jps_per_timeslot.shape)
jps_per_timeslot.sample(5)

(26633, 6)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,JpsIdcount,JamQtdLengthMetersmean,JamSpdKmPerHourmean,JamTimeDelayInSecondsmean,JamIndLevelOfTrafficmean,periodmax
SctnId,hour,minute_bin,LonDirection,LatDirection,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
14626.0,18,0 a 14,West,South,1,1305.0,23.95,71.0,2.0,1
286.0,17,15 a 29,East,South,3,669.666667,12.003333,97.333333,2.333333,1
16048.0,17,45 a 59,West,South,2,398.5,9.565,113.0,3.0,1
3531.0,8,0 a 14,West,South,2,1203.5,13.645,173.5,3.0,-1
15374.0,17,45 a 59,West,North,2,759.0,17.155,96.5,2.0,1


In [4]:
# Merge dataset with the total number of Waze signals for each bin.

# Number of distinct MgrcDateStart timestamps seen in every (hour, minute_bin) slot.
wazesignals_per_timeslot = (
    df_jps
    .groupby(["hour", "minute_bin"])
    .agg({"MgrcDateStart": [pd.Series.nunique]})
)
wazesignals_per_timeslot.columns = ["".join(parts) for parts in wazesignals_per_timeslot.columns.ravel()]

# Leave only (hour, minute_bin) in the index so the join aligns on the slot.
jps_per_timeslot = jps_per_timeslot.reset_index(level=["SctnId", "LonDirection", "LatDirection"])
jps_per_timeslot = jps_per_timeslot.join(wazesignals_per_timeslot, how="outer")

# Jam count of the row divided by the number of signals in its slot.
jps_per_timeslot["slot_traffic_prob"] = (
    jps_per_timeslot["JpsIdcount"].div(jps_per_timeslot["MgrcDateStartnunique"])
)

print(jps_per_timeslot.shape)
jps_per_timeslot.sample(5)

(26633, 11)


Unnamed: 0_level_0,Unnamed: 1_level_0,SctnId,LonDirection,LatDirection,JpsIdcount,JamQtdLengthMetersmean,JamSpdKmPerHourmean,JamTimeDelayInSecondsmean,JamIndLevelOfTrafficmean,periodmax,MgrcDateStartnunique,slot_traffic_prob
hour,minute_bin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
17,30 a 44,4937.0,West,North,1,1618.0,15.15,218.0,2.0,1,57,0.017544
17,0 a 14,3119.0,West,North,3,262.0,6.52,89.666667,3.0,1,57,0.052632
17,30 a 44,3337.0,West,South,4,848.5,15.2875,109.5,2.75,1,57,0.070175
8,45 a 59,16002.0,West,North,5,3272.4,19.106,371.2,3.0,-1,59,0.084746
17,0 a 14,3256.0,West,South,1,1269.0,17.43,92.0,2.0,1,57,0.017544


In [5]:
# Collapse the per-slot rows into one row per (section, direction, period).
#
# - traffic_prob is 1 minus the product of the per-slot "no traffic" probabilities.
# - Every jam metric becomes an average over the slots, weighted by each
#   slot's traffic probability: sum(p * metric) / sum(p).

# Metrics that receive the probability-weighted average (order defines the
# column order of the result).
jam_metrics = ["JamSpdKmPerHourmean",
               "JamQtdLengthMetersmean",
               "JamTimeDelayInSecondsmean",
               "JamIndLevelOfTrafficmean"]

jps_per_timeslot["notraffic_prob"] = 1 - jps_per_timeslot["slot_traffic_prob"]
for metric in jam_metrics:
    jps_per_timeslot["weighted_" + metric] = jps_per_timeslot["slot_traffic_prob"] * jps_per_timeslot[metric]

# Aggregations are given by name: passing np.sum / np.prod to groupby.agg is
# resolved to the same built-in reductions but emits a FutureWarning on
# recent pandas versions.
aggregations = {"notraffic_prob": "prod",
                "slot_traffic_prob": "sum"}
for metric in jam_metrics:
    aggregations["weighted_" + metric] = "sum"

traffic_indicators = jps_per_timeslot.groupby(["SctnId", "LonDirection", "LatDirection",
                                               "periodmax"]).agg(aggregations)

traffic_indicators["traffic_prob"] = 1 - traffic_indicators["notraffic_prob"]
for metric in jam_metrics:
    traffic_indicators[metric] = traffic_indicators["weighted_" + metric] / traffic_indicators["slot_traffic_prob"]

# The weighted sums and the probability total were only intermediate values.
intermediate_columns = ["weighted_" + metric for metric in jam_metrics] + ["slot_traffic_prob"]
traffic_indicators = traffic_indicators.drop(intermediate_columns, axis=1)

print(traffic_indicators.shape)
traffic_indicators.sort_values("JamIndLevelOfTrafficmean", ascending=False).head(5)

(6943, 6)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,notraffic_prob,traffic_prob,JamSpdKmPerHourmean,JamQtdLengthMetersmean,JamTimeDelayInSecondsmean,JamIndLevelOfTrafficmean
SctnId,LonDirection,LatDirection,periodmax,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
6146.0,West,North,-1,0.966102,0.033898,6.31,437.0,209.0,4.0
1837.0,East,South,1,0.96522,0.03478,6.84,375.0,162.0,4.0
8904.0,East,North,1,0.982456,0.017544,4.88,171.0,87.0,4.0
10350.0,East,South,1,0.96522,0.03478,5.95,263.0,99.0,4.0
11307.0,East,South,-1,0.807265,0.192735,4.128163,224.409917,178.146076,4.0


In [6]:
# Merge dataset with official street sections from the Municipality.
geo_sections = extract_geo_sections(meta, buffer=10)

# Turn the group keys into ordinary columns so "SctnId" can be used as the
# merge key. The guard keeps the cell re-runnable: an unconditional in-place
# reset_index would add a spurious "index" column on a second execution.
if isinstance(traffic_indicators.index, pd.MultiIndex):
    traffic_indicators = traffic_indicators.reset_index()

# Source column -> display name. Only the columns listed here (plus the
# section geometry) are kept in the result.
columns = {"SctnId": "Codigo do Trecho",
           "SctnDscNome": "Rua",
           "LonDirection": "Direcao L/O",
           "LatDirection": "Direcao N/S",
           "periodmax": "period",
           "traffic_prob": "Probabilidade de Transito",
           "JamSpdKmPerHourmean": "Velocidade Media (km/h)",
           "JamQtdLengthMetersmean": "Fila media (m)",
           "JamTimeDelayInSecondsmean": "Atraso medio (s)",
           "JamIndLevelOfTrafficmean": "Nivel medio de congestionamento (0 a 5)",
           }
col_list = list(columns.values()) + ["section_LineString"]

# Built as one non-mutating chain: sorting a column subset in place can raise
# SettingWithCopyWarning and leaves the result dependent on execution history.
geo_traffic_indicators = (
    geo_sections
    .merge(traffic_indicators, how="inner", on="SctnId")
    .rename(columns=columns)[col_list]
    .sort_values(["Probabilidade de Transito", "Atraso medio (s)"], ascending=False)
)

print(geo_traffic_indicators.shape)
geo_traffic_indicators.head(10)

(6943, 11)


Unnamed: 0,Codigo do Trecho,Rua,Direcao L/O,Direcao N/S,period,Probabilidade de Transito,Velocidade Media (km/h),Fila media (m),Atraso medio (s),Nivel medio de congestionamento (0 a 5),section_LineString
3677,8322,FLORIANOPOLIS,East,South,1,0.999399,9.488199,546.922794,154.466912,3.176471,POLYGON ((-48.83822475403328 -26.3228380722825...
5849,13990,FLORIANOPOLIS,East,South,1,0.999399,9.488199,546.922794,154.466912,3.176471,POLYGON ((-48.83856901236984 -26.3223585139055...
1854,5171,FLORIANOPOLIS,East,South,1,0.99926,9.509775,551.101124,155.29588,3.179775,POLYGON ((-48.83761163183942 -26.3236965472258...
4997,11432,NOVE DE MARCO,East,South,1,0.999193,9.152456,582.859649,149.52193,2.986842,POLYGON ((-48.84168784171818 -26.3018900162340...
5902,14047,OTTOKAR DOERFFEL,West,South,1,0.999071,9.762273,1088.242424,313.268939,3.450758,POLYGON ((-48.85839700701319 -26.3130386950682...
691,2388,OTTOKAR DOERFFEL,West,South,1,0.998934,9.918276,1101.678161,314.628352,3.436782,POLYGON ((-48.86027092304061 -26.3134803794554...
695,2389,OTTOKAR DOERFFEL,West,South,1,0.998934,9.918276,1101.678161,314.628352,3.436782,POLYGON ((-48.85934181437936 -26.3134695470942...
3486,8110,OTTOKAR DOERFFEL,West,South,1,0.998598,9.570709,1097.661417,320.69685,3.476378,POLYGON ((-48.85807244633456 -26.3127786290922...
3853,8738,OTTOKAR DOERFFEL,West,South,1,0.998562,9.886047,1118.023715,319.758893,3.438735,POLYGON ((-48.86100415168816 -26.3140322095625...
2119,5517,NOVE DE MARCO,East,South,1,0.998277,9.788785,618.089069,137.611336,2.850202,POLYGON ((-48.84537311219218 -26.3016928521619...


In [7]:
# Save as GeoJSON (export currently disabled; uncomment the line below to write the file)
#geo_traffic_indicators.to_file(project_dir + "/data/interim/traffic_indicators.geoJSON", driver="GeoJSON")

In [8]:
# Show probability and criticality indicators only for the sections of interest.

# Always read the study points from disk. Guarding the read with
# `'sections_interest' not in globals()` made the result depend on whatever
# object a previous kernel session had left under that name.
sections_interest = pd.read_csv(project_dir + "/data/external/vias_estudo.csv", index_col=0, decimal=',')
sections_interest.columns = sections_interest.columns.str.strip()

# One point per study location, built as (x=Longitude, y=Latitude).
sections_interest["geometry"] = [
    Point(lon, lat)
    for lon, lat in zip(sections_interest["Longitude"], sections_interest["Latitude"])
]

# Reuse the CRS of the section geometries so both layers are comparable.
crs = geo_traffic_indicators.crs
geo_sections_interest = gpd.GeoDataFrame(sections_interest, crs=crs, geometry="geometry")

# Left join: every study point is kept, matched with each section geometry
# that contains it (a point can match several rows, or none).
# `predicate` replaces the `op` keyword, which geopandas deprecated in 0.10
# and removed in 1.0.
prob_matrix = gpd.sjoin(geo_sections_interest, geo_traffic_indicators, how="left", predicate="within")
prob_matrix

Unnamed: 0,Latitude,Longitude,Endereço,Sentido,geometry,index_right,Codigo do Trecho,Rua,Direcao L/O,Direcao N/S,period,Probabilidade de Transito,Velocidade Media (km/h),Fila media (m),Atraso medio (s),Nivel medio de congestionamento (0 a 5)
1,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,L/O,POINT (-48.84334 -26.2833),4870.0,11046,PADRE ANTONIO VIEIRA,East,North,1.0,0.209437,5.052308,208.000000,133.076923,3.923077
1,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,L/O,POINT (-48.84334 -26.2833),4874.0,11047,PADRE ANTONIO VIEIRA,East,North,1.0,0.209437,5.052308,208.000000,133.076923,3.923077
1,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,L/O,POINT (-48.84334 -26.2833),4871.0,11046,PADRE ANTONIO VIEIRA,West,South,-1.0,0.150635,6.033818,270.261628,138.112403,3.777132
1,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,L/O,POINT (-48.84334 -26.2833),4875.0,11047,PADRE ANTONIO VIEIRA,West,South,-1.0,0.150635,6.033818,270.261628,138.112403,3.777132
1,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,L/O,POINT (-48.84334 -26.2833),4872.0,11046,PADRE ANTONIO VIEIRA,West,South,1.0,0.085581,6.682000,277.000000,121.400000,4.000000
1,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,L/O,POINT (-48.84334 -26.2833),4876.0,11047,PADRE ANTONIO VIEIRA,West,South,1.0,0.085581,6.682000,277.000000,121.400000,4.000000
1,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,L/O,POINT (-48.84334 -26.2833),4869.0,11046,PADRE ANTONIO VIEIRA,East,North,-1.0,0.017544,5.810000,208.000000,107.000000,4.000000
1,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,L/O,POINT (-48.84334 -26.2833),4873.0,11047,PADRE ANTONIO VIEIRA,East,North,-1.0,0.017544,5.810000,208.000000,107.000000,4.000000
2,-26.28422,-48.84364,AV. JOSÉ VIEIRA,S/N,POINT (-48.84364 -26.28422),174.0,657,AVENIDA JOSE VIEIRA,West,North,1.0,0.034780,23.335000,1287.500000,72.000000,1.000000
3,-26.28263,-48.84504,AV. ALUÍSIO PIRES CONDEIXA,S/N,POINT (-48.84504 -26.28263),,,,,,,,,,,


Export table of traffic probability per timeslot for sections of interest

In [15]:
# Per-timeslot traffic probability for the sections of interest.

# Move the slot keys out of the index so they survive the merge as columns.
# The level names are tested explicitly: comparing `index.dtype != int` is
# platform-dependent (where `int` maps to int32 a RangeIndex still compares
# unequal), which would reset the index again and add a stray "index" column.
if "hour" in jps_per_timeslot.index.names:
    jps_per_timeslot = jps_per_timeslot.reset_index()

geo_jps_per_timeslot = geo_sections.merge(jps_per_timeslot, how="inner", on="SctnId")

# Source column -> exported name. Identity entries are listed on purpose:
# the values of this mapping also define which columns are exported, and in
# which order.
columns = {"SctnId": "Codigo do Trecho",
           "Latitude": "Latitude",
           "Longitude": "Longitude",
           "Endereço": "Endereço",
           "SctnDscNome": "Rua",
           "LonDirection": "Direcao L/O",
           "LatDirection": "Direcao N/S",
           "hour": "Hora",
           "minute_bin": "Slot_minutos",
           "MgrcDateStartnunique": "Sinais Waze",
           "JpsIdcount": "Engarrafamentos",
           "slot_traffic_prob": "slot_traffic_prob",
           "JamSpdKmPerHourmean": "Velocidade Media (km/h)",
           "JamQtdLengthMetersmean": "Fila media (m)",
           "JamTimeDelayInSecondsmean": "Atraso medio (s)",
           "JamIndLevelOfTrafficmean": "Nivel medio de congestionamento (0 a 5)",
           }
col_list = list(columns.values())

# Left join keeps every study point. `predicate` replaces the `op` keyword,
# which geopandas deprecated in 0.10 and removed in 1.0. The remaining steps
# are chained without in-place mutation of a column subset.
prob_matrix_per_timeslot = (
    gpd.sjoin(geo_sections_interest, geo_jps_per_timeslot, how="left", predicate="within")
    .rename(columns=columns)[col_list]
    .reset_index(drop=True)
)
prob_matrix_per_timeslot

Unnamed: 0,Codigo do Trecho,Latitude,Longitude,Endereço,Rua,Direcao L/O,Direcao N/S,Hora,Slot_minutos,Sinais Waze,Engarrafamentos,slot_traffic_prob,Velocidade Media (km/h),Fila media (m),Atraso medio (s),Nivel medio de congestionamento (0 a 5)
0,11046,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,PADRE ANTONIO VIEIRA,East,North,7.0,15 a 29,57.0,1.0,0.017544,5.810000,208.000000,107.000000,4.000000
1,11046,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,PADRE ANTONIO VIEIRA,West,South,7.0,45 a 59,58.0,6.0,0.103448,5.991667,266.833333,138.000000,3.833333
2,11046,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,PADRE ANTONIO VIEIRA,West,South,8.0,0 a 14,57.0,3.0,0.052632,6.116667,277.000000,138.333333,3.666667
3,11046,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,PADRE ANTONIO VIEIRA,East,North,17.0,0 a 14,57.0,3.0,0.052632,4.483333,208.000000,151.333333,4.000000
4,11046,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,PADRE ANTONIO VIEIRA,East,North,17.0,15 a 29,57.0,2.0,0.035088,6.105000,208.000000,100.000000,3.500000
5,11046,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,PADRE ANTONIO VIEIRA,East,North,17.0,30 a 44,57.0,5.0,0.087719,4.908000,208.000000,137.200000,4.000000
6,11046,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,PADRE ANTONIO VIEIRA,West,South,18.0,0 a 14,57.0,1.0,0.017544,7.290000,277.000000,108.000000,4.000000
7,11046,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,PADRE ANTONIO VIEIRA,East,North,18.0,15 a 29,57.0,1.0,0.017544,3.780000,208.000000,176.000000,4.000000
8,11046,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,PADRE ANTONIO VIEIRA,West,South,18.0,15 a 29,57.0,3.0,0.052632,6.333333,277.000000,129.333333,4.000000
9,11046,-26.28330,-48.84334,RUA PADRE ANTÔNIO VIÊIRA,PADRE ANTONIO VIEIRA,West,South,18.0,30 a 44,57.0,1.0,0.017544,7.120000,277.000000,111.000000,4.000000


In [16]:
# Write the per-timeslot probability table to the interim data folder as CSV.
csv_path = project_dir + "/data/interim/prob_matrix_per_timeslot.csv"
prob_matrix_per_timeslot.to_csv(csv_path)