In [1]:
import os
import sys
# Project root, expressed relative to the working directory (two levels up).
project_dir = os.path.join(os.pardir, os.pardir)
# Put the project root on sys.path so the `src` package imported below resolves.
sys.path.append(project_dir)

# Load the project's .env file; presumably it supplies the db_* variables
# that are read through os.environ further down — confirm against the .env.
import dotenv
dotenv_path = os.path.join(project_dir, '.env')
dotenv.load_dotenv(dotenv_path)

from IPython.display import display

import glob
import time
import numpy as np
import pandas as pd

# Project-local helpers; this import relies on the sys.path entry added above.
from src.data.processing_func import (connect_database, extract_geo_sections)

Get processed datasets

In [3]:
# Read every interim jam CSV matching the pattern below (in sorted filename
# order) and stack them into one frame, `df_jps`.
jps_pattern = project_dir+"/data/interim/temp_df_jps_2017_10_1*"
filenames = sorted(glob.glob(jps_pattern))
if not filenames:
    # pd.concat([]) would only report "No objects to concatenate"; name the
    # pattern that matched nothing so the missing input is obvious.
    raise FileNotFoundError("No interim files match: " + jps_pattern)

# index_col=0: the first CSV column is used as the row index of each part.
df_jps = pd.concat([pd.read_csv(file_, index_col=0) for file_ in filenames])
display(df_jps.shape)
display(df_jps.sample(5))

(760635, 18)

Unnamed: 0,MgrcDateStart,JpsId,SctnId,JamId,JamIndLevelOfTraffic,JamQtdLengthMeters,JamSpdMetersPerSecond,JamTimeDelayInSeconds,JamDscCoordinatesLonLat,JamSpdKmPerHour,LonDirection,LatDirection,MajorDirection,date,hour,minute,period,minute_bin
33403,2017-11-15 18:27:00-02:00,7149412.0,13973.0,657898.0,3.0,249.0,1.6,124.0,"[{'x': -48.851461, 'y': -26.255502}, {'x': -48...",5.76,East,North,Norte/Sul,2017-11-15,18,27,1,15 a 29
44944,2017-11-08 17:37:00-02:00,6982574.0,3729.0,622836.0,3.0,1020.0,3.233333,209.0,"[{'x': -48.84162, 'y': -26.324041}, {'x': -48....",11.64,East,South,Norte/Sul,2017-11-08,17,37,1,30 a 44
10065,2017-11-27 18:57:00-02:00,7390939.0,10903.0,709553.0,3.0,1130.0,5.161111,92.0,"[{'x': -48.855327, 'y': -26.323842}, {'x': -48...",18.58,East,North,Leste/Oeste,2017-11-27,18,57,1,45 a 59
43452,2017-10-26 18:06:00-02:00,6716769.0,13968.0,570178.0,3.0,1735.0,4.716667,240.0,"[{'y': -26.394519, 'x': -48.813401}, {'y': -26...",16.98,West,North,Norte/Sul,2017-10-26,18,6,1,0 a 14
777,2017-12-26 07:57:00-02:00,8029387.0,5970.0,852435.0,5.0,372.0,0.0,-1.0,"[{'x': -48.815729, 'y': -26.234097}, {'x': -48...",0.0,West,South,Leste/Oeste,2017-12-26,7,57,-1,45 a 59


Get data from SIG and merge with Waze's data:

In [13]:
# Connection settings, each read from an environment variable
# (None when the variable is not set).
DATABASE = {
    'drivername': os.getenv("db_drivername"),
    'host': os.getenv("db_host"),
    'port': os.getenv("db_port"),
    'username': os.getenv("db_username"),
    'password': os.getenv("db_password"),
    'database': os.getenv("db_database"),
}

# Project helpers: connect, then pull the geographic sections.
meta = connect_database(DATABASE)
geo_sections = extract_geo_sections(meta, main_buffer=10)

# Inner join: keep only rows whose SctnId is present on both sides.
df_waze_sig = geo_sections.merge(right=df_jps, on="SctnId", how="inner")
display(df_waze_sig.shape, df_waze_sig.head())

(760468, 39)

Unnamed: 0,SctnId,SctnIdArcgis,SctnCodRua,SctnDscNome,SctnQtdMetrosAcumulados,SctnQtdComprimento,SctnDscCoordxUtmComeco,SctnDscCoordyUtmComeco,SctnDscCoordxUtmMeio,SctnDscCoordyUtmMeio,...,JamDscCoordinatesLonLat,JamSpdKmPerHour,LonDirection,LatDirection,MajorDirection,date,hour,minute,period,minute_bin
0,0,16092,2686,DR. JOAO COLIN,2335,89.087809,714734.1111,7091467.602,714737.35335,7091423.0,...,"[{'x': -48.849316, 'y': -26.282206}, {'x': -48...",17.32,West,North,Norte/Sul,2017-10-02,18,31,1,30 a 44
1,0,16092,2686,DR. JOAO COLIN,2335,89.087809,714734.1111,7091467.602,714737.35335,7091423.0,...,"[{'y': -26.281302, 'x': -48.8494}, {'y': -26.2...",12.65,West,North,Norte/Sul,2017-10-04,18,31,1,30 a 44
2,0,16092,2686,DR. JOAO COLIN,2335,89.087809,714734.1111,7091467.602,714737.35335,7091423.0,...,"[{'y': -26.281302, 'x': -48.8494}, {'y': -26.2...",12.7,West,North,Norte/Sul,2017-10-04,18,36,1,30 a 44
3,0,16092,2686,DR. JOAO COLIN,2335,89.087809,714734.1111,7091467.602,714737.35335,7091423.0,...,"[{'x': -48.8494, 'y': -26.281302}, {'x': -48.8...",16.86,West,North,Norte/Sul,2017-12-07,18,17,1,15 a 29
4,0,16092,2686,DR. JOAO COLIN,2335,89.087809,714734.1111,7091467.602,714737.35335,7091423.0,...,"[{'x': -48.849316, 'y': -26.282206}, {'x': -48...",10.53,West,North,Norte/Sul,2017-12-07,18,32,1,30 a 44


Get undirected Traffic Network

In [14]:
# Raw attribute-table headers -> column names used in the rest of this notebook.
col_dict={'objectid,N,19,11': 'SctnIdArcgis',
            'codlogra,N,19,11': 'SctnCodRua',
            'acumulo,N,19,11': 'SctnQtdMetrosAcumulados',
            'nomelog,C,254': 'SctnDscNome',
            'x_ini,N,19,11': 'x_ini',
            'y_ini,N,19,11': 'y_ini',
            'x_med,N,19,11': 'x_med',
            'y_med,N,19,11': 'y_med',
            'x_fim,N,19,11': 'x_fim',
            'y_fim,N,19,11': 'y_fim',
         }
# A section is identified by (street code, accumulated metres).
section_key = ["SctnCodRua", "SctnQtdMetrosAcumulados"]

# Built as one chain (no inplace mutation of a column-sliced frame):
# read -> rename -> keep mapped columns -> one row per section key -> index.
un_network = (
    pd.read_csv(project_dir + "/data/external/od/Tabela_atributos_CSV.csv",
                encoding="latin-1",
                sep=";",
                decimal=',')
    .rename(columns=col_dict)
    .loc[:, list(col_dict.values())]
    .drop_duplicates(section_key)
    # verify_integrity=True raises if the key is still duplicated.
    .set_index(section_key, verify_integrity=True)
    # errors='ignore': data that cannot be cast to int (e.g. the street-name
    # text) is left unchanged instead of raising.
    .astype(int, errors='ignore')
)

# Start/end coordinates of each section as (x, y) tuples.
un_network["initial_node"] = list(zip(un_network.x_ini, un_network.y_ini))
un_network["final_node"] = list(zip(un_network.x_fim, un_network.y_fim))
un_network.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,SctnIdArcgis,SctnDscNome,x_ini,y_ini,x_med,y_med,x_fim,y_fim,initial_node,final_node
SctnCodRua,SctnQtdMetrosAcumulados,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1788.0,838.0,19339,COPACABANA,713872,7086107,713884,7085972,713814,7085849,"(713872, 7086107)","(713814, 7085849)"
8274.0,4770.0,25555,QUINZE DE NOVEMBRO,711213,7089782,711254,7089770,711296,7089757,"(711213, 7089782)","(711296, 7089757)"
4047.0,859.0,17237,GRACILIANO RAMOS,716722,7087433,716749,7087423,716755,7087395,"(716722, 7087433)","(716755, 7087395)"
8664.0,27.0,30823,SANTO AGOSTINHO,716754,7087360,716741,7087362,716728,7087359,"(716754, 7087360)","(716728, 7087359)"
1252.0,27844.0,0,GOVERNADOR MARIO COVAS,713711,7081880,713676,7082038,713642,7082196,"(713711, 7081880)","(713642, 7082196)"


Get number of directions per section

In [41]:
def get_two_ways(row):
    """Classify one section row as "two_way" or "unknown".

    The value checked depends on row["SectionDirection"]:
    "Norte/Sul" looks at row["LatDirection"], "Leste/Oeste" looks at
    row["LonDirection"]. The row is "two_way" when that value equals 2;
    every other case (including any other SectionDirection) is "unknown".

    In this notebook the two direction fields hold the number of distinct
    directions observed for the section (they are aggregated with nunique).
    """
    section_axis = row["SectionDirection"]
    if section_axis == "Norte/Sul":
        observed = row["LatDirection"]
    elif section_axis == "Leste/Oeste":
        observed = row["LonDirection"]
    else:
        return "unknown"

    return "two_way" if observed == 2 else "unknown"

# Section attributes that identify one group.
direction_group_keys = ["SctnCodRua",
                        "SctnQtdMetrosAcumulados",
                        "SctnDscNome",
                        "SectionDirection",
                        "SctnDscCoordxUtmComeco",
                        "SctnDscCoordyUtmComeco",
                        "SctnDscCoordxUtmFinal",
                        "SctnDscCoordyUtmFinal"]
section_id_cols = ["SctnCodRua", "SctnQtdMetrosAcumulados"]

# Per section: how many distinct Lat/Lon directions were observed, their sum,
# and the two_way/unknown classification from get_two_ways.
df_waze_directions = (
    df_waze_sig
    .groupby(direction_group_keys)
    # The "nunique" alias dispatches to the built-in GroupBy.nunique instead
    # of calling pd.Series.nunique once per group in Python.
    .agg({"LatDirection": "nunique", "LonDirection": "nunique"})
    .assign(total_directions=lambda counts: counts["LatDirection"] + counts["LonDirection"])
    .reset_index()
    .assign(way=lambda sections: sections.apply(get_two_ways, axis=1))
    # Keep the first row per (street code, accumulated metres) so the pair
    # can serve as a unique index; verify_integrity=True enforces that.
    .drop_duplicates(subset=section_id_cols)
    .set_index(section_id_cols, verify_integrity=True)
    .sort_values("total_directions", ascending=False)
)
df_waze_directions.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,SctnDscNome,SectionDirection,SctnDscCoordxUtmComeco,SctnDscCoordyUtmComeco,SctnDscCoordxUtmFinal,SctnDscCoordyUtmFinal,LatDirection,LonDirection,total_directions,way
SctnCodRua,SctnQtdMetrosAcumulados,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2691,380,DOUTOR PLACIDO GOMES,Leste/Oeste,714960.2945,7088144.0,715074.0814,7088149.0,2,2,4,two_way
8274,4410,QUINZE DE NOVEMBRO,Norte/Sul,711545.2532,7089831.0,711601.03,7089977.0,2,2,4,two_way
3497,324,ESPIGAO,Norte/Sul,720725.6297,7084441.0,720740.2162,7084416.0,2,2,4,two_way
3497,400,ESPIGAO,Norte/Sul,720690.8223,7084508.0,720725.6297,7084441.0,2,2,4,two_way
3505,2149,BARBANTE,Norte/Sul,708208.328,7087394.0,707425.4,7088275.0,2,2,4,two_way


Cross-checking classification:

In [70]:
# Number of sections per (street name, classification), largest groups first.
# The counts are sorted while still a Series and only then stored under the
# column name "way": sorting a frame by "way" when it is both an index level
# and a column label is ambiguous and raises on current pandas.
directions_gb = (
    df_waze_directions
    .groupby(["SctnDscNome", "way"])
    .size()
    .sort_values(ascending=False)
    .to_frame("way")
)
# Only the last expression of a cell is rendered, so show the intermediate
# checks explicitly.
display(directions_gb.xs("two_way", level="way", drop_level=False))
#directions_gb.to_csv(project_dir + "/data/interim/od/check_sentidos.csv")
display(df_waze_directions[df_waze_directions.SctnDscNome == "VISCONDE DE TAUNAY"])

# Raw jam records behind one specific section (street code / accumulated metres).
check_street_code = 9719
check_accumulated_meters = 498
check_columns = ["SctnDscNome",
                 "SctnId",
                 "SctnDscCoordxUtmComeco",
                 "SctnDscCoordyUtmComeco",
                 "SctnDscCoordxUtmFinal",
                 "SctnDscCoordyUtmFinal",
                 "MgrcDateStart",
                 "JamId",
                 "LonDirection",
                 "LatDirection",
                 "StreetDirection",
                 "SectionDirection",
                 "MajorDirection",
                 "JamQtdLengthMeters"
                ]
df_waze_sig.loc[(df_waze_sig.SctnCodRua == check_street_code)
                & (df_waze_sig.SctnQtdMetrosAcumulados == check_accumulated_meters),
                check_columns]

Unnamed: 0,SctnDscNome,SctnId,SctnDscCoordxUtmComeco,SctnDscCoordyUtmComeco,SctnDscCoordxUtmFinal,SctnDscCoordyUtmFinal,MgrcDateStart,JamId,LonDirection,LatDirection,StreetDirection,SectionDirection,MajorDirection,JamQtdLengthMeters
331117,VISCONDE DE TAUNAY,7434,714603.3836,7088915.131,714469.7817,7.088789e+06,2017-10-02 18:21:00-03:00,479047.0,West,South,Norte/Sul,Leste/Oeste,Leste/Oeste,348.0
331118,VISCONDE DE TAUNAY,7434,714603.3836,7088915.131,714469.7817,7.088789e+06,2017-10-02 18:31:00-03:00,479150.0,West,South,Norte/Sul,Leste/Oeste,Norte/Sul,1127.0
331119,VISCONDE DE TAUNAY,7434,714603.3836,7088915.131,714469.7817,7.088789e+06,2017-10-03 18:26:00-03:00,481589.0,West,South,Norte/Sul,Leste/Oeste,Leste/Oeste,348.0
331120,VISCONDE DE TAUNAY,7434,714603.3836,7088915.131,714469.7817,7.088789e+06,2017-12-04 18:27:00-02:00,740930.0,West,South,Norte/Sul,Leste/Oeste,Norte/Sul,1129.0
331121,VISCONDE DE TAUNAY,7434,714603.3836,7088915.131,714469.7817,7.088789e+06,2017-12-04 18:27:00-02:00,740931.0,East,North,Norte/Sul,Leste/Oeste,Leste/Oeste,161.0
331122,VISCONDE DE TAUNAY,7434,714603.3836,7088915.131,714469.7817,7.088789e+06,2017-12-04 18:32:00-02:00,741067.0,West,South,Norte/Sul,Leste/Oeste,Norte/Sul,1110.0
331123,VISCONDE DE TAUNAY,7434,714603.3836,7088915.131,714469.7817,7.088789e+06,2017-12-04 18:32:00-02:00,741068.0,East,North,Norte/Sul,Leste/Oeste,Leste/Oeste,161.0
331124,VISCONDE DE TAUNAY,7434,714603.3836,7088915.131,714469.7817,7.088789e+06,2017-12-04 18:37:00-02:00,741199.0,West,South,Norte/Sul,Leste/Oeste,Norte/Sul,1129.0
331125,VISCONDE DE TAUNAY,7434,714603.3836,7088915.131,714469.7817,7.088789e+06,2017-12-04 18:42:00-02:00,741321.0,West,South,Norte/Sul,Leste/Oeste,Norte/Sul,999.0
331126,VISCONDE DE TAUNAY,7434,714603.3836,7088915.131,714469.7817,7.088789e+06,2017-12-05 18:02:00-02:00,746501.0,West,North,Norte/Sul,Leste/Oeste,Leste/Oeste,192.0


In [72]:
# Coordinate string of the first df_jps row whose JamId equals 740931.
df_jps.loc[df_jps["JamId"] == 740931, "JamDscCoordinatesLonLat"].iloc[0]

"[{'x': -48.850465, 'y': -26.303469}, {'x': -48.850301, 'y': -26.303503}, {'x': -48.84983, 'y': -26.303093}, {'x': -48.849674, 'y': -26.302957}, {'x': -48.849264, 'y': -26.3026}]"

Generate the traffic network, duplicating two-way streets. To do so, we assume that any street containing AT LEAST ONE two-way section is two-way in its entirety.

In [58]:
# Attach each section's Waze classification via the shared
# (SctnCodRua, SctnQtdMetrosAcumulados) index, then flag a street name as
# two-way when at least one of its sections is classified "two_way".
# Sections without a Waze match get a missing `way`, which compares False.
un_network_join = (
    un_network
    .join(df_waze_directions["way"])
    .assign(way=lambda sections: sections["way"].eq("two_way"))
    .groupby("SctnDscNome")[["way"]]
    .any()
)
# `way` is already boolean, so it can be used directly as the row mask.
un_network_join[un_network_join["way"]]


Unnamed: 0_level_0,way
SctnDscNome,Unnamed: 1_level_1
6 DE JANEIRO,True
ADOLFO DA VEIGA,True
AGULHAS NEGRAS,True
ALBANO SCHMIDT,True
ALMIRANTE JACEGUAY,True
ALOIS FINDER,True
AMERICO VESPUCIO,True
ANABURGO,True
ANITA GARIBALDI,True
AQUIDABAN,True
