In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import geopandas as gpd
from tqdm import tqdm
import pandas as pd
import folium
import json

# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)
# Disable pandas' chained-assignment (SettingWithCopy) check for the whole notebook.
pd.set_option('mode.chained_assignment', None)

In [2]:
# Load the pickled table stored under data/computed/.
# NOTE(review): unpickling can execute arbitrary code; only load files produced by this project.
res = pd.read_pickle("data/computed/finally_matches.pkl")

In [3]:
# GTFS stop list (CSV) from the timetable folder.
stops = pd.read_csv("data/timetables/gtfs3Sept/stops.txt")
# Stop and line layers of the 2109_STIB_MIVB_Network shapefile set.
stops_sh = gpd.read_file("data/map/2109_STIB_MIVB_Network/ACTU_STOPS.shp")
lines_sh = gpd.read_file("data/map/2109_STIB_MIVB_Network/ACTU_LINES.shp")

## Delays and Early Arrivals

In [4]:
def turn_sec_to_hours(time_in_sec: float) -> str:
    """Format a number of seconds as an ``HH:MM:SS`` string.

    The value is truncated to whole seconds first. Hours are not wrapped at
    24, so ``90000`` gives ``"25:00:00"``. Each field is zero-padded to at
    least two digits.

    Args:
        time_in_sec: Duration or time of day expressed in seconds.

    Returns:
        The formatted string. Negative inputs are rendered with a leading
        ``-`` (e.g. ``-30`` -> ``"-00:00:30"``).
    """
    total_seconds = int(time_in_sec)
    sign = "-" if total_seconds < 0 else ""
    # Integer divmod avoids float truncation errors: the former
    # ((t / 3600) - hours) * 60 evaluates to 0.9999999999999964 for t=3660,
    # which produced "01:00:60" instead of "01:01:00".
    hours, remainder = divmod(abs(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{seconds:02d}"

def turn_hour_to_seconds(time_in_hour:str, timetable:pd.DataFrame, col_name:str ="time_seconds", format24:bool=True) -> pd.DataFrame:
    """Add a column holding an ``HH:MM:SS`` string column converted to seconds.

    Rows whose ``time_in_hour`` value is missing are dropped. The input frame
    itself is not modified; a new frame is returned.

    Args:
        time_in_hour: Name of the column containing ``HH:MM:SS`` strings.
        timetable: Frame that contains ``time_in_hour``.
        col_name: Name of the integer seconds column to write.
        format24: When true, values below 7200 s (02:00:00) get 86400 s
            (24 h) added. Presumably this keeps after-midnight trips ordered
            after the evening ones of the same service day (TODO confirm).

    Returns:
        A copy of ``timetable`` without the missing-time rows and with
        ``col_name`` added (or overwritten).
    """
    seconds_per_day = 86400
    rollover_cutoff = 7200  # 02:00:00

    # A list for `subset` also works on pandas versions that reject a scalar label.
    # The explicit copy keeps the column assignment below from touching a view of
    # the caller's frame, without relying on chained-assignment warnings being off.
    timetable = timetable.dropna(subset=[time_in_hour]).copy()
    parts = timetable[time_in_hour].str.split(":", expand=True)
    seconds = (
        parts[0].astype(int) * 3600 + parts[1].astype(int) * 60 + parts[2].astype(int)
    )
    if format24:
        # Keep values at or after the cutoff, shift earlier ones by one day.
        seconds = seconds.where(seconds >= rollover_cutoff, seconds + seconds_per_day)
    timetable[col_name] = seconds

    return timetable

In [5]:
# Convert both HH:MM:SS columns to seconds. Each call also drops the rows
# where its source column is missing, and applies the default format24 shift.
res = turn_hour_to_seconds(
    time_in_hour="adjusted_arrival_time",
    timetable=res,
    col_name="adjusted_arrival_time(ts)",
)
res = turn_hour_to_seconds(
    time_in_hour="departure_time",
    timetable=res,
    col_name="time_seconds",
)

In [6]:
# Seconds from departure_time minus seconds from adjusted_arrival_time:
# positive when the adjusted time is earlier than the timetable, negative when later.
scheduled_sec = res["time_seconds"]
adjusted_sec = res["adjusted_arrival_time(ts)"]
res["test_diff_time"] = scheduled_sec.sub(adjusted_sec)

In [7]:
# Display the frame, now including the derived test_diff_time column.
res

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,route_id,service_id,trip_headsign,direction_id,block_id,shape_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,time_seconds,headway_min,clusters,cluster_agg_value,regularity,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date,start_date_ft,end_date_ft,label,date_normalized,stop_id_clean_c,descr_fr,descr_nl,geometry,geom_position_before,dist_m,date,time,distance_from_point,stop_id__current,stop_id_cleaned,stop_id__terminus,stop_code__terminus,stop_name__terminus,stop_desc__terminus,stop_lat__terminus,stop_lon__terminus,zone_id__terminus,stop_url__terminus,location_type__terminus,parent_station__terminus,stop_id_cleaned__terminus,date_label,ligne_cleaned,mode,lag_-1,lag_+1,t_lag_before,t_diff(sec),adjusted_arrival_time(ts),adjusted_arrival_time,diff_time,vehicule_id,approx_rec,test_diff_time
0,112947202236269500,07:30:56,07:30:56,8733,1,0,0,,GARE DE L'OUEST,,50.848999,4.320948,,,0.0,2.0,2,236269500,STOCKEL,0,8912798,001m0080,1,GARE DE L'OUEST - STOCKEL,,1,,C4008F,FFFFFF,27056,15.0,cluster_0,15.0,0,0,0,0,0,0,1,0,20210904,20210918,2021-09-04,2021-09-18,saturday,11-09-2021,8733,GARE DE L'OUEST,WESTSTATION,POINT (146633.5 170956.4),,,11-09-2021,07:25:38,0.0,8733,8733,8161,,STOCKEL,,50.841872,4.464541,,,0.0,29.0,8161,saturday,1,M,0.0,0.0,26151.0,587.0,26738,07:25:38,318.000,0,,318
1,112947202236269500,07:31:53,07:31:53,8742,2,0,0,,BEEKKANT,,50.853386,4.322974,,,0.0,1.0,2,236269500,STOCKEL,0,8912798,001m0080,1,GARE DE L'OUEST - STOCKEL,,1,,C4008F,FFFFFF,27113,15.0,cluster_0,15.0,0,0,0,0,0,0,1,0,20210904,20210918,2021-09-04,2021-09-18,saturday,11-09-2021,8742,BEEKKANT,BEEKKANT,POINT (146776.5 171444.3),POINT (146633.5 170956.4),508.424439,11-09-2021,07:32:21,0.0,8742,8742,8161,,STOCKEL,,50.841872,4.464541,,,0.0,29.0,8161,saturday,1,M,1.0,1.0,26308.0,833.0,27141,07:32:21,28.000,0,,-28
2,112947202236269500,07:33:33,07:33:33,8292,3,0,0,,ETANGS NOIRS,,50.857125,4.333143,,,0.0,12.0,2,236269500,STOCKEL,0,8912798,001m0080,1,GARE DE L'OUEST - STOCKEL,,1,,C4008F,FFFFFF,27213,15.0,cluster_0,15.0,0,0,0,0,0,0,1,0,20210904,20210918,2021-09-04,2021-09-18,saturday,11-09-2021,8292,ETANGS NOIRS,ZWARTE VIJVERS,POINT (147492.7 171859.9),POINT (146776.5 171444.3),828.049395,11-09-2021,07:33:53,0.0,8292,8292,8161,,STOCKEL,,50.841872,4.464541,,,0.0,29.0,8161,saturday,1,M,1.0,1.0,26370.0,863.0,27233,07:33:53,20.000,0,,-20
3,112947202236269500,07:34:44,07:34:44,8282,4,0,0,,COMTE DE FLANDRE,,50.854705,4.340542,,,0.0,13.0,2,236269500,STOCKEL,0,8912798,001m0080,1,GARE DE L'OUEST - STOCKEL,,1,,C4008F,FFFFFF,27284,15.0,cluster_0,15.0,0,0,0,0,0,0,1,0,20210904,20210918,2021-09-04,2021-09-18,saturday,11-09-2021,8282,COMTE DE FLANDRE,GRAAF VAN VLAAND.,POINT (148013.6 171590.4),POINT (147492.7 171859.9),586.487050,11-09-2021,07:34:55,0.0,8282,8282,8161,,STOCKEL,,50.841872,4.464541,,,0.0,29.0,8161,saturday,1,M,1.0,1.0,26432.0,863.0,27295,07:34:55,11.000,0,,-11
4,112947202236269500,07:36:08,07:36:08,8272,5,0,0,,SAINTE-CATHERINE,,50.851900,4.348012,,,0.0,14.0,2,236269500,STOCKEL,0,8912798,001m0080,1,GARE DE L'OUEST - STOCKEL,,1,,C4008F,FFFFFF,27368,15.0,cluster_0,15.0,0,0,0,0,0,0,1,0,20210904,20210918,2021-09-04,2021-09-18,saturday,11-09-2021,8272,SAINTE-CATHERINE,SINT-KATELIJNE,POINT (148539.5 171278.2),POINT (148013.6 171590.4),611.587811,11-09-2021,07:36:25,0.0,8272,8272,8161,,STOCKEL,,50.841872,4.464541,,,0.0,29.0,8161,saturday,1,M,1.0,1.0,26524.0,861.0,27385,07:36:25,17.000,0,,-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5686370,113354575237586501,15:26:44,15:26:44,5916,9,0,0,,EGIDE VAN OPHEM,,50.796220,4.324184,,,0.0,,67,237586501,HEROS,0,8945427,098b0078,98,CERIA - HEROS,,3,,ED7807,000000,55604,30.0,cluster_0,30.0,0,0,0,0,0,0,1,0,20210904,20210919,2021-09-04,2021-09-19,saturday,18-09-2021,5916,EGIDE VAN OPHEM,EGIDE VAN OPHEM,POINT (146857.9 165085.1),POINT (146620.2 165054.3),239.687150,18-09-2021,15:29:45,46.0,5916,5916,1951,,HEROS,,50.803326,4.342649,,,0.0,,1951,saturday,98,B,133.0,94.0,51975.0,3810.0,55774,15:29:34,170.385,258581,,-170
5686371,113354575237586501,15:27:53,15:27:53,5917,10,0,0,,WAGON,,50.796832,4.329461,,,0.0,,67,237586501,HEROS,0,8945427,098b0078,98,CERIA - HEROS,,3,,ED7807,000000,55673,30.0,cluster_0,30.0,0,0,0,0,0,0,1,0,20210904,20210919,2021-09-04,2021-09-19,saturday,18-09-2021,5917,WAGON,WAGEN,POINT (147230 165153),POINT (146857.9 165085.1),378.244392,18-09-2021,15:30:49,1.0,5917,5917,1951,,HEROS,,50.803326,4.342649,,,0.0,,1951,saturday,98,B,553.0,181.0,54109.0,1740.0,55848,15:30:48,175.769,258581,,-175
5686372,113354575237586501,15:30:00,15:30:00,1985B,11,0,0,,GLOBE,,50.800560,4.337300,,,0.0,,67,237586501,HEROS,0,8945427,098b0078,98,CERIA - HEROS,,3,,ED7807,000000,55800,30.0,cluster_0,30.0,0,0,0,0,0,0,1,0,20210904,20210919,2021-09-04,2021-09-19,saturday,18-09-2021,1985,GLOBE,GLOBE,POINT (147782.8 165567.4),POINT (147230 165153),690.880019,18-09-2021,15:32:59,8.0,1985B,1985,1951,,HEROS,,50.803326,4.342649,,,0.0,,1951,saturday,98,B,456.0,40.0,54206.0,1773.0,55977,15:32:57,177.154,258581,,-177
5686373,113354575237586501,15:32:28,15:32:28,5058,12,0,0,,HEROS,,50.803542,4.341571,,,0.0,,67,237586501,HEROS,0,8945427,098b0078,98,CERIA - HEROS,,3,,ED7807,000000,55948,30.0,cluster_0,30.0,0,0,0,0,0,0,1,0,20210904,20210919,2021-09-04,2021-09-19,saturday,18-09-2021,5058,HEROS,HELDEN,POINT (148084 165899),POINT (147782.8 165567.4),447.973213,18-09-2021,15:35:37,22.0,5058,5058,1951,,HEROS,,50.803326,4.342649,,,0.0,,1951,saturday,98,B,71.0,0.0,54616.0,1521.0,56131,15:35:31,183.923,258581,,-183


In [8]:
# One row per distinct stop_id (first occurrence kept), with its name and geometry.
stop_columns = ["stop_id", "stop_name", "geometry"]
points = res.loc[:, stop_columns].drop_duplicates(subset="stop_id")
points

Unnamed: 0,stop_id,stop_name,geometry
0,8733,GARE DE L'OUEST,POINT (146633.5 170956.4)
1,8742,BEEKKANT,POINT (146776.5 171444.3)
2,8292,ETANGS NOIRS,POINT (147492.7 171859.9)
3,8282,COMTE DE FLANDRE,POINT (148013.6 171590.4)
4,8272,SAINTE-CATHERINE,POINT (148539.5 171278.2)
...,...,...,...
5664808,2660,DROGENBOS-SHOP.,POINT (145775 164707)
5664809,5923,EGGERGAT,POINT (146277 164964)
5664810,5921B,CARREFOUR STALLE,POINT (146620.2 165054.3)
5664811,5916,EGIDE VAN OPHEM,POINT (146857.9 165085.1)


In [9]:
# Median of test_diff_time for every stop_id.
test = (
    res.groupby("stop_id")["test_diff_time"]
    .median()
    .reset_index()
)
# Right join keeps every stop that has a median. Rows without a Code_Ligne
# (e.g. stops with no match in the shapefile) are then dropped, and a single
# row per stop_id is kept.
test = (
    stops_sh.merge(test, how="right", on="stop_id")
    .dropna(subset="Code_Ligne")
    .drop_duplicates(subset="stop_id")
)
test

Unnamed: 0,Code_Ligne,Variante,succession,stop_id,descr_fr,descr_nl,alpha_fr,alpha_nl,coord_x,coord_y,mode,numero_lig,terminus,geometry,test_diff_time
0,039t,1.0,1.0,0089,MONTGOMERY,MONTGOMERY,Montgomery,Montgomery,152832.9,169733.3,T,39.0,BAN-EIK,POINT (152832.900 169733.300),-37.0
4,009t,1.0,1.0,0470F,SIMONIS,SIMONIS,Simonis,Simonis,147244.5,172587.6,T,9.0,ARBRE BALLON,POINT (147244.500 172587.600),-25.0
5,019t,1.0,11.0,0471,SIMONIS,SIMONIS,Simonis,Simonis,147218.0,172595.0,T,19.0,DE WAND,POINT (147218.000 172595.000),-36.0
6,019t,2.0,12.0,0472,SIMONIS,SIMONIS,Simonis,Simonis,147203.0,172574.0,T,19.0,GROOT-BIJGAARDEN,POINT (147203.000 172574.000),-41.0
7,009t,2.0,10.0,0473F,SIMONIS,SIMONIS,Simonis,Simonis,147274.0,172560.0,T,9.0,SIMONIS,POINT (147274.000 172560.000),-49.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3914,003t,1.0,26.0,9959F,CHURCHILL,CHURCHILL,Churchill,Churchill,148969.0,166805.0,T,3.0,CHURCHILL,POINT (148969.000 166805.000),-152.5
3915,007t,1.0,3.0,9963F,CHURCHILL,CHURCHILL,Churchill,Churchill,148976.0,166822.0,T,7.0,HEYSEL,POINT (148976.000 166822.000),-14.0
3916,007t,2.0,34.0,9969F,CHURCHILL,CHURCHILL,Churchill,Churchill,148936.0,166838.0,T,7.0,VANDERKINDERE,POINT (148936.000 166838.000),-111.0
3917,097t,1.0,29.0,9972F,DIEWEG,DIEWEG,Dieweg,Dieweg,148549.2,165118.4,T,97.0,DIEWEG,POINT (148549.200 165118.400),-185.0


In [10]:
# Base map for the per-stop medians. It is named `delay_map` rather than `map`
# so the builtin `map` stays usable in later cells.
delay_map = folium.Map(location=[50.85045, 4.34878], zoom_start=12, tiles="cartodbpositron")
# Colour each stop by its median test_diff_time; the returned map is the cell output.
test.explore(
    column="test_diff_time",
    m=delay_map,
    cmap="jet",
)
# lines_sh.explore(
#     m = delay_map,
#     color ="grey",
# )



## Headway

In [20]:
# Rows for route "7" at stop "5706" heading to VANDERKINDERE, ordered by time_seconds.
line7_mask = (
    (res.route_short_name == "7")
    & (res.stop_id == "5706")
    & (res.trip_headsign == "VANDERKINDERE")
)
test = res.loc[line7_mask].sort_values("time_seconds")

In [28]:
# Display the current `test` selection.
test

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,route_id,service_id,trip_headsign,direction_id,block_id,shape_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,time_seconds,headway_min,clusters,cluster_agg_value,regularity,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date,start_date_ft,end_date_ft,label,date_normalized,stop_id_clean_c,descr_fr,descr_nl,geometry,geom_position_before,dist_m,date,time,distance_from_point,stop_id__current,stop_id_cleaned,stop_id__terminus,stop_code__terminus,stop_name__terminus,stop_desc__terminus,stop_lat__terminus,stop_lon__terminus,zone_id__terminus,stop_url__terminus,location_type__terminus,parent_station__terminus,stop_id_cleaned__terminus,date_label,ligne_cleaned,mode,lag_-1,lag_+1,t_lag_before,t_diff(sec),adjusted_arrival_time(ts),adjusted_arrival_time,diff_time,vehicule_id,approx_rec,headway_obs
2928237,113339168237534000,05:02:00,05:02:00,5706,5,0,0,,HEEMBEEK,,50.888468,4.375248,,,0.0,,6,237534000,VANDERKINDERE,1,8944464,007t0697,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,18143,,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364,09-09-2021,05:02:23,40.0,5706,5706,6421F,,VANDERKINDERE,,50.813378,4.348149,,,0.0,,6421,workdays,7,T,841.0,40.0,1083.0,17060.0,18134.166000,05:02:14,14.166,0,,
2929375,113339244237534000,05:31:00,05:31:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0.0,,6,237534000,VANDERKINDERE,1,8944456,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,19873,29.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364,09-09-2021,05:31:13,93.0,5706,5706,6421F,,VANDERKINDERE,,50.813378,4.348149,,,0.0,,6421,workdays,7,T,855.0,235.0,19625.0,248.0,19852.460000,05:30:52,7.540,0,,28.638233
2867503,113338928237534000,05:43:00,05:43:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0.0,,6,237534000,VANDERKINDERE,1,8944450,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,20591,12.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364,09-09-2021,05:43:11,40.0,5706,5706,6421F,,VANDERKINDERE,,50.813378,4.348149,,,0.0,,6421,workdays,7,T,40.0,40.0,20184.0,407.0,20805.009111,05:46:45,2.166,0,1.0,15.875819
2867863,113338929237534000,05:53:00,05:53:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0.0,,6,237534000,VANDERKINDERE,1,8944458,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,21310,10.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364,09-09-2021,05:55:10,601.0,5706,5706,6421F,,VANDERKINDERE,,50.813378,4.348149,,,0.0,,6421,workdays,7,T,322.0,761.0,21279.0,31.0,21559.533761,05:59:19,2.736,0,1.0,12.575411
2868563,113338931237534000,06:01:00,06:01:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0.0,,6,237534000,VANDERKINDERE,1,8944455,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,21777,8.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364,09-09-2021,06:02:57,476.0,5706,5706,6421F,,VANDERKINDERE,,50.813378,4.348149,,,0.0,,6421,workdays,7,T,74.0,648.0,21747.0,30.0,21671.871000,06:01:11,11.871,0,,1.872287
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2916735,113339130237534000,23:30:00,23:30:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0.0,,6,237534000,VANDERKINDERE,1,8944448,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,84709,16.0,cluster_5,15.0,0,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364,09-09-2021,23:31:49,48.0,5706,5706,6421F,,VANDERKINDERE,,50.813378,4.348149,,,0.0,,6421,workdays,7,T,684.0,48.0,83958.0,751.0,84698.399000,23:31:38,98.399,0,,0.000000
2927525,113339166237534000,23:44:00,23:44:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0.0,,6,237534000,VANDERKINDERE,1,8944452,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,85488,9.0,cluster_5,15.0,0,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364,09-09-2021,23:44:48,0.0,5706,5706,6421F,,VANDERKINDERE,,50.813378,4.348149,,,0.0,,6421,workdays,7,T,802.0,0.0,85302.0,186.0,85488.000000,23:44:48,48.000,0,,13.160017
634268,113374288237645001,23:45:00,23:45:00,5706,5,0,0,,HEEMBEEK,,50.888468,4.375248,,,0.0,,5,237645001,VANDERKINDERE,0,8946744,003t0550,7,ESPLANADE - CHURCHILL,,0,,B5BA05,000000,85488,15.0,cluster_5,15.0,0,1,1,1,1,1,0,0,20210906,20210917,2021-09-06,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364,09-09-2021,23:44:48,0.0,5706,5706,6421F,,VANDERKINDERE,,50.813378,4.348149,,,0.0,,6421,workdays,7,T,802.0,0.0,85302.0,186.0,85488.000000,23:44:48,12.000,0,,0.000000
2924473,113339157237534000,24:00:00,24:00:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0.0,,6,237534000,VANDERKINDERE,1,8944456,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,85582,16.0,cluster_5,15.0,0,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364,09-09-2021,23:46:22,0.0,5706,5706,6421F,,VANDERKINDERE,,50.813378,4.348149,,,0.0,,6421,workdays,7,T,470.0,712.0,85552.0,30.0,86503.009111,24:01:43,818.000,0,1.0,16.916819


In [11]:
# Rows for route "7" at stop "5706" heading to VANDERKINDERE, restricted to
# workdays and to service_id 237645001.
service_mask = (
    (res.route_short_name == "7")
    & (res.stop_id == "5706")
    & (res.trip_headsign == "VANDERKINDERE")
    & (res.date_label == "workdays")
    & (res.service_id == 237645001)
)
test = res.loc[service_mask]

def compute_headway(
    df: pd.DataFrame,
    headway_col_name: str = "headway_min",
    time_sec_col_name: str = "time_seconds",
) -> pd.DataFrame:
    """Write the gap to the previous time value, in minutes, into ``df``.

    The values of ``time_sec_col_name`` are sorted ascending; each row receives
    its value minus the preceding sorted value, divided by 60. The smallest
    value has no predecessor and gets NaN. Results are matched back to rows by
    index label.

    Args:
        df: Frame to annotate; it is modified in place.
        headway_col_name: Column that receives the headway.
        time_sec_col_name: Column holding times in seconds.

    Returns:
        The same ``df`` object, with ``headway_col_name`` set.
    """
    ordered_times = df[time_sec_col_name].sort_values()
    # diff() is value minus predecessor in sorted order; / 60 turns seconds into minutes.
    df[headway_col_name] = ordered_times.diff() / 60
    return df

# Observed headway is computed separately for each date, so the first vehicle
# of a day is not compared with the last one of the previous day.
# (A headway over the whole frame was previously computed first; it was
# overwritten by this per-date pass and has been removed.)
t_g = test.groupby("date")

dfs = [
    compute_headway(
        group.sort_values("time_seconds"),
        headway_col_name="headway_obs",
        time_sec_col_name="adjusted_arrival_time(ts)",
    )
    for _, group in t_g
]

test = pd.concat(dfs)

fig = px.scatter(
    test.sort_values("time_seconds"),
    x="departure_time",
    y="headway_obs",
    color="date",
    title="Observed headway by scheduled departure, per date",
    labels={
        "departure_time": "Scheduled departure time",
        "headway_obs": "Observed headway (min)",
        "date": "Date",
    },
)
fig.show()

In [184]:
# First 50 rows of 09-09-2021, limited to the columns that compare the
# timetable headway (headway_min) with the observed one (headway_obs).
headway_cols = [
    "date",
    "trip_id",
    "departure_time",
    "time",
    "adjusted_arrival_time",
    "headway_min",
    "headway_obs",
    "trip_headsign",
    "service_id",
]
test.loc[test.date == "09-09-2021", headway_cols].head(50)

Unnamed: 0,date,trip_id,departure_time,time,adjusted_arrival_time,headway_min,headway_obs,trip_headsign,service_id
2928237,09-09-2021,113339168237534000,05:02:00,05:02:23,05:02:14,,,VANDERKINDERE,237534000
2929375,09-09-2021,113339244237534000,05:31:00,05:31:13,05:30:52,29.0,28.633333,VANDERKINDERE,237534000
2867503,09-09-2021,113338928237534000,05:43:00,05:43:11,05:46:45,12.0,15.883333,VANDERKINDERE,237534000
2867863,09-09-2021,113338929237534000,05:53:00,05:55:10,05:59:19,10.0,12.566667,VANDERKINDERE,237534000
2868563,09-09-2021,113338931237534000,06:01:00,06:02:57,06:01:11,8.0,1.866667,VANDERKINDERE,237534000
2914763,09-09-2021,113339118237534000,06:18:00,06:21:40,06:20:25,9.0,19.233333,VANDERKINDERE,237534000
617444,09-09-2021,113374217237645001,06:21:00,06:21:10,06:20:37,11.0,0.2,VANDERKINDERE,237645001
2892443,09-09-2021,113339008237534000,06:25:00,06:24:51,06:24:51,7.0,4.233333,VANDERKINDERE,237534000
2884163,09-09-2021,113338978237534000,06:31:00,06:32:15,06:32:15,6.0,0.0,VANDERKINDERE,237534000
616484,09-09-2021,113374213237645001,06:31:00,06:32:15,06:32:15,10.0,7.4,VANDERKINDERE,237645001


In [166]:
# Load the pickled normalized schedule.
# NOTE(review): unpickling can execute arbitrary code; only load files produced by this project.
sched_nn = pd.read_pickle("data/computed/schedule_normalized.pkl")

In [171]:
# Normalized-schedule rows for route "7" at stop "5706" heading to
# VANDERKINDERE, label "workdays", normalized date 09-09-2021.
nn_mask = (
    (sched_nn.route_short_name == "7")
    & (sched_nn.stop_id == "5706")
    & (sched_nn.trip_headsign == "VANDERKINDERE")
    & (sched_nn.label == "workdays")
    & (sched_nn.date_normalized == "09-09-2021")
)
test_nn = sched_nn.loc[nn_mask]
test_nn

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,route_id,service_id,trip_headsign,direction_id,block_id,shape_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,time_seconds,headway_min,clusters,cluster_agg_value,regularity,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date,start_date_ft,end_date_ft,label,date_normalized
3878000,113338771237534000,09:47:00,09:47:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944452,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,35220,6.0,cluster_1,6.0,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3878360,113338772237534000,10:03:00,10:03:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944456,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,36180,8.0,cluster_2,8.0,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3878720,113338773237534000,12:03:00,12:03:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944452,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,43380,8.0,cluster_2,8.0,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3879080,113338774237534000,13:15:00,13:15:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944472,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,47700,8.0,cluster_2,8.0,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3879440,113338775237534000,13:31:00,13:31:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944461,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,48660,8.0,cluster_2,8.0,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3980240,113339163237534000,20:58:00,20:58:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944448,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,75480,15.0,cluster_5,15.0,0,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3981280,113339166237534000,23:44:00,23:44:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944452,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,85440,9.0,cluster_5,15.0,0,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3981640,113339167237534000,15:15:00,15:15:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944467,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,54900,6.0,cluster_3,6.0,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3981993,113339168237534000,05:02:00,05:02:00,5706,5,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944464,007t0697,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,18120,,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021


In [176]:
# Same selection, ordered by time_seconds (display only; test_nn is not reassigned).
test_nn.sort_values("time_seconds")

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,route_id,service_id,trip_headsign,direction_id,block_id,shape_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,time_seconds,headway_min,clusters,cluster_agg_value,regularity,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date,start_date_ft,end_date_ft,label,date_normalized
3981993,113339168237534000,05:02:00,05:02:00,5706,5,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944464,007t0697,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,18120,,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3983190,113339244237534000,05:31:00,05:31:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944456,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,19860,29.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3918140,113338928237534000,05:43:00,05:43:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944450,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,20580,12.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3918500,113338929237534000,05:53:00,05:53:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944458,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,21180,10.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3919200,113338931237534000,06:01:00,06:01:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944455,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,21660,8.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3979160,113339160237534000,22:58:00,22:58:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944453,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,82680,14.0,cluster_5,15.0,0,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3979880,113339162237534000,23:14:00,23:14:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944470,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,83640,16.0,cluster_5,15.0,0,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3969220,113339130237534000,23:30:00,23:30:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944448,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,84600,16.0,cluster_5,15.0,0,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021
3981280,113339166237534000,23:44:00,23:44:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944452,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,000000,85440,9.0,cluster_5,15.0,0,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021


In [177]:
# Load the pickled normalized schedule from the "_dist" file.
# NOTE(review): unpickling can execute arbitrary code; only load files produced by this project.
sched_nn_dist = pd.read_pickle("data/computed/schedule_normalized_dist.pkl")

In [179]:
# Rows of the "_dist" schedule for route "7" at stop "5706" heading to
# VANDERKINDERE, label "workdays", normalized date 09-09-2021.
nn_dist_mask = (
    (sched_nn_dist.route_short_name == "7")
    & (sched_nn_dist.stop_id == "5706")
    & (sched_nn_dist.trip_headsign == "VANDERKINDERE")
    & (sched_nn_dist.label == "workdays")
    & (sched_nn_dist.date_normalized == "09-09-2021")
)
test_nn_dist = sched_nn_dist.loc[nn_dist_mask]
# First 50 rows in time_seconds order.
test_nn_dist.sort_values("time_seconds").head(50)

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,route_id,service_id,trip_headsign,direction_id,block_id,shape_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,time_seconds,headway_min,clusters,cluster_agg_value,regularity,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date,start_date_ft,end_date_ft,label,date_normalized,stop_id_clean_c,descr_fr,descr_nl,geometry,geom_position_before,dist_m
3981968,113339168237534000,05:02:00,05:02:00,5706,5,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944464,007t0697,7,VANDERKINDERE - HEYSEL,,0,,ECE354,0,18120,,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364
3983166,113339244237534000,05:31:00,05:31:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944456,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,0,19860,29.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364
3918116,113338928237534000,05:43:00,05:43:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944450,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,0,20580,12.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364
3918476,113338929237534000,05:53:00,05:53:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944458,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,0,21180,10.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364
3919176,113338931237534000,06:01:00,06:01:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944455,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,0,21660,8.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364
3967446,113339120237534000,06:09:00,06:09:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944457,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,0,22140,8.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364
3967056,113339118237534000,06:18:00,06:18:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944454,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,0,22680,9.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364
3943296,113339008237534000,06:25:00,06:25:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944445,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,0,23100,7.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364
3934806,113338978237534000,06:31:00,06:31:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944461,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,0,23460,6.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364
3942186,113339004237534000,06:40:00,06:40:00,5706,6,0,0,,HEEMBEEK,,50.888468,4.375248,,,0,,6,237534000,VANDERKINDERE,1,8944446,007t0634,7,VANDERKINDERE - HEYSEL,,0,,ECE354,0,24000,9.0,cluster_0,8.5,1,1,1,1,1,1,0,0,20210901,20210917,2021-09-01,2021-09-17,workdays,09-09-2021,5706,HEEMBEEK,HEEMBEEK,POINT (150457 175346),POINT (150008 175660),547.902364
