In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append("../src/")

In [2]:
from typing import Optional
from typing_extensions import Self
import random

import pandas as pd
import folium

from package import storage, strtime
from package.structs import build
from package.raptor.mcraptor import McRaptor
from package.raptor import bag
from package.raptor.example_labels import ActivityDurationLabel, ArrivalTimeLabel
from package.tracer.folium import add_tracer_list_to_folium_map
from package.output import TraceEnricher

In [3]:
footpaths_dict = storage.read_any_dict("../data/footpaths.pkl")
footpaths_dict = footpaths_dict["footpaths"]

structs_dict = storage.read_any_dict("../data/structs.pkl")
build.validate_structs_dict(structs_dict)

In [4]:
label_class = ActivityDurationLabel
# label_class = ArrivalTimeLabel

mcraptor = McRaptor(structs_dict, footpaths_dict, 10, 180, {}, {}, label_class)
bags = mcraptor.run("818", "", "15:00:00")

In [5]:
labels_per_stop_id = pd.DataFrame(
    index=pd.Series(bags.keys(), name="stop_id"),
    data=(len(b) for b in bags.values()),
    columns=["n_labels"],
)
labels_per_stop_id.sort_values("n_labels", ascending=False)

Unnamed: 0_level_0,n_labels
stop_id,Unnamed: 1_level_1
182,144
771,133
253,126
849,124
334,120
...,...
34,8
819,4
316,3
317,1


## Convert bags with labels results to dataframe

In [6]:
labels = pd.DataFrame.from_dict(
    {
        (outer_key, inner_key): values
        for outer_key, inner_dict in bags.items()
        for inner_key, values in enumerate(inner_dict)
    }
).T

labels = labels.reset_index(names=["stop_id", "label_id"])
labels

Unnamed: 0,stop_id,label_id,arrival_time,travel_time,walking_time,waiting_time,stops,trips,traces
0,316,0,15:19:56,00:01:00,00:10:56,00:08:00,"[818, 818, 317, 819, 316]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
1,316,1,15:11:00,00:03:00,00:00:00,00:08:00,"[818, 818, 317, 316]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
2,316,2,15:31:02,00:02:00,00:08:02,00:21:00,"[818, 818, 317, 317, 819, 316]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
3,311,0,15:35:54,00:05:00,00:15:54,00:15:00,"[818, 818, 317, 317, 303, 303, 304, 837, 311]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
4,311,1,15:35:58,00:10:00,00:07:22,00:18:36,"[818, 818, 317, 317, 303, 325, 328, 328, 327, ...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
...,...,...,...,...,...,...,...,...,...
4492,247,55,15:53:32,00:17:00,00:25:20,00:11:12,"[818, 818, 317, 316, 34, 35, 9, 7, 7, 30, 867,...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
4493,247,56,15:42:39,00:25:00,00:06:39,00:11:00,"[818, 818, 317, 316, 34, 35, 9, 752, 7, 7, 30,...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
4494,247,57,15:34:39,00:09:00,00:06:39,00:19:00,"[818, 818, 317, 317, 303, 303, 325, 326, 246, ...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
4495,247,58,15:59:04,00:07:00,00:35:44,00:16:20,"[818, 818, 317, 309, 306, 325, 325, 326, 246, ...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."


#### Example of the bag/labels for a given stop

In [7]:
labels[labels["stop_id"] == "885"].sort_values("arrival_time")

Unnamed: 0,stop_id,label_id,arrival_time,travel_time,walking_time,waiting_time,stops,trips,traces
3110,885,5,15:13:55,00:01:00,00:04:55,00:08:00,"[818, 818, 317, 885]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
3107,885,2,15:23:00,00:02:00,00:00:00,00:21:00,"[818, 818, 317, 317, 885]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
3109,885,4,15:23:00,00:03:00,00:02:54,00:17:06,"[818, 818, 317, 819, 819, 317, 885]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
3111,885,6,15:23:00,00:06:00,00:04:43,00:12:17,"[818, 818, 317, 316, 323, 323, 819, 317, 885]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
3113,885,8,15:38:00,00:19:00,00:03:50,00:15:10,"[818, 818, 317, 316, 34, 35, 35, 305, 304, 307...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
3105,885,0,15:41:00,00:24:00,00:00:00,00:17:00,"[818, 818, 317, 316, 34, 35, 35, 653, 34, 37, ...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
3106,885,1,15:41:00,00:21:00,00:00:00,00:20:00,"[818, 818, 317, 316, 34, 35, 35, 34, 313, 313,...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
3112,885,7,15:41:00,00:22:00,00:00:02,00:18:58,"[818, 818, 317, 316, 34, 35, 653, 653, 34, 37,...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
3108,885,3,15:42:04,00:22:00,00:03:04,00:17:00,"[818, 818, 317, 316, 34, 35, 35, 653, 34, 37, ...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."


## Parse time columns

In [8]:
time_cols = ["arrival_time", "travel_time", "walking_time", "waiting_time"]
for col in time_cols:
	labels[col] = labels[col].apply(pd.to_timedelta)

In [9]:
labels.describe()

Unnamed: 0,label_id,arrival_time,travel_time,walking_time,waiting_time
count,4497.0,4497,4497,4497,4497
mean,37.797421,0 days 16:01:37.035801645,0 days 00:10:03.815877251,0 days 00:21:19.040471425,0 days 00:30:14.179452968
std,31.209866,0 days 01:10:24.664092103,0 days 00:06:37.235587528,0 days 00:17:26.449278592,0 days 01:12:13.083222346
min,0.0,0 days 15:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00
25%,12.0,0 days 15:40:38,0 days 00:05:00,0 days 00:06:27,0 days 00:11:43
50%,30.0,0 days 15:49:12,0 days 00:09:00,0 days 00:16:56,0 days 00:15:05
75%,58.0,0 days 16:00:26,0 days 00:13:00,0 days 00:34:05,0 days 00:22:09
max,143.0,1 days 00:08:00,0 days 00:52:00,0 days 01:16:49,0 days 08:26:27


## Map

In [10]:
stops_df = storage.read_df('../data/cleaned/stops.csv')
trips_df = storage.read_df('../data/cleaned/trips.csv')
routes_df = storage.read_df('../data/cleaned/routes.csv')

trace_enricher = TraceEnricher(stops_df, trips_df, routes_df)

In [11]:
labels["traces"] = labels["traces"].apply(trace_enricher.enrich_traces)

In [12]:
latest_arrival_time = strtime.str_time_to_seconds("15:30:00")
max_walking_time = strtime.str_time_to_seconds("00:30:00")
max_waiting_time = strtime.str_time_to_seconds("00:10:00")
max_travel_time = strtime.str_time_to_seconds("11:00:00")

In [13]:
bags_filtered = {
    stop_id: [
        label
        for label in b
        if strtime.str_time_to_seconds(label["arrival_time"]) <= latest_arrival_time
        and strtime.str_time_to_seconds(label["walking_time"]) <= max_walking_time
        and strtime.str_time_to_seconds(label["waiting_time"]) <= max_waiting_time
        and strtime.str_time_to_seconds(label["travel_time"]) <= max_travel_time
    ]
    for stop_id, b in bags.items()
}


In [14]:
sample_stop = stops_df.sample(1)

m = folium.Map(location=[sample_stop.stop_lat, sample_stop.stop_lon], zoom_start=12)
for i, row in stops_df.iterrows():
    l = bags_filtered[row.stop_id]
    color = "green" if len(l) else "red"
    folium.CircleMarker(
        location=[row.stop_lat, row.stop_lon],
        radius=2,
        color=color,
        popup=f"{row.stop_name} ({row.stop_id}) n_labels:{len(l)}",
    ).add_to(m)
m

  float(coord)
  if math.isnan(float(coord)):
  return [float(x) for x in coords]


In [15]:
non_empty_bags = {k:v for k,v in bags_filtered.items() if len(v)}

traces = random.choice(list(non_empty_bags.values()))[0]["traces"]
traces = trace_enricher.enrich_traces(traces)

add_tracer_list_to_folium_map(traces, m, stops_df)
m
