In [284]:
# add . to module name
import sys
sys.path.append("../src/")

In [285]:
from typing import Optional
from typing_extensions import Self
import random

import pandas as pd
import folium

from package import storage, strtime
from package.structs import build
from package.raptor.mcraptor import McRaptor
from package.raptor import bag
from package.raptor.example_labels import ActivityDurationLabel
from package.tracer.folium import add_tracer_list_to_folium_map
from package.output import TraceEnricher

In [286]:
footpaths_dict = storage.read_any_dict("../data/footpaths.pkl")
footpaths_dict = footpaths_dict["footpaths"]

structs_dict = storage.read_any_dict("../data/structs.pkl")
build.validate_structs_dict(structs_dict)

In [287]:
class CustomLabel(bag.LabelInterface):
    def __init__(self, time: int, stop: Optional[str] = None):
        self.arrival_time = time
        self.travel_time = 0
        self.walking_time = 0
        self.waiting_time = 0
        self.stops = []
        self.trips = []
        if stop is not None:
            self.stops.append(stop)

        self.last_update = "start"

    def __repr__(self):
        return f"Label({self.arrival_time}, t={self.travel_time}, w={self.walking_time}, wait={self.waiting_time})"

    def strictly_dominates(self, other: Self) -> bool:
        return (
            self.arrival_time <= other.arrival_time
            and self.travel_time <= other.travel_time
            and self.walking_time <= other.walking_time
            and self.waiting_time <= other.waiting_time
        )

    def update_along_trip(self, arrival_time: int, stop_id: str, trip_id: str):
        interval = arrival_time - self.arrival_time
        if interval < 0:
            print(self)
        # assert interval >= 0
        self.arrival_time = arrival_time
        self.travel_time += interval
        self.last_update = "trip"
        self.stops.append(stop_id)
        self.trips.append(trip_id)

    def update_along_footpath(self, walking_time: int, stop_id: str):
        self.arrival_time = self.arrival_time + walking_time
        self.walking_time += walking_time
        self.last_update = "footpath"
        self.stops.append(stop_id)

    def update_before_route_bag_merge(self, departure_time: int, stop_id: str):
        interval = departure_time - self.arrival_time
        if interval < 0:
            print(self)
        # assert interval >= 0
        self.arrival_time = departure_time
        self.waiting_time += interval
        self.last_update = "waiting"
        self.stops.append(stop_id)

    def to_human_readable(self):
        return {
            "arrival_time": strtime.seconds_to_str_time(self.arrival_time),
            "travel_time": strtime.seconds_to_str_time(self.travel_time),
            "walking_time": strtime.seconds_to_str_time(self.walking_time),
            "waiting_time": strtime.seconds_to_str_time(self.waiting_time),
            "stops": self.stops,
            "trips": self.trips,
        }

    def copy(self: Self) -> Self:
        label = CustomLabel(self.arrival_time)

        label.stops = self.stops.copy()
        label.trips = self.trips.copy()
        label.last_update = self.last_update

        label.travel_time = self.travel_time
        label.walking_time = self.walking_time
        label.waiting_time = self.waiting_time

        return label

In [288]:
label_class = ActivityDurationLabel

mcraptor = McRaptor(structs_dict, footpaths_dict, 2, 60, {}, {}, label_class)
bags = mcraptor.run("818", "", "15:00:00")

## Convert bags with labels results to dataframe

In [289]:
labels = pd.DataFrame.from_dict(
    {
        (outer_key, inner_key): values
        for outer_key, inner_dict in bags.items()
        for inner_key, values in enumerate(inner_dict)
    }
).T

labels = labels.reset_index(names=["stop_id", "label_id"])
labels

Unnamed: 0,stop_id,label_id,arrival_time,travel_time,walking_time,waiting_time,stops,trips,traces
0,819,0,15:11:54,00:01:00,00:02:54,00:08:00,"[818, 818, 317, 819]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
1,819,1,15:43:00,00:26:00,00:00:00,00:17:00,"[818, 818, 317, 316, 34, 35, 35, 653, 34, 37, ...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
2,819,2,15:43:00,00:24:00,00:00:02,00:18:58,"[818, 818, 317, 316, 34, 35, 653, 653, 34, 37,...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
3,819,3,15:23:00,00:02:00,00:00:00,00:21:00,"[818, 818, 317, 317, 819]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
4,257,0,15:42:04,00:23:00,00:07:04,00:12:00,"[818, 818, 317, 316, 34, 35, 9, 752, 752, 7, 8...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
...,...,...,...,...,...,...,...,...,...
1293,255,2,15:33:00,00:24:00,00:00:00,00:09:00,"[818, 818, 317, 316, 34, 35, 9, 752, 7, 7, 30,...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
1294,255,3,15:29:02,00:12:00,00:08:02,00:09:00,"[818, 818, 317, 317, 303, 325, 326, 246, 245, ...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
1295,255,4,15:36:00,00:20:00,00:04:40,00:11:20,"[818, 818, 317, 316, 34, 35, 9, 752, 752, 7, 3...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
1296,255,5,15:31:00,00:18:00,00:00:00,00:13:00,"[818, 818, 317, 316, 34, 35, 35, 36, 32, 30, 3...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."


#### Example of the bag/labels for a given stop

In [290]:
labels[labels["stop_id"] == "885"].sort_values("arrival_time")

Unnamed: 0,stop_id,label_id,arrival_time,travel_time,walking_time,waiting_time,stops,trips,traces
260,885,6,15:13:55,00:01:00,00:04:55,00:08:00,"[818, 818, 317, 885]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
255,885,1,15:23:00,00:02:00,00:00:00,00:21:00,"[818, 818, 317, 317, 885]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
256,885,2,15:23:00,00:06:00,00:04:43,00:12:17,"[818, 818, 317, 316, 323, 323, 819, 317, 885]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
258,885,4,15:23:00,00:03:00,00:02:54,00:17:06,"[818, 818, 317, 819, 819, 317, 885]",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
257,885,3,15:41:00,00:22:00,00:00:02,00:18:58,"[818, 818, 317, 316, 34, 35, 653, 653, 34, 37,...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
259,885,5,15:41:00,00:24:00,00:00:00,00:17:00,"[818, 818, 317, 316, 34, 35, 35, 653, 34, 37, ...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."
254,885,0,15:42:04,00:22:00,00:03:04,00:17:00,"[818, 818, 317, 316, 34, 35, 35, 653, 34, 37, ...",[424-16-001-343.1.12:150700-50-2_A6387AE1-84C0...,"[Start at 818 at 15:00:00, Trip 424-16-001-343..."


## Parse time columns

In [291]:
time_cols = ["arrival_time", "travel_time", "walking_time", "waiting_time"]
for col in time_cols:
	labels[col] = labels[col].apply(pd.to_timedelta)

In [292]:
labels.describe()

Unnamed: 0,label_id,arrival_time,travel_time,walking_time,waiting_time
count,1298.0,1298,1298,1298,1298
mean,9.483821,0 days 16:07:36.284283513,0 days 00:14:12.295839753,0 days 00:11:44.284283513,0 days 00:41:39.704160246
std,8.409625,0 days 01:43:09.147939788,0 days 00:09:20.511220557,0 days 00:11:03.212132845,0 days 01:44:12.588825405
min,0.0,0 days 15:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00
25%,3.0,0 days 15:31:12.750000,0 days 00:07:00,0 days 00:04:23.250000,0 days 00:09:13
50%,7.0,0 days 15:42:20,0 days 00:13:00,0 days 00:08:06,0 days 00:14:28
75%,14.0,0 days 15:51:46,0 days 00:21:00,0 days 00:16:59.750000,0 days 00:20:20
max,39.0,1 days 01:08:34,0 days 00:52:00,0 days 01:04:19,0 days 09:53:00


## Map

In [293]:
stops_df = storage.read_df('../data/cleaned/stops.csv')
trips_df = storage.read_df('../data/cleaned/trips.csv')
routes_df = storage.read_df('../data/cleaned/routes.csv')

trace_enricher = TraceEnricher(stops_df, trips_df, routes_df)

In [294]:
labels["traces"] = labels["traces"].apply(trace_enricher.enrich_traces)

In [295]:
latest_arrival_time = strtime.str_time_to_seconds("15:30:00")
max_walking_time = strtime.str_time_to_seconds("00:30:00")
max_waiting_time = strtime.str_time_to_seconds("00:10:00")
max_travel_time = strtime.str_time_to_seconds("11:00:00")

In [296]:
bags_filtered = {
    stop_id: [
        label
        for label in b
        if strtime.str_time_to_seconds(label["arrival_time"]) <= latest_arrival_time
        and strtime.str_time_to_seconds(label["walking_time"]) <= max_walking_time
        and strtime.str_time_to_seconds(label["waiting_time"]) <= max_waiting_time
        and strtime.str_time_to_seconds(label["travel_time"]) <= max_travel_time
    ]
    for stop_id, b in bags.items()
}


In [297]:
sample_stop = stops_df.sample(1)

m = folium.Map(location=[sample_stop.stop_lat, sample_stop.stop_lon], zoom_start=12)
for i, row in stops_df.iterrows():
    l = bags_filtered[row.stop_id]
    color = "green" if len(l) else "red"
    folium.CircleMarker(
        location=[row.stop_lat, row.stop_lon],
        radius=2,
        color=color,
        popup=f"{row.stop_name} ({row.stop_id}) n_labels:{len(l)}",
    ).add_to(m)
m

  float(coord)
  if math.isnan(float(coord)):
  return [float(x) for x in coords]


In [298]:
# sample_label = labels.sample(1).iloc[0]
# traces = trace_enricher.enrich_traces(sample_label.traces)

# get random from filtered bag

traces = random.choice(list(bags_filtered.values()))[0]["traces"]

add_tracer_list_to_folium_map(traces, m, stops_df)
m
