In [51]:
# add . to module name
import sys
sys.path.append("../src/")

In [52]:
from typing import Optional
from typing_extensions import Self

import pandas as pd
import folium

from package import storage, strtime
from package.structs import build
from package.raptor.mcraptor import McRaptor
from package.raptor import bag

In [53]:
footpaths_dict = storage.read_any_dict("../data/footpaths.pkl")
footpaths_dict = footpaths_dict["footpaths"]

structs_dict = storage.read_any_dict("../data/structs.pkl")
build.validate_structs_dict(structs_dict)

In [92]:
class CustomLabel(bag.LabelInterface):
    def __init__(self, time: int, stop: Optional[str] = None):
        self.arrival_time = time
        self.travel_time = 0
        self.walking_time = 0
        self.waiting_time = 0
        self.stops = []
        self.trips = []
        if stop is not None:
            self.stops.append(stop)

        self.last_update = "start"

    def __repr__(self):
        return f"Label({self.arrival_time}, t={self.travel_time}, w={self.walking_time}, wait={self.waiting_time})"

    def strictly_dominates(self, other: Self) -> bool:
        return (
            self.arrival_time <= other.arrival_time
            and self.travel_time <= other.travel_time
            and self.walking_time <= other.walking_time
            and self.waiting_time <= other.waiting_time
        )

    def update_along_trip(self, arrival_time: int, stop_id: str, trip_id: str):
        interval = arrival_time - self.arrival_time
        if interval < 0:
            print(self)
        # assert interval >= 0
        self.arrival_time = arrival_time
        self.travel_time += interval
        self.last_update = "trip"
        self.stops.append(stop_id)
        self.trips.append(trip_id)

    def update_along_footpath(self, walking_time: int, stop_id: str):
        self.arrival_time = self.arrival_time + walking_time
        self.walking_time += walking_time
        self.last_update = "footpath"
        self.stops.append(stop_id)

    def update_before_route_bag_merge(self, departure_time: int, stop_id: str):
        interval = departure_time - self.arrival_time
        if interval < 0:
            print(self)
        # assert interval >= 0
        self.arrival_time = departure_time
        self.waiting_time += interval
        self.last_update = "waiting"
        self.stops.append(stop_id)

    def to_human_readable(self):
        return {
            "arrival_time": strtime.seconds_to_str_time(self.arrival_time),
            "travel_time": strtime.seconds_to_str_time(self.travel_time),
            "walking_time": strtime.seconds_to_str_time(self.walking_time),
            "waiting_time": strtime.seconds_to_str_time(self.waiting_time),
            "stops": self.stops,
            "trips": self.trips,
        }

    def copy(self: Self) -> Self:
        label = CustomLabel(self.arrival_time)

        self.stops = self.stops.copy()
        self.trips = self.trips.copy()
        self.last_update = self.last_update

        label.travel_time = self.travel_time
        label.walking_time = self.walking_time
        label.waiting_time = self.waiting_time

        return label

In [93]:
label_class = CustomLabel

mcraptor = McRaptor(structs_dict, footpaths_dict, 2, 60, {}, {}, label_class)
bags = mcraptor.run("818", "", "15:00:00")

## Convert bags with labels results to dataframe

In [94]:
labels = pd.DataFrame.from_dict(
    {
        (outer_key, inner_key): values
        for outer_key, inner_dict in bags.items()
        for inner_key, values in enumerate(inner_dict)
    }
).T

labels = labels.reset_index(names=["stop_id", "label_id"])
labels

Unnamed: 0,stop_id,label_id,arrival_time,travel_time,walking_time,waiting_time,stops,trips
0,825,0,15:45:00,00:37:00,00:07:57,00:00:03,[],[]
1,825,1,15:23:00,00:10:00,00:00:00,00:13:00,[],[]
2,825,2,15:16:00,00:09:00,00:06:00,00:01:00,[],[]
3,825,3,15:26:34,00:14:00,00:12:01,00:00:33,[],[]
4,825,4,15:19:14,00:06:00,00:12:14,00:01:00,[],[]
...,...,...,...,...,...,...,...,...
2108,9,4,15:46:48,00:03:00,00:43:48,00:00:00,[],[]
2109,9,5,15:25:35,00:05:00,00:20:35,00:00:00,[],[]
2110,9,6,15:18:00,00:07:00,00:08:06,00:02:54,[],[]
2111,9,7,16:31:19,00:02:00,01:09:56,00:19:23,[],[]


#### Example of the bag/labels for a given stop

In [95]:
labels[labels["stop_id"] == "825"].sort_values("arrival_time")

Unnamed: 0,stop_id,label_id,arrival_time,travel_time,walking_time,waiting_time,stops,trips
15,825,15,15:15:00,00:14:00,00:00:00,00:01:00,[],[]
2,825,2,15:16:00,00:09:00,00:06:00,00:01:00,[],[]
22,825,22,15:17:34,00:11:00,00:05:34,00:01:00,[],[]
10,825,10,15:18:25,00:08:00,00:09:25,00:01:00,[],[]
4,825,4,15:19:14,00:06:00,00:12:14,00:01:00,[],[]
6,825,6,15:22:10,00:05:00,00:16:10,00:01:00,[],[]
17,825,17,15:22:48,00:05:00,00:17:48,00:00:00,[],[]
1,825,1,15:23:00,00:10:00,00:00:00,00:13:00,[],[]
8,825,8,15:24:51,00:01:00,00:23:51,00:00:00,[],[]
19,825,19,15:25:00,00:12:00,00:12:27,00:00:33,[],[]


## Parse time columns

In [58]:
time_cols = ["arrival_time", "travel_time", "walking_time", "waiting_time"]
for col in time_cols:
	labels[col] = labels[col].apply(pd.to_timedelta)

In [59]:
labels.describe()

Unnamed: 0,label_id,arrival_time,travel_time,walking_time,waiting_time
count,2113.0,2113,2113,2113,2113
mean,14.607194,0 days 15:44:13.491717936,0 days 00:13:54.150496923,0 days 00:19:17.103170847,0 days 00:11:02.238050165
std,11.238925,0 days 00:54:49.496841569,0 days 00:08:47.054287202,0 days 00:14:29.502372460,0 days 00:55:39.806493276
min,0.0,0 days 15:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00
25%,5.0,0 days 15:27:04,0 days 00:08:00,0 days 00:08:06,0 days 00:00:06
50%,12.0,0 days 15:35:54,0 days 00:13:00,0 days 00:16:09,0 days 00:01:00
75%,22.0,0 days 15:47:32,0 days 00:17:00,0 days 00:27:07,0 days 00:05:33
max,54.0,1 days 01:06:00,0 days 00:46:00,0 days 01:26:46,0 days 09:53:00


## Map

In [60]:
stops_df = storage.read_df('../data/cleaned/stops.csv')

In [88]:
latest_arrival_time = strtime.str_time_to_seconds("15:30:00")
max_walking_time = strtime.str_time_to_seconds("00:30:00")
max_waiting_time = strtime.str_time_to_seconds("00:03:00")
max_travel_time = strtime.str_time_to_seconds("11:00:00")

In [89]:
bags_filtered = {
    stop_id: [
        label
        for label in b
        if strtime.str_time_to_seconds(label["arrival_time"]) <= latest_arrival_time
        and strtime.str_time_to_seconds(label["walking_time"]) <= max_walking_time
        and strtime.str_time_to_seconds(label["waiting_time"]) <= max_waiting_time
        and strtime.str_time_to_seconds(label["travel_time"]) <= max_travel_time
    ]
    for stop_id, b in bags.items()
}


In [90]:
sample_stop = stops_df.sample(1)

m = folium.Map(location=[sample_stop.stop_lat, sample_stop.stop_lon], zoom_start=12)
for i, row in stops_df.iterrows():
    l = bags_filtered[row.stop_id]
    color = "green" if len(l) else "red"
    folium.CircleMarker(
        location=[row.stop_lat, row.stop_lon],
        radius=1,
        color=color,
        popup=f"{row.stop_name} ({row.stop_id}) n_labels:{len(l)}",
    ).add_to(m)
m

  float(coord)
  if math.isnan(float(coord)):
  return [float(x) for x in coords]


In [91]:
bags_filtered["769"]

[{'arrival_time': '15:29:48',
  'travel_time': '00:09:00',
  'walking_time': '00:19:48',
  'waiting_time': '00:01:00'},
 {'arrival_time': '15:29:17',
  'travel_time': '00:15:00',
  'walking_time': '00:12:23',
  'waiting_time': '00:01:54'},
 {'arrival_time': '15:29:08',
  'travel_time': '00:13:00',
  'walking_time': '00:14:14',
  'waiting_time': '00:01:54'},
 {'arrival_time': '15:29:08',
  'travel_time': '00:12:00',
  'walking_time': '00:16:21',
  'waiting_time': '00:00:47'},
 {'arrival_time': '15:29:17',
  'travel_time': '00:16:00',
  'walking_time': '00:10:44',
  'waiting_time': '00:02:33'},
 {'arrival_time': '15:29:17',
  'travel_time': '00:14:00',
  'walking_time': '00:14:30',
  'waiting_time': '00:00:47'},
 {'arrival_time': '15:29:08',
  'travel_time': '00:14:00',
  'walking_time': '00:12:35',
  'waiting_time': '00:02:33'}]