In [4]:
%load_ext autoreload
%autoreload 2
# add . to module name
import sys
sys.path.append('../src/')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
from package.logger import Timed, rlog, setup
from package import storage
setup("INFO")

In [6]:
from mcr_py import run_mlc_with_bags, GraphCache

In [7]:
from copy import deepcopy
from typing import Any, Tuple

import pandas as pd
import geopandas as gpd

import mcr_py
from mcr_py import GraphCache
import pyrosm
from package import storage, strtime
from package.logger import Timed
from package.mcr.label import McRAPTORLabel, merge_intermediate_bags
from package.mcr.path import PathManager, PathType
from package.osm import osm, graph
from package.raptor.mcraptor_single import McRaptorSingle
from package.raptor.bag import Bag
from package.mcr.bag import (
    convert_mc_raptor_bags_to_intermediate_bags,
    convert_mlc_bags_to_intermediate_bags,
)



# Number of decimal places travel times are rounded to before they are turned
# into integer edge weights (used by the add_*_weights helpers below).
ACCURACY = 1
# Factor applied to the rounded travel time before the integer cast.
# NOTE(review): with ACCURACY = 1 this evaluates to 1, so the decimal kept by
# the rounding is dropped again by the later `.astype(int)` — confirm that
# `ACCURACY - 1` (rather than `ACCURACY`) is the intended exponent.
ACCURACY_MULTIPLIER = 10 ** (ACCURACY - 1)

# Constant speeds used to derive edge travel times from edge lengths.
AVG_WALKING_SPEED = 1.4  # m/s
AVG_BIKING_SPEED = 4.0  # m/s


In [8]:
def prefix_id(
    gdf: pd.DataFrame, prefix: str, column: str, save_old=False
) -> pd.DataFrame:
    """Prepend ``prefix`` to the string form of every value in ``column``.

    The frame is modified in place and also returned.

    Args:
        gdf: Frame whose ``column`` is rewritten.
        prefix: Text put in front of each value.
        column: Name of the column to rewrite.
        save_old: When true, the untouched values are first stored in a new
            ``"<column>_old"`` column.

    Returns:
        The same ``gdf`` object that was passed in.
    """
    current_values = gdf[column]
    if save_old:
        backup_column = f"{column}_old"
        gdf[backup_column] = current_values

    prefixed_values = prefix + current_values.astype(str)
    gdf[column] = prefixed_values
    return gdf


def get_graph(
    osm_reader: pyrosm.OSM, stops_df: gpd.GeoDataFrame
) -> Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """Load the OSM node and edge tables for the area around the stops.

    Thin wrapper around ``osm.get_graph_for_city_cropped_to_stops`` that runs
    the call inside a ``Timed.info`` block so its duration is logged.

    Args:
        osm_reader: Reader for the OSM extract.
        stops_df: Stops passed on to the cropping helper.

    Returns:
        The ``(nodes, edges)`` pair produced by the helper.
    """
    with Timed.info("Getting OSM graph"):
        graph_nodes, graph_edges = osm.get_graph_for_city_cropped_to_stops(
            osm_reader, stops_df
        )
        return graph_nodes, graph_edges


def mark_bicycles(
    nodes: pd.DataFrame, n: int = 100, random_state=None
) -> pd.DataFrame:
    """Flag a random subset of nodes as having a bicycle available.

    Adds (or overwrites) the boolean column ``has_bicycle`` on ``nodes`` in
    place: every row is set to ``False`` and then up to ``n`` randomly drawn
    rows are set to ``True``.

    Args:
        nodes: Node table; modified in place.
        n: Number of nodes to mark. If the table has fewer rows than ``n``,
            all rows are marked instead of raising.
        random_state: Forwarded to ``DataFrame.sample`` so the selection can
            be made reproducible. ``None`` keeps the unseeded behaviour.

    Returns:
        The same ``nodes`` object that was passed in.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    nodes["has_bicycle"] = False

    # DataFrame.sample raises when asked for more rows than exist, so clamp.
    sample_size = min(n, len(nodes))
    if sample_size > 0:
        chosen_index = nodes.sample(sample_size, random_state=random_state).index
        nodes.loc[chosen_index, "has_bicycle"] = True

    return nodes


def create_multi_modal_graph(
    nodes: pd.DataFrame, edges: pd.DataFrame
) -> dict[str, pd.DataFrame]:
    """Build a combined walking + cycling graph from one base graph.

    The base edges are first made bidirectional. Nodes and edges are then
    duplicated into a walking layer (ids prefixed with ``"W"``) and a cycling
    layer (ids prefixed with ``"B"``), and the layers are linked by the edges
    from ``create_transfer_edges``. Travel times are derived from the edge
    length and the module-level average speeds; cycling edges additionally
    carry their travel time in ``travel_time_bike``.

    Args:
        nodes: Base node table with an ``id`` column. Not modified.
        edges: Base edge table with ``u``, ``v`` and ``length`` columns.

    Returns:
        A dict with the combined ``"nodes"`` and ``"edges"`` tables plus the
        walking-only ``"walking_nodes"`` and ``"walking_edges"`` tables.
    """
    bidirectional_edges = add_reverse_edges(edges)

    # Each layer works on its own copy because prefix_id mutates its input.
    walking_nodes = prefix_id(nodes.copy(), "W", "id", save_old=True)
    bike_nodes = prefix_id(nodes.copy(), "B", "id", save_old=True)

    walking_edges = bidirectional_edges.copy()
    bike_edges = bidirectional_edges.copy()
    for endpoint in ("u", "v"):
        walking_edges = prefix_id(walking_edges, "W", endpoint)
        bike_edges = prefix_id(bike_edges, "B", endpoint)

    transfer_edges = create_transfer_edges(nodes)

    walking_edges = add_travel_time(walking_edges, AVG_WALKING_SPEED)
    bike_edges = add_travel_time(bike_edges, AVG_BIKING_SPEED)
    bike_edges["travel_time_bike"] = bike_edges["travel_time"]

    all_edges = combine_edges(walking_edges, bike_edges, transfer_edges)
    all_nodes = pd.concat([walking_nodes, bike_nodes])

    return {
        "nodes": all_nodes,
        "edges": all_edges,
        "walking_nodes": walking_nodes,
        "walking_edges": walking_edges,
    }


def add_reverse_edges(edges: pd.DataFrame) -> pd.DataFrame:
    """Return ``edges`` followed by a copy of every edge with ``u``/``v`` swapped.

    The input frame is left untouched. The original index labels are kept, so
    each label appears twice in the result.
    """
    swap_endpoints = {"u": "v", "v": "u"}
    flipped_edges = edges.rename(columns=swap_endpoints)
    return pd.concat([edges, flipped_edges])



# create transfer edges from bike to walk at all nodes
def create_transfer_edges(nodes: pd.DataFrame):
    """Create one zero-length edge from the bike copy to the walk copy of each node.

    For every row in ``nodes`` an edge ``"B<id>" -> "W<id>"`` with
    ``length == 0`` is produced.

    The ids are taken straight from the ``id`` column instead of iterating
    rows: row-wise iteration upcasts integer ids to float as soon as the frame
    also holds a float column, which would yield ids such as ``"B10.0"``.

    Args:
        nodes: Node table with an ``id`` column. May be empty.

    Returns:
        A frame with the columns ``u``, ``v`` and ``length`` and a fresh
        0..n-1 index.
    """
    node_ids = nodes["id"].astype(str).reset_index(drop=True)

    transfer_edges = pd.DataFrame(
        {
            "u": "B" + node_ids,
            "v": "W" + node_ids,
            "length": 0,
        },
        columns=["u", "v", "length"],
    )

    return transfer_edges


def add_travel_time(edges: pd.DataFrame, speed: float) -> pd.DataFrame:
    """Add a ``travel_time`` column computed as edge length divided by ``speed``.

    The frame is modified in place and also returned.
    """
    travel_time = edges.length.div(speed)
    edges["travel_time"] = travel_time
    return edges


def combine_edges(
    walking_edges: pd.DataFrame,
    bike_edges: pd.DataFrame,
    transfer_edges: pd.DataFrame,
) -> pd.DataFrame:
    """Stack the three edge tables into one frame with a fresh index.

    Columns missing from one of the inputs (``travel_time`` on transfer
    edges, ``travel_time_bike`` on walking and transfer edges) come out of the
    concatenation as NaN; every NaN in the result is replaced by 0.
    """
    edge_tables = [walking_edges, bike_edges, transfer_edges]
    stacked = pd.concat(edge_tables, ignore_index=True)
    return stacked.fillna(0)


def _format_weight_values(values: pd.Series) -> pd.Series:
    """Round, scale and integer-cast ``values``, returning them as strings.

    Uses the module-level ``ACCURACY`` (decimals kept by the rounding) and
    ``ACCURACY_MULTIPLIER`` (scale applied before the integer cast).
    """
    return (values.round(ACCURACY) * ACCURACY_MULTIPLIER).astype(int).astype(str)


def add_multi_modal_weights(edges: pd.DataFrame) -> pd.DataFrame:
    """Add the string-encoded weight columns for the multi-modal graph.

    ``weights`` becomes ``"(<travel_time>,0)"`` and ``hidden_weights`` becomes
    ``"(<travel_time_bike>)"``, both using the integer formatting of
    ``_format_weight_values``. The frame is modified in place and returned.
    """
    edges["weights"] = "(" + _format_weight_values(edges["travel_time"]) + ",0)"
    edges["hidden_weights"] = (
        "(" + _format_weight_values(edges["travel_time_bike"]) + ")"
    )

    return edges


def add_single_modal_weights(edges: pd.DataFrame) -> pd.DataFrame:
    """Add the string-encoded ``weights`` column for a single-mode graph.

    ``weights`` becomes ``"(<travel_time>,0)"``, formatted exactly like the
    multi-modal variant. The frame is modified in place and returned.
    """
    edges["weights"] = "(" + _format_weight_values(edges["travel_time"]) + ",0)"
    return edges


def reset_node_ids(
    nodes: pd.DataFrame, edges: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame, dict[Any, int]]:
    """Replace node ids by consecutive integers starting at 0.

    The new id of a node is its position among the unique values of
    ``nodes.id`` (order of first appearance). Both frames are modified in
    place: ``nodes`` keeps its previous ids in ``old_id``, and the ``u`` / ``v``
    columns of ``edges`` are translated with the same mapping.

    Returns:
        ``(nodes, edges, node_map)`` where ``node_map`` maps old id -> new id.

    Raises:
        ValueError: If either frame contains any NaN afterwards, e.g. because
            an edge endpoint is missing from ``nodes``.
    """
    node_map = {
        old_id: new_id for new_id, old_id in enumerate(nodes.id.unique())
    }

    nodes["old_id"] = nodes["id"]
    nodes["id"] = nodes["id"].map(node_map)
    for endpoint in ("u", "v"):
        edges[endpoint] = edges[endpoint].map(node_map)

    total_na = edges.isna().sum().sum() + nodes.isna().sum().sum()
    if total_na > 0:
        raise ValueError(f"Found {total_na} NaNs in graph")

    return nodes, edges, node_map


def get_reverse_map(d: dict[Any, Any]) -> dict[Any, Any]:
    """Return a new dict mapping each value of ``d`` back to its key.

    If several keys share a value, the key that comes last in ``d`` wins.
    """
    inverted = {}
    for key, value in d.items():
        inverted[value] = key
    return inverted



In [9]:
# --- Inputs ---------------------------------------------------------------
stops_path = "../data/cleaned/stops.csv"
city_id = "Koeln"
osm_path = ""
structs = "../data/structs.pkl"

structs_dict = storage.read_any_dict(structs)
with Timed.info("Reading stops"):
    stops_df = storage.read_gdf(stops_path)

with Timed.info("Preparing graphs"):
    osm_reader = osm.get_osm_reader_for_city_id_or_osm_path(city_id, osm_path)
    nodes, edges = osm.get_graph_for_city_cropped_to_stops(osm_reader, stops_df)
    nxgraph = graph.create_nx_graph(osm_reader, nodes, edges)

    # Only the ids / endpoints / lengths are needed from here on. Take explicit
    # copies: mark_bicycles adds a column to `nodes` in place, and doing that on
    # a plain column selection triggers pandas' SettingWithCopyWarning.
    nodes: pd.DataFrame = nodes[["id"]].copy()  # type: ignore
    edges: pd.DataFrame = edges[["u", "v", "length"]].copy()  # type: ignore

    stops_df = graph.add_nearest_node_to_stops(stops_df, nxgraph)

    stops_df["stop_id"] = stops_df["stop_id"].astype(int)
    stop_node_map = stops_df.set_index("stop_id")["nearest_node"].to_dict()
    # NOTE(review): if several stops share a nearest node, only the last one
    # survives this inversion.
    node_stop_map = {v: k for k, v in stop_node_map.items()}

    nodes = mark_bicycles(nodes)

    graph_components = create_multi_modal_graph(nodes, edges)
    nodes, edges, walking_nodes, walking_edges = (
        graph_components["nodes"],
        graph_components["edges"],
        graph_components["walking_nodes"],
        graph_components["walking_edges"],
    )

    # The multi-modal graph and the walking-only graph each get their own
    # consecutive integer ids and their own old-id -> new-id map.
    nodes, edges, node_map = reset_node_ids(nodes, edges)
    walking_nodes, walking_edges, walking_node_map = reset_node_ids(
        walking_nodes, walking_edges
    )
    reverse_node_map = get_reverse_map(node_map)
    reverse_walking_node_map = get_reverse_map(walking_node_map)

    edges = add_multi_modal_weights(edges)
    walking_edges = add_single_modal_weights(walking_edges)

    raw_edges = edges[["u", "v", "weights", "hidden_weights"]].to_dict("records")
    raw_walking_edges = walking_edges[["u", "v", "weights"]].to_dict("records")

# Walking-graph ids of the nodes flagged by mark_bicycles.
bicycle_transfer_nodes_walking_node_ids = walking_nodes[
    walking_nodes["has_bicycle"]
].id.values

with Timed.info("Creating graph cache"):
    gc = GraphCache()
    gc.set_graph(raw_edges)
    walking_gc = GraphCache()
    walking_gc.set_graph(raw_walking_edges)

# First walking step: run `mcr_py.run_mlc_with_node_and_time` on the
# walking-only graph cache from a hard-coded start node and start time.
with Timed.info("Running Dijkstra step"):
    # Un-prefixed node id; it is looked up as "W<id>" in walking_node_map below.
    start_node_id = 295101994
    walking_result_bags = mcr_py.run_mlc_with_node_and_time(
        walking_gc,
        walking_node_map[f"W{start_node_id}"],
        strtime.str_time_to_seconds("08:00:00"),
    )

# Convert the raw result into intermediate bags and register their paths as
# walking paths with a fresh PathManager.
path_manager = PathManager()
walking_result_bags = convert_mlc_bags_to_intermediate_bags(walking_result_bags)
path_manager.extract_all_paths_from_bags(walking_result_bags, PathType.WALKING)

# translates a node id from the walking graph to the corresponding bicycle
# node id from the multi-modal graph
def translate_walking_node_id_to_bicycle_node_id(
    walking_node_id: int,
) -> int:
    """Map a walking-graph node id to the id of its bicycle twin.

    Looks up the prefixed id (``"W..."``) behind ``walking_node_id`` in
    ``reverse_walking_node_map``, swaps ``"W"`` for ``"B"`` and resolves the
    result through ``node_map`` of the multi-modal graph.
    """
    prefixed_walking_id = reverse_walking_node_map[walking_node_id]
    prefixed_bicycle_id = prefixed_walking_id.replace("W", "B")
    return node_map[prefixed_bicycle_id]

# filter bags at bicycle nodes
# Keeps only the walking bags whose node id is one of the bicycle transfer
# nodes. `bicycle_transfer_nodes_walking_node_ids` is a NumPy array, so each
# `in` check is a linear scan.
bicycle_bags = {
    node_id: bag
    for node_id, bag in walking_result_bags.items()
    if node_id in bicycle_transfer_nodes_walking_node_ids
}
# translate node ids
# Re-keys the bags by the multi-modal bicycle node id and converts every label
# with `to_mlc_label`, passing that same bicycle node id.
bicycle_bags = {
    translate_walking_node_id_to_bicycle_node_id(node_id): [
        label.to_mlc_label(translate_walking_node_id_to_bicycle_node_id(node_id))
        for label in labels
    ]
    for node_id, labels in bicycle_bags.items()
}

# validation
# Checks every start node against the multi-modal graph cache before running.
for node_id in bicycle_bags.keys():
    gc.validate_node_id(node_id)

# Bicycle step on the multi-modal graph, seeded with the bags built above.
bicycle_result_bags = mcr_py.run_mlc_with_bags(gc, bicycle_bags, update_label_func="next_bike_tariff")  # type: ignore
bicycle_result_bags = convert_mlc_bags_to_intermediate_bags(bicycle_result_bags)
# Register the resulting paths as cycling/walking paths.
path_manager.extract_all_paths_from_bags(
    bicycle_result_bags, PathType.CYCLING_WALKING, path_index_offset=1
)

# --- McRAPTOR step
def translate_walking_node_id_to_stop_id(walking_node_id: int) -> str | None:
    """Return the stop id (as a string) located at a walking node, if any.

    The walking node id is resolved to its prefixed id via
    ``reverse_walking_node_map``; the one-character prefix is stripped and the
    remaining integer is looked up in ``node_stop_map``. ``None`` is returned
    when no stop is registered for that node.
    """
    prefixed_node_id = reverse_walking_node_map[walking_node_id]
    unprefixed_node_id = int(prefixed_node_id[1:])
    if unprefixed_node_id not in node_stop_map:
        return None
    return str(node_stop_map[unprefixed_node_id])

# Keep only the walking bags whose node has a stop attached.
mc_raptor_bags = {
    node_id: bag
    for node_id, bag in walking_result_bags.items()
    if translate_walking_node_id_to_stop_id(node_id) is not None
}
# Re-key by stop id and convert every label with `to_mc_raptor_label`
# (called with null_cost=True), wrapping the labels of each stop in a Bag.
mc_raptor_bags = {
    translate_walking_node_id_to_stop_id(node_id): Bag.from_labels(
        [
            label.to_mc_raptor_label(
                translate_walking_node_id_to_stop_id(node_id), null_cost=True  # type: ignore
            )
            for label in labels
        ]
    )
    for node_id, labels in mc_raptor_bags.items()
}

# NOTE(review): the meaning of the positional argument 60 is not visible in
# this notebook — confirm against McRaptorSingle's signature.
mc_raptor = McRaptorSingle(
    structs_dict,
    60,
    McRAPTORLabel,
)

# Public-transport step, seeded with the per-stop bags built above.
mc_raptor_result_bags = mc_raptor.run(mc_raptor_bags)  # type: ignore

def translate_stop_id_to_node_id(stop_id: int) -> int:
    """Return the multi-modal graph id of the walking node nearest to a stop.

    ``stop_node_map`` yields the stop's nearest un-prefixed node id, which is
    then resolved as ``"W<id>"`` through ``node_map``.
    """
    nearest_node_id = stop_node_map[stop_id]
    walking_key = f"W{nearest_node_id}"
    return node_map[walking_key]

# Re-key the McRAPTOR result from stop ids to graph node ids (ids come from
# `node_map`, see translate_stop_id_to_node_id).
mc_raptor_result_bags = {
    translate_stop_id_to_node_id(int(stop_id)): bag
    for stop_id, bag in mc_raptor_result_bags.items()
}
# Convert to intermediate bags; the call passes min_path_length=2.
mc_raptor_result_bags = convert_mc_raptor_bags_to_intermediate_bags(
    mc_raptor_result_bags,
    min_path_length=2,
)
# Register the resulting paths as public-transport paths.
path_manager.extract_all_paths_from_bags(
    mc_raptor_result_bags, PathType.PUBLIC_TRANSPORT, path_index_offset=1
)

# Start from the bicycle result and merge the public-transport bags into it,
# node by node.
combined_bags = deepcopy(
    bicycle_result_bags
)  # remove deepcopy, if you are sure, that bicycle_result_bags is not used anymore
for node_id, bag in mc_raptor_result_bags.items():
    merged_bag = (
        merge_intermediate_bags(combined_bags[node_id], bag)
        if node_id in combined_bags
        else bag
    )
    combined_bags[node_id] = merged_bag
for bag in combined_bags.values():
    # nullify hidden_values
    # NOTE: bags taken over unmerged from mc_raptor_result_bags are the same
    # objects, so their labels are modified here as well.
    for label in bag:
        label.hidden_values = []

# Second walking step, seeded with the combined bags.
# NOTE(review): the keys of combined_bags are ids from `node_map` (multi-modal
# graph), while `walking_gc` was built from edges numbered by
# `walking_node_map` — confirm both id spaces agree for every key passed here.
walking_result_bags_2 = mcr_py.run_mlc_with_bags(
    walking_gc,
    combined_bags,
)
walking_result_bags_2 = convert_mlc_bags_to_intermediate_bags(walking_result_bags_2)
path_manager.extract_all_paths_from_bags(
    walking_result_bags_2, PathType.WALKING, path_index_offset=2
)

# Persist all intermediate results so the analysis cells below can be run
# without repeating the computation.
# NOTE(review): the target is an absolute, user-specific path, unlike the
# relative "../data/..." inputs above; the reload cell reads the same path, so
# both must be changed together.
storage.write_any_dict(
    {
        "walking_result_bags": walking_result_bags,
        "walking_result_bags_2": walking_result_bags_2,
        "path_manager": path_manager,
        "bicycle_result_bags": bicycle_result_bags,
        "mc_raptor_result_bags": mc_raptor_result_bags,
        "mc_raptor_bags": mc_raptor_bags,
        "combined_bags": combined_bags,
        "node_map": node_map,
        "walking_node_map": walking_node_map,
        "stops_df": stops_df,
    },
    "/home/moritz/dev/uni/mcr-py/data/bags.pkl",  # type: ignore
)


In [100]:
# Reload the results written by the computation cell and rebind the notebook
# variables from them.
# NOTE(review): absolute, user-specific path; presumably a pickle file given
# the ".pkl" suffix — only load files you trust.
data = storage.read_any_dict("/home/moritz/dev/uni/mcr-py/data/bags.pkl")
walking_result_bags = data["walking_result_bags"]
walking_result_bags_2 = data["walking_result_bags_2"]
bicycle_result_bags = data["bicycle_result_bags"]
combined_bags = data["combined_bags"]
path_manager = data["path_manager"]
node_map = data["node_map"]
walking_node_map = data["walking_node_map"]
mc_raptor_bags = data["mc_raptor_bags"]
mc_raptor_result_bags = data["mc_raptor_result_bags"]
# The reverse maps are not stored; rebuild them (new id -> prefixed old id).
reverse_node_map = {v: k for k, v in node_map.items()}
reverse_walking_node_map = {v: k for k, v in walking_node_map.items()}
stops_df = data["stops_df"]


In [101]:
import os
import folium
from package.mcr.path import PathType

In [102]:
# Reload the raw OSM node/edge tables. This rebinds `nodes` and `edges` to the
# un-prefixed OSM tables; the plotting cells below read `lat`/`lon` from `nodes`.
city_id = "Koeln"
stops_path = "../data/cleaned/stops.csv"
osm_path = osm.get_osm_path_from_city_id(city_id)

with Timed.info("Reading stops"):
	other_stops_df = storage.read_gdf(stops_path)

# Download the extract only when it is not on disk yet.
# NOTE(review): the else branch also runs when the file is missing and
# city_id is empty, in which case the log message would be misleading.
if not os.path.exists(osm_path) and city_id:
	rlog.info("Downloading OSM data")
	osm.download_city(city_id, osm_path)
else:
	rlog.info("Using existing OSM data")

osm_reader = osm.new_osm_reader(osm_path)

with Timed.info("Getting OSM graph"):
	nodes, edges = osm.get_graph_for_city_cropped_to_stops(osm_reader, other_stops_df)

In [12]:
# Per path type, the map used to translate graph node ids back to prefixed
# ids ("W..."/"B..."); passed to
# path_manager.reconstruct_and_translate_path_for_label in later cells.
translator_map = {
    PathType.WALKING: reverse_walking_node_map,
    PathType.CYCLING_WALKING: reverse_node_map,
    PathType.PUBLIC_TRANSPORT: None,
}
# Same maps with the one-character mode prefix stripped and the remainder
# converted to int.
no_prefix_reverse_walking_node_map = {
    k: int(v[1:]) for k, v in reverse_walking_node_map.items()
}
no_prefix_reverse_node_map = {k: int(v[1:]) for k, v in reverse_node_map.items()}
no_prefix_translator_map = {
    PathType.WALKING: no_prefix_reverse_walking_node_map,
    PathType.CYCLING_WALKING: no_prefix_reverse_node_map,
    PathType.PUBLIC_TRANSPORT: None,
}


In [103]:
# Plot a handful of labels from the first walking step, together with their
# reconstructed paths.

# Centre the map on the node of the first label.
sample_label = list(walking_result_bags.values())[0][0]
sample_node_id = int(reverse_walking_node_map[sample_label.node_id].replace('W', ''))
nodes_by_id = nodes.set_index('id')
sample_node = nodes_by_id.loc[sample_node_id]

m = folium.Map(location=[sample_node.lat, sample_node.lon], zoom_start=15)
counter = 0
for bag in walking_result_bags.values():
	# Stop once more than 10 labels have been drawn (checked per bag, so the
	# last bag may push the total above that).
	if counter > 10:
		break
	for label in bag:
		counter += 1

		# marker at the label's node, with its arrival value in the popup
		node_id = int(reverse_walking_node_map[label.node_id].replace('W', ''))
		node = nodes_by_id.loc[node_id]
		popup=f"node_id: {node_id}\n label: {round(label.values[0]/60, 2)}min {strtime.seconds_to_str_time(label.values[0])}"
		folium.CircleMarker(
			location=[node.lat, node.lon],
			radius=3,
			popup=popup,
			color='red',
		).add_to(m)

		# path
		paths = path_manager.reconstruct_and_translate_path_for_label(label, no_prefix_translator_map)
		for path in paths:
			path_nodes = [nodes_by_id.loc[node_id] for node_id in path.path]
			path_lat_lon = [(node.lat, node.lon) for node in path_nodes]
			folium.PolyLine(path_lat_lon, color='blue', weight=2).add_to(m)
m

In [104]:
# Line colour per path type. It is only referenced from the commented-out
# plotting cells.
color_map = {
	PathType.WALKING: 'red',
	PathType.CYCLING_WALKING: 'blue',
}

In [105]:
# sample_label = list(bicycle_result_bags.values())[0][0]
# sample_node_id = int(reverse_node_map[label.node_id].replace('B', '').replace('W', ''))
# nodes_by_id = nodes.set_index('id')
# sample_node = nodes_by_id.loc[sample_node_id]

# m = folium.Map(location=[sample_node.lat, sample_node.lon], zoom_start=15)
# counter = 0
# for bag in bicycle_result_bags.values():
# 	if counter > 5:
# 		break
# 	for label in bag:
# 		counter += 1


# 		node_id = int(reverse_node_map[label.node_id].replace('B', '').replace('W', ''))
# 		node = nodes_by_id.loc[node_id]
# 		popup=f"node_id: {node_id}\n label: {round(label.values[0]/60, 2)}min"
# 		folium.CircleMarker(
# 			location=[node.lat, node.lon],
# 			radius=3,
# 			popup=popup,
# 			color='red',
# 		).add_to(m)

# 		# path
# 		paths = path_manager.reconstruct_and_translate_path_for_label(label, no_prefix_translator_map)
# 		for path in paths:
# 			path_nodes = [nodes_by_id.loc[node_id] for node_id in path.path]
# 			path_lat_lon = [(node.lat, node.lon) for node in path_nodes]
# 			color = color_map[path.path_type]
# 			folium.PolyLine(path_lat_lon, color=color, weight=2).add_to(m)
# m

In [106]:
from package.mcr.path import Path, GTFSPath

In [107]:
# Index the stops by integer stop id for the lookups in the plotting cell.
# Note that the cast rewrites the `stop_id` column of `stops_df` in place.
stops_df['stop_id'] = stops_df['stop_id'].astype(int)
stops_by_id = stops_df.set_index('stop_id')
stops_by_id.head(2)

Unnamed: 0_level_0,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,geometry,nearest_node,nearest_node_dist
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,,Köln Heumarkt,,50.935705,6.959995,,,0,,,POINT (6.96000 50.93570),673702632,16.33734
2,,Köln Neumarkt,,50.93577,6.947677,,,0,,,POINT (6.94768 50.93577),443861,17.695676


In [108]:
import random
# Flatten the McRAPTOR result into (node_id, label) pairs, one per label.
# NOTE: despite the name this is built from `mc_raptor_result_bags`, not
# from `mc_raptor_bags`.
mc_raptor_bags_flat = [(key, value) for key, values in mc_raptor_result_bags.items() for value in values]
mc_raptor_bags_flat[:5]

[(13223,
  IntermediateLabel(values=[34620, 0], hidden_values=[], path=[122416, 823699], node_id=13223)),
 (321,
  IntermediateLabel(values=[34320, 0], hidden_values=[], path=[122416, 823700], node_id=321)),
 (39347,
  IntermediateLabel(values=[31560, 0], hidden_values=[], path=[38789, 823701], node_id=39347)),
 (110788,
  IntermediateLabel(values=[34500, 0], hidden_values=[], path=[49354, 823702], node_id=110788)),
 (30799,
  IntermediateLabel(values=[30540, 0], hidden_values=[], path=[67316, 823703], node_id=30799))]

In [109]:
# sample_label = list(mc_raptor_result_bags.values())[0][0]
# sample_node_id = int(reverse_node_map[sample_label.node_id].replace("B", "").replace("W", ""))
# nodes_by_id = nodes.set_index("id")
# sample_node = nodes_by_id.loc[sample_node_id]

# m = folium.Map(location=[sample_node.lat, sample_node.lon], zoom_start=15)
# counter = 0

# for end_stop_id, label in random.sample(mc_raptor_bags_flat, 5):
#     paths = path_manager.reconstruct_and_translate_path_for_label(
#         label, no_prefix_translator_map
#     )
#     for path in paths:
#         if isinstance(path, Path):
#             path_nodes = [nodes_by_id.loc[node_id] for node_id in path.path]
#             path_lat_lon = [(node.lat, node.lon) for node in path_nodes]
#             color = color_map[path.path_type]
#             folium.PolyLine(path_lat_lon, color=color, weight=2).add_to(m)
#         elif isinstance(path, GTFSPath):
#             start_stop_id = path.start_stop_id
#             end_stop_id = path.end_stop_id
#             start_stop = stops_by_id.loc[start_stop_id]
#             end_stop = stops_by_id.loc[end_stop_id]
#             trip = path.trip_id

#             path_lat_lon = [(float(start_stop.stop_lat), float(start_stop.stop_lon)), (float(end_stop.stop_lat), float(end_stop.stop_lon))]
#             folium.PolyLine(
#                 path_lat_lon,
#                 color="green",
#                 weight=2,
#                 popup=f"Trip: {trip}",
#             ).add_to(m)

#             folium.CircleMarker(
#                 location=[float(start_stop.stop_lat), float(start_stop.stop_lon)],
#                 popup=f"Start: {start_stop.stop_name}",
#                 color="green",
#                 radius=3,
#             ).add_to(m)
#             folium.CircleMarker(
#                 location=[float(end_stop.stop_lat), float(end_stop.stop_lon)],
#                 popup=f"End: {end_stop.stop_name}",
#                 color="green",
#                 radius=3,
#             ).add_to(m)
# m


In [110]:
# Flatten the combined bags into (node_id, label) pairs, one per label.
combined_bags_flat = [
    (key, value) for key, values in combined_bags.items() for value in values
]
# Same flattening for the result of the second walking step.
walking_result_bags_2_flat = [
    (key, value)
    for key, values in walking_result_bags_2.items()
    for value in values
]
combined_bags_flat[:5]

[(163791,
  IntermediateLabel(values=[37642, 0], hidden_values=[], path=[80093, 123773], node_id=163791)),
 (163791,
  IntermediateLabel(values=[32745, 4], hidden_values=[], path=[105833, 123774], node_id=163791)),
 (163791,
  IntermediateLabel(values=[34122, 3], hidden_values=[], path=[114447, 123775], node_id=163791)),
 (163791,
  IntermediateLabel(values=[35505, 2], hidden_values=[], path=[100076, 123776], node_id=163791)),
 (240975,
  IntermediateLabel(values=[31745, 3], hidden_values=[], path=[92734, 123777], node_id=240975))]

In [111]:
# Draw five node ids reached by public transport.
# NOTE(review): unseeded, so the selection changes on every run; it raises if
# fewer than five keys exist.
end_stop_selection = random.sample(list(mc_raptor_result_bags.keys()), 5)
end_stop_selection

[90967, 49348, 6339, 88970, 26534]

In [112]:
from package import strtime
# Sanity check of the seconds -> "HH:MM:SS" conversion.
strtime.seconds_to_str_time(30240)

'08:24:00'

In [113]:
# Compare the bags of one selected node across the public-transport,
# bicycle and combined results.
end_stop = end_stop_selection[4]
print(end_stop)
print(mc_raptor_result_bags[end_stop])
# NOTE(review): raises KeyError if the node has no bicycle result.
print(bicycle_result_bags[end_stop])
print(combined_bags[end_stop])

26534
[IntermediateLabel(values=[31740, 0], hidden_values=[], path=[38444, 823800], node_id=26534)]
[IntermediateLabel(values=[34038, 0], hidden_values=[595], path=[114447, 757258], node_id=26534), IntermediateLabel(values=[31815, 2], hidden_values=[1691], path=[105833, 757259], node_id=26534), IntermediateLabel(values=[32834, 1], hidden_values=[1197], path=[105833, 757260], node_id=26534)]
[IntermediateLabel(values=[31740, 0], hidden_values=[], path=[38444, 823800], node_id=26534)]


In [114]:
# All (node_id, label) pairs of the combined bags that belong to one of the
# selected end nodes.
selection = [
    tu
    for tu in combined_bags_flat
    if tu[0] in end_stop_selection
]
selection

[(88970,
  IntermediateLabel(values=[29880, 0], hidden_values=[], path=[20701, 823876], node_id=88970)),
 (6339,
  IntermediateLabel(values=[30757, 0], hidden_values=[], path=[105833, 513819], node_id=6339)),
 (6339,
  IntermediateLabel(values=[30648, 1], hidden_values=[], path=[92734, 513820], node_id=6339)),
 (26534,
  IntermediateLabel(values=[31740, 0], hidden_values=[], path=[38444, 823800], node_id=26534)),
 (49348,
  IntermediateLabel(values=[31807, 1], hidden_values=[], path=[13069, 790343], node_id=49348)),
 (49348,
  IntermediateLabel(values=[32694, 0], hidden_values=[], path=[13069, 790344], node_id=49348)),
 (49348,
  IntermediateLabel(values=[31534, 3], hidden_values=[], path=[92734, 790345], node_id=49348)),
 (90967,
  IntermediateLabel(values=[30300, 0], hidden_values=[], path=[67316, 823869], node_id=90967))]

In [115]:
# Reconstruct the translated paths for the first 100000 labels of the second
# walking step and keep them next to the node id each label belongs to.
path_objs_with_ids = pd.Series(
    [
        (
            pair[0],
            path_manager.reconstruct_and_translate_path_for_label(
                pair[1], no_prefix_translator_map
            ),
        )
        for pair in walking_result_bags_2_flat[:100000]
    ]
)
# Split the (node_id, paths) pairs into two parallel tuples.
node_ids, path_objs = list(zip(*path_objs_with_ids))
path_objs[:3]

([<package.mcr.path.Path at 0x7f842fb2c670>,
  <package.mcr.path.Path at 0x7f842fb2c940>,
  <package.mcr.path.Path at 0x7f842fb2cf10>],
 [<package.mcr.path.Path at 0x7f842fb2c5e0>,
  <package.mcr.path.Path at 0x7f842fb2cc10>,
  <package.mcr.path.Path at 0x7f842fb2cc70>],
 [<package.mcr.path.Path at 0x7f842fb2cdf0>,
  <package.mcr.path.Path at 0x7f842fb2c4c0>,
  <package.mcr.path.Path at 0x7f842fb2c9d0>])

In [116]:
# Total number of labels produced by the second walking step.
len(walking_result_bags_2_flat)

577330

In [117]:
# Turn the two parallel tuples into Series so they can be put into a DataFrame.
node_ids = pd.Series(node_ids)
path_objs = pd.Series(list(path_objs))

In [118]:
# One row per label; `paths` holds the list of reconstructed path objects.
df = pd.DataFrame(path_objs, columns=['paths'])
# True when the class name of the second path element contains "GTFS".
df["contains_gtfs"] = df["paths"].apply(lambda x: str(type(x[1]))).str.contains("GTFS")
# Number of entries in the third path element.
# NOTE(review): both lambdas assume every row has at least three path elements.
df["third_path_length"] = df["paths"].apply(lambda x: len(x[2].path))
df['node_id'] = node_ids
df.head(3)

Unnamed: 0,paths,contains_gtfs,third_path_length,node_id
0,[<package.mcr.path.Path object at 0x7f842fb2c6...,False,0,140120
1,[<package.mcr.path.Path object at 0x7f842fb2c5...,False,0,140120
2,[<package.mcr.path.Path object at 0x7f842fb2cd...,False,0,50037


In [119]:
# Rows without a GTFS leg whose third path is nevertheless non-empty.
# The comparison must be parenthesised: `&` binds tighter than `!=`, so the
# unparenthesised form evaluates `(~contains_gtfs & third_path_length) != 0`.
df[~df.contains_gtfs & (df.third_path_length != 0)].size

0

In [120]:
# Share of rows whose reconstructed paths contain a GTFS leg.
len(df[df.contains_gtfs]) / len(df)

0.16437

In [121]:
# Inspect the rows whose paths contain a GTFS leg.
df[df.contains_gtfs]

Unnamed: 0,paths,contains_gtfs,third_path_length,node_id
3,[<package.mcr.path.Path object at 0x7f842fb2c1...,True,143,50037
12,[<package.mcr.path.Path object at 0x7f842fb2d7...,True,51,73315
18,[<package.mcr.path.Path object at 0x7f842fb2de...,True,63,116247
20,[<package.mcr.path.Path object at 0x7f842fb2e0...,True,46,62870
28,[<package.mcr.path.Path object at 0x7f842fb2e9...,True,46,51768
...,...,...,...,...
99956,[<package.mcr.path.Path object at 0x7f8032f87c...,True,57,53622
99966,[<package.mcr.path.Path object at 0x7f8032e5c7...,True,198,65088
99967,[<package.mcr.path.Path object at 0x7f8032e5c8...,True,73,19681
99968,[<package.mcr.path.Path object at 0x7f8032e5c9...,True,66,58569


In [123]:
# Distinct node ids that have at least one label with a GTFS leg. It is only
# referenced from the commented-out code at the end of the notebook.
effective_by_public_transport = df[df.contains_gtfs].node_id.unique()


In [130]:
import random
# Pick one random node id from the second walking step for closer inspection.
# NOTE(review): unseeded, so the printed id and all dependent outputs change
# between runs; uncomment the fixed id below to reproduce a specific case.
i = random.sample(list(walking_result_bags_2.keys()),1)[0]
# i = 113757
print(i)

3959


In [131]:
# Bag of the chosen node after the first walking step.
# NOTE(review): `i` was drawn from walking_result_bags_2; this raises KeyError
# if the node is absent from the first walking result.
te = walking_result_bags[i]
te

[IntermediateLabel(values=[38085], hidden_values=[], path=[95765], node_id=3959)]

In [132]:
# All (node_id, label) pairs of the second walking step for the chosen node;
# this is what the map cell below plots.
sel = [tu for tu in walking_result_bags_2_flat if tu[0] == i]
sel

[(3959,
  IntermediateLabel(values=[34543, 0], hidden_values=[], path=[38444, 823826, 1231859], node_id=3959)),
 (3959,
  IntermediateLabel(values=[34487, 2], hidden_values=[], path=[114447, 721107, 1231860], node_id=3959)),
 (3959,
  IntermediateLabel(values=[33481, 3], hidden_values=[], path=[105833, 721106, 1231861], node_id=3959)),
 (3959,
  IntermediateLabel(values=[32816, 4], hidden_values=[], path=[105833, 721109, 1231862], node_id=3959))]

In [133]:
# Labels of the chosen node before (combined bags) and after the second
# walking step, one per line.
# NOTE(review): raises KeyError if the node is missing from combined_bags.
print("\n".join(map(str, combined_bags[i])))
print()
print("\n".join(map(str, walking_result_bags_2[i])))

IntermediateLabel(values=[33481, 3], hidden_values=[], path=[105833, 721106], node_id=3959)
IntermediateLabel(values=[34487, 2], hidden_values=[], path=[114447, 721107], node_id=3959)
IntermediateLabel(values=[35703, 1], hidden_values=[], path=[114447, 721108], node_id=3959)
IntermediateLabel(values=[32816, 4], hidden_values=[], path=[105833, 721109], node_id=3959)
IntermediateLabel(values=[36919, 0], hidden_values=[], path=[114447, 721110], node_id=3959)

IntermediateLabel(values=[34543, 0], hidden_values=[], path=[38444, 823826, 1231859], node_id=3959)
IntermediateLabel(values=[34487, 2], hidden_values=[], path=[114447, 721107, 1231860], node_id=3959)
IntermediateLabel(values=[33481, 3], hidden_values=[], path=[105833, 721106, 1231861], node_id=3959)
IntermediateLabel(values=[32816, 4], hidden_values=[], path=[105833, 721109, 1231862], node_id=3959)


In [134]:
from folium import plugins
from folium.plugins import HeatMap

In [135]:
# Draw every label of the chosen node with its full multi-leg path:
# cycling legs in blue, walking legs in red, public-transport legs in green.
toloop = sel

# Centre the map on the node of the first selected label.
sample_label = sel[0][1]
sample_node_id = int(
    reverse_node_map[sample_label.node_id].replace("B", "").replace("W", "")
)
nodes_by_id = nodes.set_index("id")
nodes_by_id["id"] = nodes_by_id.index
sample_node = nodes_by_id.loc[sample_node_id]

m = folium.Map(location=[sample_node.lat, sample_node.lon], zoom_start=13)

# for end_stop_id, label in random.sample(combined_bags_flat, 100):
# for end_stop_id, label in random.sample(walking_result_bags_2_flat, 1000):
# for end_stop_node_id, label in selection:
for end_node_id , label in toloop:

    # strip the one-character mode prefix to get the id used in nodes_by_id
    end_node = nodes_by_id.loc[int(reverse_node_map[end_node_id][1:])]

    folium.CircleMarker(
        location=[end_node.lat, end_node.lon],
        popup=f"End: {end_node_id}",
        color="red",
        radius=3,
    ).add_to(m)

    paths = path_manager.reconstruct_and_translate_path_for_label(
        label, translator_map
    )
    for path in paths:
        if isinstance(path, Path):
            if path.path == []:
                continue
            # Path entries are prefixed ids; split them by mode prefix.
            cycling_path_nodes = [nodes_by_id.loc[int(node_id[1:])] for node_id in path.path if node_id[0] == "B"]
            walking_path_nodes = [nodes_by_id.loc[int(node_id[1:])] for node_id in path.path if node_id[0] == "W"]
            # print(f"cycling length: {len(cycling_path_nodes)}")
            # print(f"walking length: {len(walking_path_nodes)}\n")
            path_lat_lon = [(node.lat, node.lon) for node in cycling_path_nodes]
            if path_lat_lon != []:
                folium.PolyLine(path_lat_lon, color="blue", weight=2).add_to(m)
            path_lat_lon = [(node.lat, node.lon) for node in walking_path_nodes]
            if path_lat_lon != []:
                folium.PolyLine(path_lat_lon, color="red", weight=2).add_to(m)
        elif isinstance(path, GTFSPath):
            # Public-transport legs are drawn as a straight line between the
            # boarding and the alighting stop.
            start_stop_id = path.start_stop_id
            end_stop_id = path.end_stop_id
            start_stop = stops_by_id.loc[start_stop_id]
            end_stop = stops_by_id.loc[end_stop_id]
            trip = path.trip_id

            path_lat_lon = [
                (float(start_stop.stop_lat), float(start_stop.stop_lon)),
                (float(end_stop.stop_lat), float(end_stop.stop_lon)),
            ]
            folium.PolyLine(
                path_lat_lon,
                color="green",
                weight=2,
                popup=f"Trip: {trip}",
            ).add_to(m)

            folium.CircleMarker(
                location=[float(start_stop.stop_lat), float(start_stop.stop_lon)],
                popup=f"Start: {start_stop.stop_name}",
                color="green",
                radius=3,
            ).add_to(m)
            folium.CircleMarker(
                location=[float(end_stop.stop_lat), float(end_stop.stop_lon)],
                popup=f"End: {end_stop.stop_name}",
                color="green",
                radius=3,
            ).add_to(m)


# Mark the nearest graph node of every stop.
for _, stop in stops_df.iterrows():
    # Use the stop's own node id in the popup; this cell never assigns
    # `node_id`, so referencing it would show a stale value from another cell.
    nearest_node_id = stop["nearest_node"]
    node = nodes_by_id.loc[nearest_node_id]

    folium.CircleMarker(
        location=[node.lat, node.lon],
        radius=3,
        popup=f"node_id: {nearest_node_id}",
        color="green",
    ).add_to(m)

# for node_id in effective_by_public_transport:

#     node = nodes_by_id.loc[no_prefix_reverse_node_map[node_id]]
#     folium.CircleMarker(
#         location=[node.lat, node.lon],
#         radius=1,
#         popup=f"node_id: {node_id}",
#         color="red",
#     ).add_to(m)

# heat_map_nodes = [nodes_by_id.loc[no_prefix_reverse_node_map[node_id]] for node_id in effective_by_public_transport]
# HeatMap(
#     [(node.lat, node.lon) for node in heat_map_nodes],
# ).add_to(m)

m