In [1]:
# Reload edited modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2
# Append ../src/ to the import search path so modules located there can be imported.
import sys
sys.path.append('../src/')

In [2]:
from package.logger import Timed, rlog, setup
from package import storage
# Initialise the project logger; "INFO" is presumably the log level — confirm in package.logger.
setup("INFO")

In [3]:
from mcr_py import run_mlc_with_bags, GraphCache

In [4]:
from copy import deepcopy
from typing import Any, Tuple

import pandas as pd
import geopandas as gpd

import mcr_py
from mcr_py import GraphCache
import pyrosm
from package import storage, strtime
from package.logger import Timed
from package.mcr.label import McRAPTORLabel, merge_intermediate_bags
from package.mcr.path import PathManager, PathType
from package.osm import osm, graph
from package.raptor.mcraptor_single import McRaptorSingle
from package.raptor.bag import Bag
from package.mcr.bag import (
    convert_mc_raptor_bags_to_intermediate_bags,
    convert_mlc_bags_to_intermediate_bags,
)



# ACCURACY = 1
# ACCURACY_MULTIPLIER = 10 ** (ACCURACY - 1)

# AVG_WALKING_SPEED = 1.4  # m/s
# AVG_BIKING_SPEED = 4.0  # m/s


In [5]:
# stops_path = "../data/cleaned/stops.csv"
# city_id = "Koeln"
# osm_path = ""
# structs="../data/structs.pkl"

In [6]:
# Load the stored routing results (bags, path manager, node id maps, stops).
# The path is relative to the notebook, like the other "../data/..." paths used here,
# instead of an absolute path into one developer's home directory.
# NOTE(review): the file is a pickle, so storage.read_any_dict presumably unpickles it —
# only point this at trusted files.
path_source = "../data/bags.pkl"

data = storage.read_any_dict(path_source)
path_manager = data["path_manager"]
node_map = data["multi_modal_node_to_resetted_map"]
walking_node_map = data["walking_node_to_resetted_map"]
# Inverted maps: resetted id -> original node id.
reverse_node_map = {v: k for k, v in node_map.items()}
reverse_walking_node_map = {v: k for k, v in walking_node_map.items()}
stops_df = data["stops_df"]
bags_i = data["bags_i"]


In [7]:
import os
import folium
from package.mcr.path import PathType

In [8]:
# Inputs: the city whose OSM file is used and the cleaned stops file.
city_id = "Koeln"
stops_path = "../data/cleaned/stops.csv"
osm_path = osm.get_osm_path_from_city_id(city_id)

with Timed.info("Reading stops"):
    other_stops_df = storage.read_gdf(stops_path)

# Download only when the file is missing and a city id is available.
if os.path.exists(osm_path) or not city_id:
    rlog.info("Using existing OSM data")
else:
    rlog.info("Downloading OSM data")
    osm.download_city(city_id, osm_path)

osm_reader = osm.new_osm_reader(osm_path)

with Timed.info("Getting OSM graph"):
    nodes, edges = osm.get_graph_for_city_cropped_to_stops(osm_reader, other_stops_df)

In [9]:
# One row per label: node id, the first two label values (time, cost) and the
# bags_i key the label was stored under (used as the number of transfers).
label_columns = ["osm_node_id", "time", "cost", "n_transfers"]
label_rows = [
    (lbl.node_id, lbl.values[0], lbl.values[1], transfers)
    for transfers, bags_by_node in bags_i.items()
    for node_bag in bags_by_node.values()
    for lbl in node_bag
]
labels = pd.DataFrame(label_rows, columns=label_columns)

labels["human_readable_time"] = labels["time"].map(strtime.seconds_to_str_time)
labels


Unnamed: 0,osm_node_id,time,cost,n_transfers,human_readable_time
0,6170722754,33703,0,0,09:21:43
1,1310404123,35840,0,0,09:57:20
2,2850273318,33956,0,0,09:25:56
3,5826954535,34143,0,0,09:29:03
4,3138821907,29850,0,0,08:17:30
...,...,...,...,...,...
696105,1039560883,30004,0,2,08:20:04
696106,1039560883,29965,1,2,08:19:25
696107,8737836610,31026,0,2,08:37:06
696108,8737836610,30789,2,2,08:33:09


In [10]:
# Summary statistics of the three numeric label columns.
numeric_columns = ["time", "cost", "n_transfers"]
labels[numeric_columns].describe()

Unnamed: 0,time,cost,n_transfers
count,696110.0,696110.0,696110.0
mean,31121.528474,0.705413,1.251251
std,1247.6651,0.869672,0.738853
min,28800.0,0.0,0.0
25%,30292.0,0.0,1.0
50%,30840.0,0.0,1.0
75%,31604.0,1.0,2.0
max,37161.0,3.0,2.0


In [11]:
# Upper bounds (inclusive) a label must satisfy to be kept.
max_time = strtime.str_time_to_seconds("09:00:00")
max_cost = 10
max_transfers = 10

within_time = labels["time"] <= max_time
within_cost = labels["cost"] <= max_cost
within_transfers = labels["n_transfers"] <= max_transfers

valid_labels = labels[within_time & within_cost & within_transfers]

In [12]:
# Plot at most 1000 labels. A fixed random_state keeps the sample (and therefore the map)
# identical across kernel restarts.
selection = valid_labels.sample(n=min(1000, len(valid_labels)), random_state=42)
# selection = valid_labels

# Highest cost among the sampled labels; used to scale the colour palette.
max_cost = selection.cost.max()

from typing import Dict
from colorsys import hls_to_rgb

def hsl_to_hex(h: float, l: float, s: float) -> str:
    """Convert hue, lightness and saturation into a ``#rrggbb`` string.

    The components are passed to ``colorsys.hls_to_rgb``; each resulting
    channel is scaled by 255 and truncated to an integer.
    """
    red, green, blue = hls_to_rgb(h, l, s)
    channels = (int(red * 255.0), int(green * 255.0), int(blue * 255.0))
    return "#%02x%02x%02x" % channels

def generate_colors_hsl(max_cost: float) -> Dict[int, str]:
    """Build a cost -> hex colour palette for the integer costs 0..max_cost.

    Hue (0) and saturation (0.8) are fixed; lightness runs from 0.9 at
    cost 0 down to 0.5 at ``max_cost``. A ``max_cost`` of 0 yields a single
    fixed colour, which also avoids dividing by zero.
    """
    if max_cost == 0:
        return {0: "#ffcccc"}

    palette = {}
    for cost in range(int(max_cost) + 1):
        lightness = 0.5 + 0.4 * (max_cost - cost) / max_cost
        palette[cost] = hsl_to_hex(h=0, l=lightness, s=0.8)
    return palette

# Palette mapping every integer cost from 0 up to the sample's maximum cost to a hex colour.
colors = generate_colors_hsl(max_cost)

In [13]:
# Node of the first label that attains the minimum time.
is_earliest = labels["time"] == labels["time"].min()
start_node_id = labels[is_earliest].iloc[0].osm_node_id


In [14]:
# Index the OSM nodes by id while keeping "id" available as a regular column.
nodes_by_id = nodes.set_index("id")
nodes_by_id["id"] = nodes_by_id.index

start_node = nodes_by_id.loc[start_node_id]

m = folium.Map(location=[start_node.lat, start_node.lon], zoom_start=13)

# Look shared by every circle drawn on this map.
marker_style = dict(weight=1, fill=True, fill_opacity=1, color="black")

# One small circle per sampled label, coloured by its cost.
for label in selection.itertuples():
    node = nodes_by_id.loc[label.osm_node_id]
    label_marker = folium.CircleMarker(
        location=[node.lat, node.lon],
        radius=3,
        fill_color=colors[label.cost],
        popup=f"{label.human_readable_time} ({label.cost})",
        **marker_style,
    )
    label_marker.add_to(m)

# Larger green circle for the start node.
start_marker = folium.CircleMarker(
    location=[start_node.lat, start_node.lon],
    radius=5,
    fill_color="green",
    popup="Start",
    **marker_style,
)
start_marker.add_to(m)

m

In [15]:
def _strip_prefix(prefixed_map):
    """Return a copy of ``prefixed_map`` whose values drop their first character and become ints."""
    return {key: int(value[1:]) for key, value in prefixed_map.items()}


# Per path type: map from resetted id back to the original (prefixed) node id.
# Public transport segments have no map.
translator_map = {
    PathType.WALKING: reverse_walking_node_map,
    PathType.CYCLING_WALKING: reverse_node_map,
    PathType.PUBLIC_TRANSPORT: None,
}

# Same lookups, but yielding plain integer node ids without the one-character prefix.
no_prefix_reverse_walking_node_map = _strip_prefix(reverse_walking_node_map)
no_prefix_reverse_node_map = _strip_prefix(reverse_node_map)
no_prefix_translator_map = {
    PathType.WALKING: no_prefix_reverse_walking_node_map,
    PathType.CYCLING_WALKING: no_prefix_reverse_node_map,
    PathType.PUBLIC_TRANSPORT: None,
}


In [16]:
from package.mcr.path import Path, GTFSPath

In [17]:
# Cast the stop ids to int (in place on stops_df), then index the stops by that id.
stop_ids_as_int = stops_df['stop_id'].astype(int)
stops_df['stop_id'] = stop_ids_as_int
stops_by_id = stops_df.set_index(keys='stop_id')
stops_by_id.head(n=2)

Unnamed: 0_level_0,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,geometry,nearest_node,nearest_node_dist
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,,Köln Heumarkt,,50.935705,6.959995,,,0,,,POINT (6.96000 50.93570),673702632,16.33734
2,,Köln Neumarkt,,50.93577,6.947677,,,0,,,POINT (6.94768 50.93577),443861,17.695676


In [18]:
# Bags stored under key 2 of bags_i. The keys of bags_i are used as `n_transfers` when the
# labels table is built, and 2 is the largest value reported for that column.
walking_result_bags = bags_i[2]

In [19]:
# Flatten the per-node bags into a list of (node_id, label) tuples, one per label.
# NOTE(review): the loop variables (`node_id`, `bag`, `label`) remain defined as notebook
# globals after this cell has run.
walking_result_bags_flat = []
for node_id, bag in walking_result_bags.items():
	for label in bag:
		walking_result_bags_flat.append((node_id, label))

In [20]:
# Reconstruct the translated path of the first 10000 (node_id, label) pairs.
reconstructed_pairs = [
    (
        pair[0],
        path_manager.reconstruct_and_translate_path_for_label(
            pair[1], no_prefix_translator_map
        ),
    )
    for pair in walking_result_bags_flat[:10000]
]
path_objs_with_ids = pd.Series(reconstructed_pairs)

# Split the pairs into two parallel tuples: node ids and their path lists.
node_ids, path_objs = list(zip(*path_objs_with_ids))
path_objs[:3]

([Path(path_type=PathType.WALKING, path=[394001227, 394001227, 1679917802, 4394977417, 2694582111, 4297860764, 266709751, 8805331129, 737784943, 266709711, 626052074, 3141997563, 737845925, 266710405, 2427289544, 4297860767, 2116624890, 12248559, 5107768736, 3827676841, 2269266541, 2353880933, 5538021268, 266567755, 943957938, 8789517654, 1469714740], meta={'values:': [29181, 0], 'hidden_values': []}),
  <package.mcr.path.GTFSPath at 0x7fcc6e26f340>,
  Path(path_type=PathType.WALKING, path=[2107792899, 359576, 7929046353, 7929046356, 7929046357, 7929046358, 408497683, 276659756, 3924459781, 1704324146], meta={'values:': [30626, 0], 'hidden_values': []}),
  Path(path_type=PathType.CYCLING_WALKING, path=[257100010, 1704324146, 1704324157, 2492536467, 2492536471, 1704324166, 2415878662, 2492536476, 1704324174, 2492536479, 2492536470], meta={'values:': [30658, 1], 'hidden_values': [32]}),
  Path(path_type=PathType.WALKING, path=[], meta={'values:': [30658, 1], 'hidden_values': []})],
 [Pat

In [21]:
# Total number of (node_id, label) pairs; the path reconstruction cell only processes the
# first 10000 of them.
len(walking_result_bags_flat)

299425

In [22]:
# Wrap the path lists and the node ids in Series (each path list stays a single element).
path_objs = pd.Series([*path_objs])
node_ids = pd.Series(node_ids)

In [23]:
def _segment_length(paths, position):
    """Return the node count of the segment at ``position``.

    Yields 0 when the route has no segment at that position or when the
    segment is not a ``Path`` (e.g. a GTFS leg), so routes with fewer
    segments no longer raise ``IndexError``.
    """
    if position >= len(paths):
        return 0
    segment = paths[position]
    return len(segment.path) if isinstance(segment, Path) else 0


df = pd.DataFrame(path_objs, columns=["paths"])
df["contains_gtfs"] = df["paths"].apply(
    lambda x: any(isinstance(path, GTFSPath) for path in x)
)

# Column name -> position of the segment inside each path list.
# Extend this mapping (e.g. with positions 5 and 6) to inspect longer routes.
segment_length_columns = {
    "first_path_length": 0,
    "third_path_length": 2,
    "fourth_path_length": 3,
    "fifth_path_length": 4,
}
for column, position in segment_length_columns.items():
    df[column] = df["paths"].apply(_segment_length, args=(position,))

# Path type of every segment; GTFS legs are labelled with the plain string "GTFS".
df["path_types"] = df["paths"].apply(
    lambda x: [path.path_type if isinstance(path, Path) else "GTFS" for path in x]
)
df["node_id"] = node_ids
df.head(3)


Unnamed: 0,paths,contains_gtfs,first_path_length,third_path_length,fourth_path_length,fifth_path_length,path_types,node_id
0,"[Path(path_type=PathType.WALKING, path=[394001...",True,27,10,11,0,"[PathType.WALKING, GTFS, PathType.WALKING, Pat...",2492536470
1,"[Path(path_type=PathType.WALKING, path=[394001...",True,27,10,11,0,"[PathType.WALKING, GTFS, PathType.WALKING, Pat...",2492536470
2,"[Path(path_type=PathType.WALKING, path=[394001...",True,16,11,248,0,"[PathType.WALKING, GTFS, PathType.WALKING, Pat...",2492536470


In [24]:
# Share of reconstructed routes that contain at least one GTFS leg.
len(df[df.contains_gtfs]) / len(df)

0.7399

In [25]:
# Share of distinct destination nodes that have at least one route with a GTFS leg.
gtfs_node_count = df.loc[df["contains_gtfs"], "node_id"].nunique(dropna=False)
all_node_count = df["node_id"].nunique(dropna=False)
gtfs_node_count / all_node_count

0.9145196243679268

In [26]:
# Routes that contain at least one GTFS leg.
df.loc[df["contains_gtfs"]]

Unnamed: 0,paths,contains_gtfs,first_path_length,third_path_length,fourth_path_length,fifth_path_length,path_types,node_id
0,"[Path(path_type=PathType.WALKING, path=[394001...",True,27,10,11,0,"[PathType.WALKING, GTFS, PathType.WALKING, Pat...",2492536470
1,"[Path(path_type=PathType.WALKING, path=[394001...",True,27,10,11,0,"[PathType.WALKING, GTFS, PathType.WALKING, Pat...",2492536470
2,"[Path(path_type=PathType.WALKING, path=[394001...",True,16,11,248,0,"[PathType.WALKING, GTFS, PathType.WALKING, Pat...",2492536470
3,"[Path(path_type=PathType.WALKING, path=[394001...",True,16,11,266,0,"[PathType.WALKING, GTFS, PathType.WALKING, Pat...",2893101948
4,"[Path(path_type=PathType.WALKING, path=[394001...",True,16,0,0,100,"[PathType.WALKING, GTFS, PathType.WALKING, GTF...",2893101948
...,...,...,...,...,...,...,...,...
9995,"[Path(path_type=PathType.WALKING, path=[394001...",True,27,13,147,0,"[PathType.WALKING, GTFS, PathType.WALKING, Pat...",239776726
9996,"[Path(path_type=PathType.WALKING, path=[394001...",True,16,0,0,37,"[PathType.WALKING, GTFS, PathType.WALKING, GTF...",239776726
9997,"[Path(path_type=PathType.WALKING, path=[394001...",True,16,21,259,0,"[PathType.WALKING, GTFS, PathType.WALKING, Pat...",256572380
9998,"[Path(path_type=PathType.WALKING, path=[394001...",True,16,0,0,40,"[PathType.WALKING, GTFS, PathType.WALKING, GTF...",256572380


In [27]:
# Distinct destination nodes with at least one route that contains a GTFS leg.
effective_by_public_transport = df.loc[df["contains_gtfs"], "node_id"].unique()

In [52]:
import random
# Pick one random destination node. random.choice draws a single element directly and also
# works when only one node is available (random.sample(..., 2) needs at least two).
i = random.choice(list(df.node_id.unique()))
# i = 113757
# All (node_id, label) pairs that end at the chosen node.
sel = [tu for tu in walking_result_bags_flat if tu[0] == i]
print(f"num GTFS paths = {df[df.node_id == i].contains_gtfs.sum()}")
print(i)

num GTFS paths = 1
4845678702


In [53]:
# Print every label of the selected node's bag on its own line.
print(*walking_result_bags[i], sep="\n")

IntermediateLabel(values=[30543, 0], hidden_values=[], path=[97037, 468739, 565604, 1027683, 1057744], node_id=4845678702)
IntermediateLabel(values=[29736, 1], hidden_values=[], path=[63415, 224478, 470173, 1027682, 1057745], node_id=4845678702)


In [54]:
from folium import plugins
from folium.plugins import HeatMap

In [55]:
# Reference time that format_meta subtracts from the first segment's arrival time.
# Presumably 28800 s (08:00:00), which matches the minimum label time in the describe() output.
start_time = strtime.str_time_to_seconds("08:00:00")

In [56]:
def format_meta(meta, previous_meta):
    """Describe one segment as ``"<duration> (<cost>)"``.

    ``meta["values:"]`` holds the arrival time at index 0 and the cost at
    index 1 (the key really ends with a colon). When ``previous_meta`` is
    truthy, both values are reported relative to it; otherwise the time is
    taken relative to the global ``start_time`` and the cost is used as is.
    """
    current = meta["values:"]
    arrival_time, cost = current[0], current[1]

    if not previous_meta:
        elapsed = arrival_time - start_time
        return f"{strtime.seconds_to_str_time(elapsed)} ({cost})"

    earlier = previous_meta["values:"]
    elapsed = arrival_time - earlier[0]
    added_cost = cost - earlier[1]
    return f"{strtime.seconds_to_str_time(elapsed)} ({added_cost})"

In [57]:
# Draw every route that ends at the selected node: cycling parts in blue, walking parts in red,
# public transport legs in green.
toloop = sel

sample_label = sel[0][1]
sample_node_id = sample_label.node_id
nodes_by_id = nodes.set_index("id")
nodes_by_id["id"] = nodes_by_id.index
sample_node = nodes_by_id.loc[sample_node_id]

m = folium.Map(location=[sample_node.lat, sample_node.lon], zoom_start=13)

for end_node_id, label in toloop:

    end_node = nodes_by_id.loc[end_node_id]

    folium.CircleMarker(
        location=[end_node.lat, end_node.lon],
        popup=f"End: {end_node_id}",
        color="red",
        radius=3,
    ).add_to(m)

    paths = path_manager.reconstruct_and_translate_path_for_label(
        label, translator_map
    )
    # The index is deliberately not called `i`: `i` is the notebook-global id of the selected
    # node, and overwriting it here breaks re-running the cells that read walking_result_bags[i].
    for segment_index, path in enumerate(paths):
        if isinstance(path, Path):
            if path.path == []:
                continue
            # Node ids carry a one-character prefix: "B" nodes are drawn as the cycling part,
            # "W" nodes as the walking part.
            cycling_path_nodes = [nodes_by_id.loc[int(node_id[1:])] for node_id in path.path if node_id[0] == "B"]
            walking_path_nodes = [nodes_by_id.loc[int(node_id[1:])] for node_id in path.path if node_id[0] == "W"]
            path_lat_lon = [(node.lat, node.lon) for node in cycling_path_nodes]
            # Duration and cost are shown relative to the preceding segment, if there is one.
            previous_meta = paths[segment_index - 1].meta if segment_index > 0 else None
            meta = format_meta(path.meta, previous_meta)
            if path_lat_lon != []:
                folium.PolyLine(path_lat_lon, color="blue", weight=2, popup=str(meta)).add_to(m)
            path_lat_lon = [(node.lat, node.lon) for node in walking_path_nodes]
            if path_lat_lon != []:
                folium.PolyLine(path_lat_lon, color="red", weight=2, popup=str(meta)).add_to(m)
        elif isinstance(path, GTFSPath):
            start_stop_id = path.start_stop_id
            end_stop_id = path.end_stop_id
            start_stop = stops_by_id.loc[start_stop_id]
            end_stop = stops_by_id.loc[end_stop_id]
            trip = path.trip_id

            # A public transport leg is drawn as a straight line between its two stops.
            path_lat_lon = [
                (float(start_stop.stop_lat), float(start_stop.stop_lon)),
                (float(end_stop.stop_lat), float(end_stop.stop_lon)),
            ]
            folium.PolyLine(
                path_lat_lon,
                color="green",
                weight=2,
                popup=f"Trip: {trip}",
            ).add_to(m)

            folium.CircleMarker(
                location=[float(start_stop.stop_lat), float(start_stop.stop_lon)],
                popup=f"Start: {start_stop.stop_name}",
                color="green",
                radius=3,
            ).add_to(m)
            folium.CircleMarker(
                location=[float(end_stop.stop_lat), float(end_stop.stop_lon)],
                popup=f"End: {end_stop.stop_name}",
                color="green",
                radius=3,
            ).add_to(m)
        else:
            raise Exception("Unknown path type")

m


In [33]:
# Heat map of the nodes that have at least one route with a GTFS leg, plus one marker per stop.
m = folium.Map(location=[sample_node.lat, sample_node.lon], zoom_start=13)

heat_map_nodes = [nodes_by_id.loc[node_id] for node_id in effective_by_public_transport]
HeatMap(
    [(node.lat, node.lon) for node in heat_map_nodes],
).add_to(m)

for _, stop in stops_df.iterrows():
    # Each stop is drawn at its nearest OSM node. The popup shows that node's id rather than
    # whatever a global `node_id` left over from an earlier cell happens to hold.
    nearest_node_id = stop["nearest_node"]
    node = nodes_by_id.loc[nearest_node_id]

    folium.CircleMarker(
        location=[node.lat, node.lon],
        radius=3,
        popup=f"node_id: {nearest_node_id}",
        color="green",
    ).add_to(m)


m