In [1]:
# Reload imported modules automatically before each execution so edits to the
# project sources are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Put the project's ../src/ directory on the module search path so that
# `package` can be imported from this notebook.
import sys
sys.path.append('../src/')

In [5]:
from package.logger import Timed, rlog, setup
# Initialise the project logger with the "INFO" setting
# (presumably the log level - confirm in package.logger.setup).
setup("INFO")


from package import storage
import pandas as pd
from package import strtime
import folium
import os
from package.osm import osm
from package.geometa import GeoMeta

In [7]:
# Inputs for this run: the city to analyse and where its data lives.
city_id = "Koeln"
stops_path = "../data/cleaned/stops.csv"
osm_path = osm.get_osm_path_from_city_id(city_id)

geo_meta = GeoMeta.load("../data/geometa.pkl")

with Timed.info("Reading stops"):
	other_stops_df = storage.read_gdf(stops_path)

# The OSM extract is only downloaded when the file is missing and a city id is
# set; in every other case the existing file is used as-is.
if os.path.exists(osm_path) or not city_id:
	rlog.info("Using existing OSM data")
else:
	rlog.info("Downloading OSM data")
	osm.download_city(city_id, osm_path)

osm_reader = osm.new_osm_reader(osm_path)

# Street graph (nodes and edges) restricted to the boundary held in geo_meta.
with Timed.info("Getting OSM graph"):
	nodes, edges = osm.get_graph_for_city_cropped_to_boundary(osm_reader, geo_meta)

In [9]:
# Load the precomputed routing results.
# The path is relative to the notebook's working directory, like the other
# data paths in this notebook, instead of an absolute path inside one
# developer's home directory (which only worked on that machine).
# NOTE(review): the file is a .pkl - only load files from a trusted source.
path_source = "../data/bags.pkl"

data = storage.read_any_dict(path_source)
path_manager = data["path_manager"]
node_map = data["multi_modal_node_to_resetted_map"]
walking_node_map = data["walking_node_to_resetted_map"]
# Inverted look-ups: from the mapped id back to the original key.
reverse_node_map = {v: k for k, v in node_map.items()}
reverse_walking_node_map = {v: k for k, v in walking_node_map.items()}
stops_df = data["stops_df"]
bags_i = data["bags_i"]

In [None]:
# Flatten bags_i into one row per label. The key of bags_i becomes the
# n_transfers column; the first two entries of label.values become time and cost.
label_rows = []
for n_transfers, bags in bags_i.items():
    for bag in bags.values():
        for label in bag:
            label_rows.append(
                (label.node_id, label.values[0], label.values[1], n_transfers)
            )

labels = pd.DataFrame(
    label_rows,
    columns=["osm_node_id", "time", "cost", "n_transfers"],
)

# Add a string rendering of the time column for display.
labels["human_readable_time"] = labels["time"].apply(strtime.seconds_to_str_time)
labels

Unnamed: 0,osm_node_id,time,cost,n_transfers,human_readable_time
0,79740953,33733,0,0,09:22:13
1,9036315859,36594,0,0,10:09:54
2,1595145824,35673,0,0,09:54:33
3,1343923548,37480,0,0,10:24:40
4,987635366,34074,0,0,09:27:54
...,...,...,...,...,...
1051297,5922655627,33791,200,2,09:23:11
1051298,5922655627,31765,620,2,08:49:25
1051299,5922655627,31895,400,2,08:51:35
1051300,5922655627,35638,100,2,09:53:58


In [None]:
# Upper bounds a label must respect to be kept.
max_time = strtime.str_time_to_seconds("09:00:00")
max_cost = 1000
max_transfers = 10

# One boolean mask per criterion; a label is valid only if all three hold.
within_time = labels["time"] <= max_time
within_cost = labels["cost"] <= max_cost
within_transfers = labels["n_transfers"] <= max_transfers

valid_labels = labels[within_time & within_cost & within_transfers]

In [None]:
# Plot at most 1000 of the valid labels. The sample is seeded so that
# "Restart & Run All" always shows the same markers.
selection = valid_labels.sample(min(1000, len(valid_labels)), random_state=42)
# To plot every valid label instead, use:
# selection = valid_labels

# NOTE: this rebinds max_cost (previously the filter threshold of the
# valid_labels cell) to the largest cost in the sample; it drives the colour
# scale below. Re-run the filter cell before re-filtering.
max_cost = selection.cost.max()

from typing import Dict
from colorsys import hls_to_rgb

def hsl_to_hex(h: float, l: float, s: float) -> str:
    """Convert a hue/lightness/saturation colour to a "#rrggbb" hex string.

    The three components are passed to ``colorsys.hls_to_rgb`` in the order
    (h, l, s). Each resulting channel is scaled by 255 and truncated to an
    integer (not rounded) before being formatted as two hex digits.
    """
    channels = hls_to_rgb(h, l, s)
    return "#" + "".join(f"{int(channel * 255.0):02x}" for channel in channels)

def generate_colors_hsl(max_cost: float) -> Dict[int, str]:
    """Build a cost -> hex colour table for the integer costs 0..int(max_cost).

    All colours share hue 0 and saturation 0.8; lightness runs linearly from
    0.9 at cost 0 down to 0.5 at ``max_cost``. A ``max_cost`` of 0 is handled
    separately (it would otherwise divide by zero) and yields the single
    entry ``{0: "#ffcccc"}``.
    """
    if max_cost == 0:
        return {0: "#ffcccc"}

    palette: Dict[int, str] = {}
    for cost in range(int(max_cost) + 1):
        lightness = 0.5 + 0.4 * (max_cost - cost) / max_cost
        palette[cost] = hsl_to_hex(h=0, l=lightness, s=0.8)
    return palette

# Look-up table from integer cost to hex colour, built for the current
# max_cost; the map cell uses it as the marker fill colour.
colors = generate_colors_hsl(max_cost)

In [None]:
# The node of the first label with the smallest time is used as the start node.
earliest_labels = labels[labels["time"] == labels["time"].min()]
start_node_id = earliest_labels.iloc[0]["osm_node_id"]

In [None]:
# Index the OSM nodes by id for direct look-up, keeping the id as a column too.
nodes_by_id = nodes.set_index("id")
nodes_by_id["id"] = nodes_by_id.index

start_node = nodes_by_id.loc[start_node_id]

# Map centred on the start node.
m = folium.Map(location=[start_node.lat, start_node.lon], zoom_start=13)

# Appearance shared by every marker on this map.
marker_style = dict(weight=1, fill=True, fill_opacity=1, color="black")

# One small marker per sampled label, filled according to the label's cost.
for label in selection.itertuples():
	node = nodes_by_id.loc[label.osm_node_id]
	label_marker = folium.CircleMarker(
		location=[node.lat, node.lon],
		radius=3,
		fill_color=colors[label.cost],
		popup=f"{label.human_readable_time} ({label.cost})",
		**marker_style,
	)
	label_marker.add_to(m)

# Larger green marker for the start node.
start_marker = folium.CircleMarker(
	location=[start_node.lat, start_node.lon],
	radius=5,
	fill_color="green",
	popup="Start",
	**marker_style,
)
start_marker.add_to(m)

m

In [None]:
from package.mcr.path import Path, GTFSPath, PathType

# Per path type, the map used to translate path node ids back to the original
# ids; public-transport paths get no translation map.
translator_map = {
    PathType.WALKING: reverse_walking_node_map,
    PathType.CYCLING_WALKING: reverse_node_map,
    PathType.PUBLIC_TRANSPORT: None,
}

# Same look-ups, but with the first character of every original id dropped and
# the remainder converted to int (presumably the "B"/"W" prefix - confirm).
no_prefix_reverse_walking_node_map = {}
for mapped_id, prefixed_id in reverse_walking_node_map.items():
    no_prefix_reverse_walking_node_map[mapped_id] = int(prefixed_id[1:])

no_prefix_reverse_node_map = {}
for mapped_id, prefixed_id in reverse_node_map.items():
    no_prefix_reverse_node_map[mapped_id] = int(prefixed_id[1:])

no_prefix_translator_map = {
    PathType.WALKING: no_prefix_reverse_walking_node_map,
    PathType.CYCLING_WALKING: no_prefix_reverse_node_map,
    PathType.PUBLIC_TRANSPORT: None,
}

In [None]:
# Bags stored under key 2 of bags_i (the keys of bags_i are used as
# n_transfers when the labels table is built).
# NOTE(review): the "walking_" prefix suggests these are the results after a
# walking step - confirm what key 2 stands for.
walking_result_bags = bags_i[2]

In [None]:
# Flatten the per-node bags into a list of (node id, label) pairs.
walking_result_bags_flat = []
for node_id, bag in walking_result_bags.items():
	walking_result_bags_flat.extend((node_id, label) for label in bag)

In [None]:
# Reconstruct the translated paths for the first 10000 (node id, label) pairs
# only, keeping each node id next to its reconstructed paths.
path_objs_with_ids = pd.Series(
    [
        (
            node_id,
            path_manager.reconstruct_and_translate_path_for_label(
                label, no_prefix_translator_map
            ),
        )
        for node_id, label in walking_result_bags_flat[:10000]
    ]
)

# Split the pairs into two parallel tuples.
node_ids, path_objs = zip(*path_objs_with_ids)
path_objs[:3]

([Path(path_type=PathType.WALKING, path=[1638242852, 1638242852, 603686447, 1314961973, 300424402, 582726945, 603686462, 603686461, 603686463, 603686469, 582726943, 603686465, 1314962008, 300424401, 1638243018, 603686484, 1638243080, 300424397, 300424396, 1314961967, 1638243161, 700335900, 1638243185, 1314961989, 1638243222, 300424395, 1638243283, 1638243293, 1638243296, 1638243302, 1638243303, 447910471, 447910473, 447910444, 1638243336, 1638243355], meta={'values': [29066, 0], 'hidden_values': [0, 0]}),
  <package.mcr.path.GTFSPath at 0x7fe7c24ceef0>,
  Path(path_type=PathType.WALKING, path=[2283344073, 1349256466, 32356576, 8544537537, 8544537538, 6249003758, 368834], meta={'values': [29932, 320], 'hidden_values': [0, 6]}),
  Path(path_type=PathType.CYCLING_WALKING, path=[6407729283, 368834, 6249003758, 8544537538, 8544537537, 32356576, 3267056899, 254372621, 32356644, 2348391750, 2348391751, 359658, 273219076, 359656, 359659, 2140214877, 8544537540, 1093872884, 359651, 2671442578, 

In [None]:
# df = pd.DataFrame(path_objs, columns=["paths"])
# df["contains_gtfs"] = df["paths"].apply(
#     lambda x: any([isinstance(path, GTFSPath) for path in x])
# )
# df["first_path_length"] = df["paths"].apply(lambda x: len(x[0].path))
# df["third_path_length"] = df["paths"].apply(lambda x: len(x[2].path))
# df["fourth_path_length"] = df["paths"].apply(lambda x: len(x[3].path) if isinstance(x[3], Path) else 0)
# df["fifth_path_length"] = df["paths"].apply(lambda x: len(x[4].path))
# # df["sixth_path_length"] = df["paths"].apply(lambda x: len(x[5].path) if isinstance(x[5], Path) else 0)
# # df["seventh_path_length"] = df["paths"].apply(lambda x: len(x[6].path))
# df["path_types"] = df["paths"].apply(
#     lambda x: [path.path_type if isinstance(path, Path) else "GTFS" for path in x]
# )
# df["node_id"] = node_ids
# df.head(3)

In [None]:
import random

# Node whose result labels are inspected below. It is pinned to a fixed id so
# the cell is reproducible. The previous version first drew a random node from
# `nodes` and immediately overwrote it with this id, so the draw was dead work.
# To inspect a random node instead: i = random.choice(list(nodes.id.unique()))
i = 3922625314
print(i)
# All (node id, label) pairs stored for that node.
sel = [entry for entry in walking_result_bags_flat if entry[0] == i]
sel

3922625314


[(3922625314,
  IntermediateLabel(values=[30111, 200], hidden_values=[0, 0], path=[73655, 198329, 582508, 1008902, 1605747], node_id=3922625314)),
 (3922625314,
  IntermediateLabel(values=[30569, 100], hidden_values=[0, 0], path=[73655, 169304, 628642, 1008900, 1605748], node_id=3922625314)),
 (3922625314,
  IntermediateLabel(values=[31935, 0], hidden_values=[0, 0], path=[69998, 198330, 582507, 1008901, 1605749], node_id=3922625314))]

In [None]:
from folium import plugins
from folium.plugins import HeatMap

In [None]:
# Reference start time, converted from "HH:MM:SS" by strtime.str_time_to_seconds.
# format_meta subtracts it from the arrival time of a path that has no predecessor.
start_time = strtime.str_time_to_seconds("08:00:00")

In [None]:
def format_meta(meta, previous_meta):
	"""Describe one path as the string "<time> (<cost>)".

	``meta["values"][0]`` is treated as the arrival time and
	``meta["values"][1]`` as the cost. When ``previous_meta`` is given, both
	numbers are reported relative to it (current minus previous). Otherwise the
	time is reported relative to the module-level ``start_time`` and the cost
	is reported unchanged. The time is rendered with
	``strtime.seconds_to_str_time``.
	"""
	current = meta["values"]
	arrival_time, cost = current[0], current[1]

	if not previous_meta:
		# No predecessor: measure from the global start time, keep the cost as is.
		return f"{strtime.seconds_to_str_time(arrival_time - start_time)} ({cost})"

	previous = previous_meta["values"]
	previous_arrival_time, previous_cost = previous[0], previous[1]
	duration = arrival_time - previous_arrival_time
	added_cost = cost - previous_cost
	return f"{strtime.seconds_to_str_time(duration)} ({added_cost})"

In [None]:
# Draw the reconstructed routes for every (end node, label) pair in `sel`:
# blue lines for "B"-prefixed nodes, red lines for "W"-prefixed nodes and green
# lines (with stop markers) for public-transport paths.
from itertools import groupby

toloop = sel

stops_by_id = stops_df.set_index("stop_id")
sample_label = sel[0][1]
sample_node_id = sample_label.node_id
nodes_by_id = nodes.set_index("id")
nodes_by_id["id"] = nodes_by_id.index
sample_node = nodes_by_id.loc[sample_node_id]

# Line colour per node-id prefix; nodes with any other prefix are not drawn.
prefix_colors = {"B": "blue", "W": "red"}

m = folium.Map(location=[sample_node.lat, sample_node.lon], zoom_start=13)

for end_node_id, label in toloop:
    end_node = nodes_by_id.loc[end_node_id]

    folium.CircleMarker(
        location=[end_node.lat, end_node.lon],
        popup=f"End: {end_node_id}",
        color="red",
        radius=3,
    ).add_to(m)

    paths = path_manager.reconstruct_and_translate_path_for_label(
        label, translator_map
    )
    # The index is called leg_index (not i) so this loop does not overwrite the
    # node id `i` chosen in the node-selection cell.
    for leg_index, path in enumerate(paths):
        if isinstance(path, Path):
            if path.path == []:
                continue
            previous_meta = paths[leg_index - 1].meta if leg_index > 0 else None
            meta = format_meta(path.meta, previous_meta)
            # Draw each contiguous run of same-prefix nodes as its own line.
            # Joining all "B" (or all "W") nodes of a path into a single line
            # would connect separate stretches of the same mode with a
            # straight line across the stretch in between.
            for prefix, run in groupby(path.path, key=lambda node_id: node_id[0]):
                run_color = prefix_colors.get(prefix)
                if run_color is None:
                    continue
                run_lat_lon = []
                for node_id in run:
                    node = nodes_by_id.loc[int(node_id[1:])]
                    run_lat_lon.append((node.lat, node.lon))
                folium.PolyLine(
                    run_lat_lon, color=run_color, weight=2, popup=str(meta)
                ).add_to(m)
        elif isinstance(path, GTFSPath):
            start_stop_id = path.start_stop_id
            end_stop_id = path.end_stop_id
            start_stop = stops_by_id.loc[start_stop_id]
            end_stop = stops_by_id.loc[end_stop_id]
            trip = path.trip_id
            # Only mark the id as shortened when characters are actually cut off.
            if len(trip) > 10:
                trip = trip[:10] + "..."

            previous_meta = paths[leg_index - 1].meta if leg_index > 0 else None
            line_msg = f"Trip: {trip}\n---\n {format_meta(path.meta, previous_meta)}"

            # Public-transport paths are drawn as a straight line between stops.
            path_lat_lon = [
                (float(start_stop.stop_lat), float(start_stop.stop_lon)),
                (float(end_stop.stop_lat), float(end_stop.stop_lon)),
            ]
            folium.PolyLine(
                path_lat_lon,
                color="green",
                weight=2,
                popup=line_msg,
            ).add_to(m)

            folium.CircleMarker(
                location=[float(start_stop.stop_lat), float(start_stop.stop_lon)],
                popup=f"Start: {start_stop.stop_name}",
                color="green",
                radius=3,
            ).add_to(m)
            folium.CircleMarker(
                location=[float(end_stop.stop_lat), float(end_stop.stop_lon)],
                popup=f"End: {end_stop.stop_name}",
                color="green",
                radius=3,
            ).add_to(m)
        else:
            raise Exception("Unknown path type")

m