In [68]:
%load_ext autoreload
%autoreload 2
# Make the project sources under ../src/ importable.
import sys
sys.path.append('../src/')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [69]:
from package.logger import Timed, rlog, setup
setup("INFO")


from package import storage
import pandas as pd
from package import strtime
import folium
import os
from package.osm import osm
from package.geometa import GeoMeta

In [70]:
# Inputs: the city to analyse and the on-disk locations of its stops and OSM extract.
city_id = "Koeln"
stops_path = "../data/cleaned/stops.csv"
osm_path = osm.get_osm_path_from_city_id(city_id)

geo_meta = GeoMeta.load("../data/geometa.pkl")

with Timed.info("Reading stops"):
	other_stops_df = storage.read_gdf(stops_path)

# The extract is downloaded only when it is missing locally and a city id is set.
osm_is_available = os.path.exists(osm_path)
if osm_is_available or not city_id:
	rlog.info("Using existing OSM data")
else:
	rlog.info("Downloading OSM data")
	osm.download_city(city_id, osm_path)

osm_reader = osm.new_osm_reader(osm_path)

with Timed.info("Getting OSM graph"):
	nodes, edges = osm.get_graph_for_city_cropped_to_boundary(osm_reader, geo_meta, "walking")

In [71]:
# Routing results file. The path is relative to the notebook, like every other
# data file used here, so the notebook does not depend on one user's home directory.
path_source = "../data/bags.pkl"

data = storage.read_any_dict(path_source)
# Object used further down to reconstruct paths from labels.
path_manager = data["path_manager"]
# Other entries that were read from this file at some point (currently unused):
# node_map = data["multi_modal_node_to_resetted_map"]
# walking_node_map = data["walking_node_to_resetted_map"]
# reverse_node_map = {v: k for k, v in node_map.items()}
# reverse_walking_node_map = {v: k for k, v in walking_node_map.items()}
# stops_df = data["stops_df"]
# Nested mapping: outer key (used as n_transfers below) -> node id -> bag of labels.
bags_i = data["bags_i"]

In [72]:
# Node-id translation tables stored alongside the car-step data.
translation_data = storage.read_any_dict("../data/car_step_translations.pkl")
# Maps a resetted node id to a prefixed node id string (e.g. 35781 -> 'D394001227').
reverse_node_map = translation_data["resetted_to_multi_modal_node_map"]
# Presumably the inverse direction (prefixed node id -> resetted id), judging by the key name — TODO confirm.
node_map = translation_data["multi_modal_node_to_resetted_map"]

In [73]:
# One row per label: the outer key of bags_i becomes n_transfers, and the first
# two entries of label.values become the time and cost columns.
label_columns = ["osm_node_id", "time", "cost", "n_transfers", "label"]
label_rows = [
    (lbl.node_id, lbl.values[0], lbl.values[1], transfers, lbl)
    for transfers, bags_by_node in bags_i.items()
    for node_bag in bags_by_node.values()
    for lbl in node_bag
]

# Sorting by n_transfers before de-duplicating keeps, for each
# (node, time, cost) triple, a row with the lowest number of transfers.
labels = (
    pd.DataFrame(label_rows, columns=label_columns)
    .assign(
        human_readable_time=lambda frame: frame["time"].apply(strtime.seconds_to_str_time)
    )
    .sort_values("n_transfers")
    .drop_duplicates(subset=["osm_node_id", "time", "cost"], keep="first")
)
labels.drop(columns=["label"])

Unnamed: 0,osm_node_id,time,cost,n_transfers,human_readable_time
0,2096386216,28866,38,0,08:01:06
3279,1678230196,28867,38,0,08:01:07
3278,8242778059,28866,38,0,08:01:06
3277,274347462,28845,19,0,08:00:45
3276,7832023483,28867,38,0,08:01:07
...,...,...,...,...,...
1635,8476967924,28870,19,0,08:01:10
1634,631827970,28833,19,0,08:00:33
1633,8254540759,28861,38,0,08:01:01
1639,268875217,28885,19,0,08:01:25


In [74]:
# Show every label stored for one hard-coded OSM node id.
labels[labels["osm_node_id"] == 9124010785]

Unnamed: 0,osm_node_id,time,cost,n_transfers,label,human_readable_time
3836,9124010785,28842,19,0,"IntermediateLabel(values=[28842, 19], hidden_v...",08:00:42
3835,9124010785,29147,0,0,"IntermediateLabel(values=[29147, 0], hidden_va...",08:05:47


In [75]:

# Same lookup as the preceding cell, repeated unchanged for the same node id.
labels[labels["osm_node_id"] == 9124010785]

Unnamed: 0,osm_node_id,time,cost,n_transfers,label,human_readable_time
3836,9124010785,28842,19,0,"IntermediateLabel(values=[28842, 19], hidden_v...",08:00:42
3835,9124010785,29147,0,0,"IntermediateLabel(values=[29147, 0], hidden_va...",08:05:47


597	9124010785	29170	0	0	IntermediateLabel(values=[29170, 0], hidden_va...	08:06:10
2684	9124010785	29039	100	1	IntermediateLabel(values=[29039, 100], hidden_...	08:03:59

In [76]:
from package.minute_city import minute_city

In [77]:
# Presumably fetches the points of interest inside the city boundary; how `nodes`
# is used by the helper is not visible here — TODO confirm in minute_city.
pois = minute_city.fetch_pois_for_area(geo_meta.boundary, nodes)

In [78]:
# Adds two columns to `labels` in place: a copy of the node id under the name
# "target_id_osm" and a constant placeholder start id.
# NOTE(review): presumably the schema minute_city.add_pois_to_labels expects — confirm.
labels["target_id_osm"] = labels["osm_node_id"]
labels["start_id_hex"] = "aaa"

In [79]:
# Combine the labels with the fetched POIs; the join logic lives in minute_city.
poi_labels = minute_city.add_pois_to_labels(labels, pois)

In [80]:
# Earlier hand-picked list, kept for reference:
# types = ["grocery", "education", "health", "banks", "parks", "sustenance", "shops"]
# Use every distinct value of the POI "type" column instead.
types = list(pois["type"].unique())

In [81]:
# Build the profile table from the POI-annotated labels for the selected POI types.
profiles_df = minute_city.get_profiles_df(poi_labels, types)

  0%|          | 0/1 [00:00<?, ?it/s]

In [82]:
profiles_df

Unnamed: 0_level_0,cost_0,cost_19,cost_38,any_column_different,required_cost_for_optimal
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
aaa,476,176,176,True,19


In [83]:
# Upper bounds (inclusive) a label has to respect to be kept.
max_time = strtime.str_time_to_seconds("09:00:00")
max_cost = 1000
max_transfers = 10

# One boolean mask per criterion; a label is valid when all three hold.
within_time = labels["time"] <= max_time
within_cost = labels["cost"] <= max_cost
within_transfers = labels["n_transfers"] <= max_transfers

valid_labels = labels[within_time & within_cost & within_transfers]

In [84]:
# selection = valid_labels.sample(min(1000, len(valid_labels)))

# Rebinds max_cost: from here on it holds the largest cost found among the valid
# labels rather than the filter threshold assigned in the previous cell.
max_cost = valid_labels.cost.max()

from typing import Dict
from colorsys import hls_to_rgb

def hsl_to_hex(h: float, l: float, s: float) -> str:
    """Return the ``#rrggbb`` string for a colour given as hue, lightness, saturation.

    The three inputs are handed to ``colorsys.hls_to_rgb`` unchanged; each
    resulting channel is multiplied by 255 and truncated to an int before
    being written as two lowercase hex digits.
    """
    red, green, blue = hls_to_rgb(h, l, s)
    channels = (int(red * 255.0), int(green * 255.0), int(blue * 255.0))
    return "#" + "".join(f"{channel:02x}" for channel in channels)

def generate_colors_hsl(max_cost: float) -> Dict[int, str]:
    """Build a cost -> hex colour table for the integer costs 0..int(max_cost).

    Hue is fixed at 0 and saturation at 0.8; lightness falls linearly from 0.9
    at cost 0 to 0.5 at ``max_cost``. A ``max_cost`` of 0 yields a single
    hard-coded entry, which avoids dividing by zero.
    """
    if max_cost == 0:
        return {0: "#ffcccc"}

    palette = {}
    for cost in range(int(max_cost) + 1):
        lightness = 0.5 + 0.4 * (max_cost - cost) / max_cost
        palette[cost] = hsl_to_hex(h=0, l=lightness, s=0.8)
    return palette

# Lookup table from integer cost to fill colour, one entry per cost from 0 up to max_cost.
colors = generate_colors_hsl(max_cost)

In [85]:
# The start node is taken to be the node of the first label with the earliest time.
earliest_time = labels.time.min()
earliest_labels = labels[labels.time == earliest_time]
start_node_id = earliest_labels.iloc[0].osm_node_id

In [86]:
# Index the OSM nodes by id for direct lookups; the id is also kept as a column.
nodes_by_id = nodes.set_index("id")
nodes_by_id["id"] = nodes_by_id.index

start_node = nodes_by_id.loc[start_node_id]

m = folium.Map(location=[start_node.lat, start_node.lon], zoom_start=13)

# Plot only the labels that passed the time/cost/transfer filter. `colors` is
# built from the maximum cost of `valid_labels`, so iterating the unfiltered
# `labels` would raise a KeyError for any label whose cost exceeds that maximum.
for label in valid_labels.itertuples():
	node = nodes_by_id.loc[label.osm_node_id]
	folium.CircleMarker(
		location=[node.lat, node.lon],
		radius=3,
		weight=1,
		fill=True,
		fill_color=colors[label.cost],
		fill_opacity=1,
		color="black",
		popup=f"{label.human_readable_time} ({label.cost})",
	).add_to(m)

# Highlight the start node with a larger green marker drawn on top.
folium.CircleMarker(
	location=[start_node.lat, start_node.lon],
	radius=5,
	weight=1,
	fill=True,
	fill_color="green",
	fill_opacity=1,
	color="black",
	popup="Start",
).add_to(m)

m

In [87]:
from package.mcr.path import Path, GTFSPath, PathType

# Per path type: the table that translates resetted node ids back to prefixed
# node id strings (e.g. 'D394001227').
translator_map = {
    # PathType.WALKING: reverse_walking_node_map,
    # PathType.CYCLING_WALKING: reverse_node_map,
    PathType.DRIVING_WALKING: reverse_node_map,
    # PathType.PUBLIC_TRANSPORT: None,
}
# Same table with the one-character prefix removed and the rest parsed as an int.
# (This mapping used to be built twice in a row; once is enough.)
no_prefix_reverse_node_map = {k: int(v[1:]) for k, v in reverse_node_map.items()}
no_prefix_translator_map = {
    PathType.DRIVING_WALKING: no_prefix_reverse_node_map,
    PathType.PUBLIC_TRANSPORT: None,
}

In [88]:
# Bags stored under outer key 1 of bags_i (the outer key is what the labels
# table records as n_transfers).
result_bags = bags_i[1]

In [89]:
# One (node_id, label) pair for every label in every bag of result_bags.
walking_result_bags_flat = [
    (node_id, label) for node_id, bag in result_bags.items() for label in bag
]

In [90]:
# Reconstruct the path(s) behind each of the first 10000 flattened labels,
# translating node ids with the prefix-free tables, and keep the node id next to them.
reconstructed_pairs = [
    (
        bag_node_id,
        path_manager.reconstruct_and_translate_path_for_label(
            bag_label, no_prefix_translator_map
        ),
    )
    for bag_node_id, bag_label in walking_result_bags_flat[:10000]
]
path_objs_with_ids = pd.Series(reconstructed_pairs)

# Split the pairs into two parallel tuples.
node_ids, path_objs = list(zip(*path_objs_with_ids))
path_objs[:3]

([Path(path_type=PathType.DRIVING_WALKING, path=[394001227, 2427289539, 3141997559, 394001505, 626052090, 734098393, 2427289540, 5077989070, 3190145350, 1679917735, 1469714738, 5077989067, 1679917699, 626030717, 3149443561, 4237474683, 4237474660, 633165345, 4237474623, 1679917656, 2269291621, 8805331143, 269596443, 280128480, 2116481375, 280128714, 1677325913, 1736381481, 1736381482, 12248428, 1677325884, 256189615, 5973982932, 9674430882, 8446992644, 269593733, 1401277062, 3516740988, 256189614, 3516740989, 1678230179, 1401277071, 1401277069, 4237506071, 4237506070, 1401277064, 4237506072, 4237506073, 4237506074, 1401277072, 8544472662, 4237506077, 8544472664, 4237506079, 8544472666, 1401277074, 1401277078, 2020104626, 269593739, 256697445, 2020205017, 256201556, 1402536658, 1678156994, 302932303, 2096386222, 256697209, 2096386224, 256697580, 256697261, 2036832576, 256542202, 2096386219, 2096386218, 2096386216], meta={'values': [28866, 38], 'hidden_values': [0, 0]})],
 [Path(path_typ

In [91]:
path_objs[0]

[Path(path_type=PathType.DRIVING_WALKING, path=[394001227, 2427289539, 3141997559, 394001505, 626052090, 734098393, 2427289540, 5077989070, 3190145350, 1679917735, 1469714738, 5077989067, 1679917699, 626030717, 3149443561, 4237474683, 4237474660, 633165345, 4237474623, 1679917656, 2269291621, 8805331143, 269596443, 280128480, 2116481375, 280128714, 1677325913, 1736381481, 1736381482, 12248428, 1677325884, 256189615, 5973982932, 9674430882, 8446992644, 269593733, 1401277062, 3516740988, 256189614, 3516740989, 1678230179, 1401277071, 1401277069, 4237506071, 4237506070, 1401277064, 4237506072, 4237506073, 4237506074, 1401277072, 8544472662, 4237506077, 8544472664, 4237506079, 8544472666, 1401277074, 1401277078, 2020104626, 269593739, 256697445, 2020205017, 256201556, 1402536658, 1678156994, 302932303, 2096386222, 256697209, 2096386224, 256697580, 256697261, 2036832576, 256542202, 2096386219, 2096386218, 2096386216], meta={'values': [28866, 38], 'hidden_values': [0, 0]})]

In [92]:
reverse_node_map[35781]

'D394001227'

In [93]:
labels.drop(columns=["label"])

Unnamed: 0,osm_node_id,time,cost,n_transfers,human_readable_time,target_id_osm,start_id_hex
0,2096386216,28866,38,0,08:01:06,2096386216,aaa
3279,1678230196,28867,38,0,08:01:07,1678230196,aaa
3278,8242778059,28866,38,0,08:01:06,8242778059,aaa
3277,274347462,28845,19,0,08:00:45,274347462,aaa
3276,7832023483,28867,38,0,08:01:07,7832023483,aaa
...,...,...,...,...,...,...,...
1635,8476967924,28870,19,0,08:01:10,8476967924,aaa
1634,631827970,28833,19,0,08:00:33,631827970,aaa
1633,8254540759,28861,38,0,08:01:01,8254540759,aaa
1639,268875217,28885,19,0,08:01:25,268875217,aaa


In [94]:
import random
# Draw one node id at random from the ids present in `labels`. No seed is set,
# so the choice is not reproducible between runs.
i = random.sample(list(labels.osm_node_id.unique()), 1)[0]
# i = 3922625314
print(i)
# All labels recorded for the chosen node.
selection = labels[labels.osm_node_id == i]
selection.drop(columns=["label"])

11036308838


Unnamed: 0,osm_node_id,time,cost,n_transfers,human_readable_time,target_id_osm,start_id_hex
3559,11036308838,29138,0,0,08:05:38,11036308838,aaa
3558,11036308838,28870,19,0,08:01:10,11036308838,aaa


In [95]:
from folium import plugins
from folium.plugins import HeatMap

In [96]:
# Reference time in seconds; format_meta subtracts it from the arrival time of a
# segment that has no predecessor.
start_time = strtime.str_time_to_seconds("08:00:00")

In [97]:
def format_meta(meta, previous_meta):
	"""Describe one path segment as ``"<duration> (<cost>)"``.

	``meta["values"]`` holds the arrival time at index 0 and the cost at
	index 1. When ``previous_meta`` is truthy, both numbers are reported
	relative to the previous segment's values. Otherwise only the time is
	made relative, to the module-level ``start_time``, and the cost is
	reported as is. The duration is rendered with
	``strtime.seconds_to_str_time``.
	"""
	current = meta["values"]
	arrival, total_cost = current[0], current[1]

	if not previous_meta:
		# First segment: measure from the global start time, cost stays absolute.
		duration, segment_cost = arrival - start_time, total_cost
	else:
		baseline = previous_meta["values"]
		duration, segment_cost = arrival - baseline[0], total_cost - baseline[1]

	return f"{strtime.seconds_to_str_time(duration)} ({segment_cost})"

In [109]:
# Rows of `labels` whose reconstructed paths are drawn on the map.
toloop = selection

# Centre the map on the node of the first selected label.
sample_label = selection.iloc[0]
sample_node_id = sample_label.osm_node_id
nodes_by_id = nodes.set_index("id")
nodes_by_id["id"] = nodes_by_id.index
sample_node = nodes_by_id.loc[sample_node_id]

m = folium.Map(location=[sample_node.lat, sample_node.lon], zoom_start=13)

# Node-id prefix -> polyline colour, drawn in this order. "D" ids come from the
# DRIVING_WALKING translation table; "W" presumably marks walking nodes — TODO confirm.
segment_colors = (("D", "blue"), ("W", "red"))

for row in toloop.itertuples():
    label = row.label
    end_node_id = row.osm_node_id
    end_node = nodes_by_id.loc[end_node_id]

    # Mark the node the label belongs to.
    folium.CircleMarker(
        location=[end_node.lat, end_node.lon],
        popup=f"End: {end_node_id}",
        color="red",
        radius=3,
    ).add_to(m)

    paths = path_manager.reconstruct_and_translate_path_for_label(label, translator_map)
    # A dedicated loop variable name keeps the node id stored in `i` by the
    # selection cell intact.
    for path_idx, path in enumerate(paths):
        if isinstance(path, Path):
            if path.path == []:
                continue

            # Popup text is relative to the previous segment, or to start_time
            # for the first segment.
            previous_meta = paths[path_idx - 1].meta if path_idx > 0 else None
            meta = format_meta(path.meta, previous_meta)

            # One polyline per prefix, built from the nodes carrying that prefix.
            for prefix, color in segment_colors:
                path_lat_lon = []
                for node_id in path.path:
                    if node_id[0] != prefix:
                        continue
                    node = nodes_by_id.loc[int(node_id[1:])]
                    path_lat_lon.append((node.lat, node.lon))
                if path_lat_lon != []:
                    folium.PolyLine(
                        path_lat_lon, color=color, weight=2, popup=str(meta)
                    ).add_to(m)
        elif isinstance(path, GTFSPath):
            # Drawing public-transport legs needs a stop table indexed by stop_id,
            # which this notebook does not load. The drawing code that used to
            # follow this raise was unreachable and has been removed.
            raise Exception("GTFSPath not supported")
        else:
            raise Exception("Unknown path type")

m


In [None]:
# NOTE(review): within the visible notebook `reverse` is only created in the
# following cell, so this cell raises NameError on a fresh top-to-bottom run.
reverse[394001227]

35781

In [None]:
# Invert the prefix-free driving/walking table: OSM node id -> resetted node id.
# If several resetted ids share one OSM id, the last one iterated wins.
reverse = {v: k for k, v in no_prefix_translator_map[PathType.DRIVING_WALKING].items()}
reverse

{21063145: 34508,
 7151289920: 34509,
 10929975: 34510,
 10853912: 34511,
 10990037: 4,
 68732822: 5,
 1672368051: 6,
 1672368053: 7,
 1672380621: 8,
 10989051: 9,
 2276506373: 34512,
 2276506377: 34513,
 313720425: 34514,
 2276506375: 34515,
 2234629647: 14,
 9719444415: 15,
 2603599693: 16,
 368868: 17,
 359972: 18,
 408755657: 19,
 33481155: 20,
 2107675109: 21,
 2107675123: 34516,
 359970: 34517,
 1431777755: 24,
 15341577: 34518,
 15341603: 34519,
 3500842867: 34520,
 10989169: 34525,
 25294726: 34526,
 1625403091: 34527,
 449266: 34528,
 804434762: 34529,
 1672324096: 34530,
 449270: 34531,
 8807429618: 34532,
 449272: 34533,
 805802896: 34534,
 804434772: 34535,
 449256: 34536,
 2340965976: 34537,
 8807204612: 34538,
 458606823: 34539,
 14601871: 34540,
 359927: 34541,
 1625403078: 34542,
 1003841477: 34543,
 1003841492: 34544,
 25294727: 34545,
 2277589205: 34546,
 9831414967: 50,
 2715815513: 51,
 2234647793: 52,
 1584118516: 53,
 3286717395: 54,
 2603602518: 55,
 2159925917: 