In [1]:
%load_ext autoreload
%autoreload 2
# add . to module name
import sys
sys.path.append('../src/')

In [2]:
from package.logger import Timed, rlog, setup
from package import storage
setup("WARNING")

In [3]:
import os
import psutil
import time

import folium
import geopandas as gpd
import pandas as pd
from h3 import h3
from typing import List, Set

from package import strtime
from package.osm import osm
from package.mcr import mcr
from package.mcr.mcr import MCR
from package.mcr.data import MCRGeoData
from package.mcr5.h3 import (
    get_h3_cells_for_nodes,
    get_h3_cells_for_bbox,
    plot_h3_cells_on_folium,
)
from package.mcr5.h3_osm_interaction import get_location_mappings_for_cells
from package.mcr5.mcr5 import MCR5
from package.mcr5.osm import add_nearest_osm_node_id
from package.mcr5.labels import read_labels_for_nodes


In [4]:
city_id = "Koeln"
stops_path = "../data/cleaned/stops.csv"
osm_path = osm.get_osm_path_from_city_id(city_id)

with Timed.info("Reading stops"):
	other_stops_df = storage.read_gdf(stops_path)

if not os.path.exists(osm_path) and city_id:
	rlog.info("Downloading OSM data")
	osm.download_city(city_id, osm_path)
else:
	rlog.info("Using existing OSM data")

osm_reader = osm.new_osm_reader(osm_path)

with Timed.info("Getting OSM graph"):
	nodes, edges = osm.get_graph_for_city_cropped_to_stops(osm_reader, other_stops_df)

In [5]:
h3_cells = get_h3_cells_for_nodes(nodes[["lat", "lon"]].to_dict("records"), 9)
m = folium.Map(location=[50.9333, 6.95], zoom_start=12)
plot_h3_cells_on_folium(h3_cells, m)
m

In [6]:
# Initialize Folium Map centered around Cologne, Germany
m = folium.Map(location=[50.9375, 6.9603], zoom_start=12)


# Get unique H3 cells covering the OSM nodes at a given resolution (e.g., 9)
resolution = 9

bbox_cologne_center = [50.92, 6.94, 50.96, 6.98]
h3_cells = get_h3_cells_for_bbox(*bbox_cologne_center, resolution=9)

# Plot H3 cells on the Folium map
plot_h3_cells_on_folium(h3_cells, m)

# draw bbox
folium.Rectangle(
	bounds=[[bbox_cologne_center[0], bbox_cologne_center[1]], [bbox_cologne_center[2], bbox_cologne_center[3]]],
	color='red',
	fill=False,
).add_to(m)

# Show the map
m

In [7]:
h3_cells = get_h3_cells_for_nodes(nodes[["lat", "lon"]].to_dict("records"), 9)
# h3_cells = get_h3_cells_for_bbox(*bbox_cologne_center, resolution=9)

In [8]:
location_mappings, invalid_h3_cells = get_location_mappings_for_cells(list(h3_cells), nodes, 20)

In [9]:
len(location_mappings)

1315

In [10]:
m = folium.Map(location=[50.9375, 6.9603], zoom_start=12)
plot_h3_cells_on_folium(invalid_h3_cells, m)
m

In [11]:
m = folium.Map(location=[50.9375, 6.9603], zoom_start=12)
plot_h3_cells_on_folium(map(lambda lm: lm.h3_cell, location_mappings), m)
nodes_by_id = nodes.set_index("id")
for location_mapping in location_mappings:
	node = nodes_by_id.loc[location_mapping.osm_node_id]
	folium.CircleMarker(
		location=(node.lat, node.lon),
		icon=folium.Icon(color="green"),
		radius=1,
		color="red",
	).add_to(m)
m

In [12]:
stops = "../data/cleaned/stops.csv"
city_id = "Koeln"
structs = "../data/structs.pkl"

In [13]:
mcr_geo_data = MCRGeoData(
	stops, structs, city_id
)

In [14]:
mcr5 = MCR5(geo_data=mcr_geo_data, max_processes=12)

In [15]:
mcr5_output_path = "../data/mcr5/Koeln"

In [16]:
errors = mcr5.run(location_mappings, start_time="08:00:00", output_dir=mcr5_output_path)

Available memory: 9.55GiB | active: 12 | started: 46/1315           

thread '<unnamed>' panicked at 'called `Result::unwrap()` on an `Err` value: PyErr { type: <class 'TypeError'>, value: TypeError("'str' object cannot be interpreted as an integer"), traceback: None }', src/rs/mlc_adapter.rs:148:18
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Process Process-40:
Traceback (most recent call last):
  File "/home/moritz/miniconda3/envs/mcr-py/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/moritz/miniconda3/envs/mcr-py/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/moritz/dev/uni/mcr-py/notebooks/../src/package/mcr5/mcr5.py", line 116, in run_mcr
    mcr_runner.run(
  File "/home/moritz/dev/uni/mcr-py/notebooks/../src/package/mcr/mcr.py", line 142, in run
    walking_result_bags = mcr_py.run_mlc_with_bags(
pyo3_runtime.PanicException: called `Result::unwrap()` on an `Err` value: PyErr { type: <class 'Ty

Available memory: 4.50GiB | active: 12 | started: 1227/1315          

thread '<unnamed>' panicked at 'called `Result::unwrap()` on an `Err` value: PyErr { type: <class 'TypeError'>, value: TypeError("'str' object cannot be interpreted as an integer"), traceback: None }', src/rs/mlc_adapter.rs:148:18
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Process Process-1225:
Traceback (most recent call last):
  File "/home/moritz/miniconda3/envs/mcr-py/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/moritz/miniconda3/envs/mcr-py/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/moritz/dev/uni/mcr-py/notebooks/../src/package/mcr5/mcr5.py", line 116, in run_mcr
    mcr_runner.run(
  File "/home/moritz/dev/uni/mcr-py/notebooks/../src/package/mcr/mcr.py", line 142, in run
    walking_result_bags = mcr_py.run_mlc_with_bags(
pyo3_runtime.PanicException: called `Result::unwrap()` on an `Err` value: PyErr { type: <class '

Available memory: 4.54GiB | active: 12 | started: 1301/1315         

Process Process-1298:
thread '<unnamed>' panicked at 'called `Result::unwrap()` on an `Err` value: PyErr { type: <class 'TypeError'>, value: TypeError("'str' object cannot be interpreted as an integer"), traceback: None }', src/rs/mlc_adapter.rs:148:18
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Traceback (most recent call last):
  File "/home/moritz/miniconda3/envs/mcr-py/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/moritz/miniconda3/envs/mcr-py/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/moritz/dev/uni/mcr-py/notebooks/../src/package/mcr5/mcr5.py", line 116, in run_mcr
    mcr_runner.run(
  File "/home/moritz/dev/uni/mcr-py/notebooks/../src/package/mcr/mcr.py", line 142, in run
    walking_result_bags = mcr_py.run_mlc_with_bags(
pyo3_runtime.PanicException: called `Result::unwrap()` on an `Err` value: PyErr { type: <class '

All processes finished.                                  15        


In [17]:
import pickle
errors_path = "../data/mcr5/Koeln/errors.pkl"
with open(errors_path, "wb") as f:
	pickle.dump(errors, f)


[autoreload of mcr_py failed: Traceback (most recent call last):
  File "/home/moritz/miniconda3/envs/mcr-py/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "/home/moritz/miniconda3/envs/mcr-py/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 475, in superreload
    module = reload(module)
  File "/home/moritz/miniconda3/envs/mcr-py/lib/python3.10/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 619, in _exec
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/home/moritz/miniconda3/envs/mcr-py/lib/python3.10/site-packages/mcr_py/__init__.py", line 3, in <module>
    __doc__ = mcr_py.__doc__
NameError: name 'mcr_py' is not defined
]


In [18]:
erroneous_osm_node_ids = set(map(lambda e: e[0]["osm_node_id"], errors))
erroneous_osm_node_ids

set()

In [19]:
import overpy

api = overpy.Overpass()

# Define bounding box for Cologne: [south,west,north,east]
bounding_box = "(50.8700,6.8000,51.0500,7.0500)"

# Overpass QL query for supermarkets in the bounding box
overpass_query = f"""
[out:json];
(
    node["shop"="supermarket"]{bounding_box};
    way["shop"="supermarket"]{bounding_box};
    relation["shop"="supermarket"]{bounding_box};
);
out center;
"""

result = api.query(overpass_query)

In [20]:
markets = []
for obj in result.nodes + result.ways + result.relations:
	market = {
		"name": obj.tags.get("name"),
		"id": obj.id,
	}
	if isinstance(obj, overpy.Node):
		market["lat"] = obj.lat
		market["lon"] = obj.lon
	elif isinstance(obj, overpy.Way):
		market["lat"] = obj.center_lat
		market["lon"] = obj.center_lon
	elif isinstance(obj, overpy.Relation):
		market["lat"] = obj.center_lat
		market["lon"] = obj.center_lon
	else:
		raise ValueError(f"Unknown type: {type(obj)}")
	markets.append(market)
		
markets = pd.DataFrame(markets)
markets.head(3)

Unnamed: 0,name,id,lat,lon
0,Netto Marken-Discount,55441040,50.9408725,7.0081399
1,Kaufland,102991868,50.9463208,6.9218135
2,Netto Marken-Discount,232289350,50.9585924,6.9497996


In [21]:
area_of_interest = nodes.geometry.unary_union.convex_hull
markets = gpd.GeoDataFrame(markets, geometry=gpd.points_from_xy(markets.lon, markets.lat))
# only keep markets within area of interest
markets = markets[markets.within(area_of_interest)]
markets

Unnamed: 0,name,id,lat,lon,geometry
0,Netto Marken-Discount,55441040,50.9408725,7.0081399,POINT (7.00814 50.94087)
1,Kaufland,102991868,50.9463208,6.9218135,POINT (6.92181 50.94632)
2,Netto Marken-Discount,232289350,50.9585924,6.9497996,POINT (6.94980 50.95859)
3,Netto City,242515981,50.9255261,6.9581475,POINT (6.95815 50.92553)
4,REWE,243924635,50.9547763,6.9168738,POINT (6.91687 50.95478)
...,...,...,...,...,...
369,Lidl,798140989,50.9505915,7.0005266,POINT (7.00053 50.95059)
370,REWE Center,816551392,50.9854625,6.9456900,POINT (6.94569 50.98546)
371,ALDI Süd,868619074,50.8987225,6.8862791,POINT (6.88628 50.89872)
372,Edeka Engels,901623455,50.9974473,6.9139239,POINT (6.91392 50.99745)


In [22]:
m = folium.Map(location=[50.9375, 6.9603], zoom_start=12)
for _, market in markets.iterrows():
	folium.CircleMarker(
		location=(market.lat, market.lon),
		popup=market["name"],
		radius=1,
		color="red",
	).add_to(m)
m


In [25]:
markets = add_nearest_osm_node_id(markets, nodes)
markets["count"] = 1
markets

Unnamed: 0,name,id,lat,lon,geometry,nearest_osm_node_id,distance,count
0,Netto Marken-Discount,55441040,50.9408725,7.0081399,POINT (7.00814 50.94087),2725476496,6.737762,1
1,Kaufland,102991868,50.9463208,6.9218135,POINT (6.92181 50.94632),1246265182,9.802642,1
2,Netto Marken-Discount,232289350,50.9585924,6.9497996,POINT (6.94980 50.95859),1680834812,19.672917,1
3,Netto City,242515981,50.9255261,6.9581475,POINT (6.95815 50.92553),3890976981,22.818948,1
4,REWE,243924635,50.9547763,6.9168738,POINT (6.91687 50.95478),6908409244,30.201176,1
...,...,...,...,...,...,...,...,...
369,Lidl,798140989,50.9505915,7.0005266,POINT (7.00053 50.95059),7464862015,27.213849,1
370,REWE Center,816551392,50.9854625,6.9456900,POINT (6.94569 50.98546),11137319905,26.592059,1
371,ALDI Süd,868619074,50.8987225,6.8862791,POINT (6.88628 50.89872),8096600395,41.214910,1
372,Edeka Engels,901623455,50.9974473,6.9139239,POINT (6.91392 50.99745),1684392657,69.915888,1


In [27]:
labels = read_labels_for_nodes(mcr5_output_path, markets.nearest_osm_node_id.unique())
labels.head(3)

Unnamed: 0,target_id_osm,time,cost,n_transfers,human_readable_time,start_id_hex
562,2973666571,33907,0,0,09:25:07,891fa1983dbffff
1244,506371548,34186,0,0,09:29:46,891fa1983dbffff
1321,2282235758,32874,0,0,09:07:54,891fa1983dbffff


In [28]:
labels = labels.merge(
    markets[["count", "nearest_osm_node_id"]],
    left_on="target_id_osm",
    right_on="nearest_osm_node_id",
)
labels.head(3)

Unnamed: 0,target_id_osm,time,cost,n_transfers,human_readable_time,start_id_hex,count,nearest_osm_node_id
0,2973666571,33907,0,0,09:25:07,891fa1983dbffff,1,2973666571
1,2973666571,30902,200,1,08:35:02,891fa1983dbffff,1,2973666571
2,2973666571,32184,100,1,08:56:24,891fa1983dbffff,1,2973666571


In [29]:
max_transfers = 1
max_time = strtime.str_time_to_seconds("08:15:00")
max_cost = 0

In [30]:
selected_labels = labels[
	(labels["time"] <= max_time)
	& (labels["n_transfers"] <= max_transfers)
	& (labels["cost"] <= max_cost)
]

In [31]:
reachable = selected_labels.groupby("start_id_hex")["count"].sum()

In [32]:
reachable.max()

62

In [33]:
m = folium.Map(location=[50.9375, 6.9603], zoom_start=12)
plot_h3_cells_on_folium(reachable.to_dict(), m)
# for location_mapping in location_mappings:
# 	node = nodes_by_id.loc[location_mapping.osm_node_id]
# 	folium.CircleMarker(
# 		location=(node.lat, node.lon),
# 		icon=folium.Icon(color="green"),
# 		radius=1,
# 		color="red",
# 	).add_to(m)
m