In [1]:
%load_ext autoreload
%autoreload 2
# add . to module name
import sys
sys.path.append('../src/')

In [2]:
from package.logger import Timed, rlog, setup
from package import storage
setup("INFO")

In [3]:
from package.osm import osm
import os

In [4]:
city_id = "Koeln"
stops_path = "../data/cleaned/stops.csv"
osm_path = osm.get_osm_path_from_city_id(city_id)

with Timed.info("Reading stops"):
	other_stops_df = storage.read_gdf(stops_path)

if not os.path.exists(osm_path) and city_id:
	rlog.info("Downloading OSM data")
	osm.download_city(city_id, osm_path)
else:
	rlog.info("Using existing OSM data")

osm_reader = osm.new_osm_reader(osm_path)

with Timed.info("Getting OSM graph"):
	nodes, edges = osm.get_graph_for_city_cropped_to_stops(osm_reader, other_stops_df)

In [5]:
nodes

Unnamed: 0,lon,lat,tags,timestamp,version,changeset,id,geometry
0,6.932525,50.937519,"{'TMC:cid_58:tabcd_1:Class': None, 'TMC:cid_58...",0,0,0,21063145,POINT (6.93252 50.93752)
1,6.932627,50.937519,,0,0,0,7151289920,POINT (6.93263 50.93752)
2,6.932735,50.937519,"{'TMC:cid_58:tabcd_1:Class': None, 'TMC:cid_58...",0,0,0,10929975,POINT (6.93274 50.93752)
3,6.934627,50.937499,,0,0,0,10853912,POINT (6.93463 50.93750)
6,6.938906,50.936414,,0,0,0,367149,POINT (6.93891 50.93641)
...,...,...,...,...,...,...,...,...
1215735,6.932704,50.962141,,0,0,0,11105976044,POINT (6.93270 50.96214)
1215736,6.932744,50.962156,"{'TMC:cid_58:tabcd_1:Class': None, 'TMC:cid_58...",0,0,0,11105976043,POINT (6.93274 50.96216)
1215737,6.931031,50.959016,,0,0,0,11105943462,POINT (6.93103 50.95902)
1215738,6.930911,50.959047,,0,0,0,11105943463,POINT (6.93091 50.95905)


In [6]:
from h3 import h3
from typing import List, Set

def get_h3_cells_for_nodes(nodes: List[dict], resolution: int) -> Set[str]:
    h3_cells: Set[str] = set()
    
    for node in nodes:
        lat, lon = node['lat'], node['lon']
        h3_cell = h3.geo_to_h3(lat, lon, resolution)
        h3_cells.add(h3_cell)
    
    return h3_cells

In [7]:


# Example usage:
# OSM nodes represented as dictionaries with lat and lon keys
osm_nodes = nodes[['lon', 'lat']].to_dict("records")

# H3 resolution level (0 to 15)
resolution = 9

# Get unique H3 cells covering the OSM nodes at the given resolution
h3_cells: Set[str] = get_h3_cells_for_nodes(osm_nodes, resolution)
print(h3_cells)

{'891fa1999abffff', '891fa1886d7ffff', '891fa198a13ffff', '891fa1d69abffff', '891fa18a277ffff', '891fa18b407ffff', '891fa18b15bffff', '891fa199db7ffff', '891fa1d69bbffff', '891fa18b153ffff', '891fa19881bffff', '891fa1980d7ffff', '891fa199b1bffff', '891fa199987ffff', '891fa198b8bffff', '891fa1988d3ffff', '891fa198057ffff', '891fa198aa3ffff', '891fa199c07ffff', '891fa1d692fffff', '891fa198eafffff', '891fa199a2fffff', '891fa199b6fffff', '891fa18a45bffff', '891fa198adbffff', '891fa18b46bffff', '891fa18b4d7ffff', '891fa18a653ffff', '891fa198ab7ffff', '891fa18a053ffff', '891fa199adbffff', '891fa18a6a3ffff', '891fa199913ffff', '891fa198ea3ffff', '891fa199dd3ffff', '891fa199eabffff', '891fa1d694bffff', '891fa18b183ffff', '891fa198a03ffff', '891fa198a77ffff', '891fa199b97ffff', '891fa199a93ffff', '891fa18b533ffff', '891fa198e7bffff', '891fa19828fffff', '891fa18b487ffff', '891fa18a42bffff', '891fa199b2fffff', '891fa18a497ffff', '891fa199c4bffff', '891fa1d6b93ffff', '891fa199877ffff', '891fa19884

In [8]:
from h3 import h3
import folium
from typing import List, Set


def get_h3_cells_for_nodes(nodes: List[dict], resolution: int) -> Set[str]:
    h3_cells: Set[str] = set()

    for node in nodes:
        lat, lon = node["lat"], node["lon"]
        h3_cell = h3.geo_to_h3(lat, lon, resolution)
        h3_cells.add(h3_cell)

    return h3_cells


def plot_h3_cells_on_folium(h3_cells: Set[str], folium_map: folium.Map) -> None:
    for h3_cell in h3_cells:
        geo_boundary = list(h3.h3_to_geo_boundary(h3_cell))
        # Close the loop by appending the first coordinates at the end of the list
        geo_boundary.append(geo_boundary[0])
        folium.Polygon(
            locations=geo_boundary,
            color="blue",
            weight=2.5,
            opacity=1,
            fill_color="blue",
            fill_opacity=0.2,
        ).add_to(folium_map)


In [9]:
from h3 import h3
from typing import List, Set

def get_h3_cells_for_bbox(min_lat: float, min_lon: float, max_lat: float, max_lon: float, resolution: int) -> Set[str]:
    h3_cells: Set[str] = set()

    # Get the edge length in meters for the H3 resolution
    edge_length_m = h3.edge_length(resolution, unit='m')
    
    # Approximate step size in degrees, assuming 111,111 meters per degree
    # This is a rough approximation and works best near the equator. 
    # The closer to the poles you get, the less accurate this becomes.
    step_size = edge_length_m / 111111.0


    lat = min_lat
    while lat <= max_lat:
        lon = min_lon
        while lon <= max_lon:
            h3_cell = h3.geo_to_h3(lat, lon, resolution)
            h3_cells.add(h3_cell)
            lon += step_size
        lat += step_size

    return h3_cells

In [10]:
# Initialize Folium Map centered around Cologne, Germany
m = folium.Map(location=[50.9375, 6.9603], zoom_start=12)


# Get unique H3 cells covering the OSM nodes at a given resolution (e.g., 9)
resolution = 9

bbox_cologne_center = [50.92, 6.94, 50.96, 6.98]
h3_cells = get_h3_cells_for_bbox(*bbox_cologne_center, resolution=9)

# Plot H3 cells on the Folium map
plot_h3_cells_on_folium(h3_cells, m)

# draw bbox
folium.Rectangle(
	bounds=[[bbox_cologne_center[0], bbox_cologne_center[1]], [bbox_cologne_center[2], bbox_cologne_center[3]]],
	color='red',
	fill=False,
).add_to(m)

# Show the map
m

In [11]:
from h3 import h3
import pandas as pd
from geopy.distance import great_circle
from typing import List, Dict, Union

def get_closest_osm_nodes_to_h3_cells(h3_cells: List[str], osm_nodes_df: pd.DataFrame) -> Dict[str, Union[None, Dict[str, float]]]:
    closest_nodes: Dict[str, Union[None, Dict[str, float]]] = {}
    
    for h3_cell in h3_cells:
        # Get the center of the H3 cell
        cell_center = h3.h3_to_geo(h3_cell)
        
        # Initialize minimum distance to a very large number
        min_distance = float("inf")
        
        # Initialize closest_node to None
        closest_node = None

        for index, row in osm_nodes_df.iterrows():
            node_coords = (row['lat'], row['lon'])
            distance = great_circle(cell_center, node_coords).meters
            
            if distance < min_distance:
                min_distance = distance
                closest_node = {'lat': row['lat'], 'lon': row['lon'], 'distance': min_distance}

        closest_nodes[h3_cell] = closest_node

    return closest_nodes

# Example usage
# Create a DataFrame with OSM nodes
osm_nodes_df = pd.DataFrame([
    {'lat': 50.9375, 'lon': 6.9603},
    {'lat': 50.9475, 'lon': 6.9203},
    {'lat': 50.9275, 'lon': 6.9503},
])

# Example H3 cells
h3_cells_example = [
    h3.geo_to_h3(50.9375, 6.9603, 9),
    h3.geo_to_h3(50.9475, 6.9203, 9),
]

# Get closest OSM nodes to the H3 cells
result = get_closest_osm_nodes_to_h3_cells(h3_cells_example, osm_nodes_df)

print(result)


{'891fa199947ffff': {'lat': 50.9375, 'lon': 6.9603, 'distance': 127.64463828134565}, '891fa199c67ffff': {'lat': 50.9475, 'lon': 6.9203, 'distance': 99.81102205862538}}


In [15]:
get_closest_osm_nodes_to_h3_cells(list(h3_cells)[:2], nodes)

{'891fa1998afffff': {'lat': 50.9416976,
  'lon': 6.9423235,
  'distance': 27.178323088836716},
 '891fa199823ffff': {'lat': 50.9368981,
  'lon': 6.9434947,
  'distance': 5.013215873116629}}

In [44]:
from scipy.spatial import cKDTree
import numpy as np
import pandas as pd
from h3 import h3
from typing import List, Dict, Union

def get_closest_osm_nodes_to_h3_cells(h3_cells: List[str], osm_nodes_df: pd.DataFrame) -> Dict[str, Union[None, Dict[str, float]]]:
    closest_nodes: Dict[str, Union[None, Dict[str, float]]] = {}

    # Prepare a numpy array from osm_nodes_df for fast k-d tree query
    osm_nodes_array = osm_nodes_df[['lat', 'lon']].to_numpy()
    kdtree = cKDTree(osm_nodes_array)
    
    for h3_cell in h3_cells:
        # Get the center of the H3 cell
        cell_center = h3.h3_to_geo(h3_cell)
        
        # Query the k-d tree for the closest point
        distance, index = kdtree.query(np.array([cell_center]))

        node = osm_nodes_df.iloc[index].iloc[0]
        closest_node = {
            'lat': node.lat,
            'lon': node.lon,
            'distance': distance[0] * 111111,  # Convert to meters approximately
            'osm_node_id': node.id,
        }

        closest_nodes[h3_cell] = closest_node

    return closest_nodes

In [45]:
res = get_closest_osm_nodes_to_h3_cells(h3_cells, nodes)