In [None]:
import os
import requests as req
from tqdm import tqdm
import osmium as osm
import shapely.wkb as wkb
import shapely.geometry as geom
import pandas as pd
import geopandas as gpd
import pharmalink.code.area as area
from typing import List

# Ensure the cache directory exists.
# The path is anchored at the current working directory and keeps its trailing
# slash because file names are appended to it by plain string concatenation.
osm_cache_path = f"{os.getcwd()}/pharmalink/code/cache/osm/"

# exist_ok=True already makes this a no-op when the directory is present, so no
# separate (and racy) os.path.exists() check is needed.
os.makedirs(osm_cache_path, exist_ok=True)


def build_geofabrik_url(bundesland):
    """Return the Geofabrik download URL of the latest extract for a Bundesland.

    Args:
        bundesland: Object exposing the name of the state as ``.name``
            (for example ``"Baden-Württemberg"``).

    Returns:
        str: ``https://download.geofabrik.de/europe/germany/<name>-latest.osm.pbf``
        where ``<name>`` is the lowercased state name with German umlauts and
        ``ß`` transliterated to ASCII (``ä -> ae``, ``ö -> oe``, ``ü -> ue``,
        ``ß -> ss``).
    """
    # Lowercase first so that upper-case umlauts are covered by the same table;
    # a table instead of exact name comparisons keeps this independent of the
    # capitalisation used by the caller.
    umlaut_to_ascii = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})
    name = bundesland.name.lower().translate(umlaut_to_ascii)

    return f"https://download.geofabrik.de/europe/germany/{name}-latest.osm.pbf"


def get_bundesland_file(regkey):
    """Return the path of the cached OSM extract for the Bundesland of ``regkey``.

    The extract is downloaded from download.geofabrik.de on first use and
    stored in ``osm_cache_path``; later calls reuse the cached file.

    Args:
        regkey: Value accepted by ``area.regkey_to_bundesland``.

    Returns:
        str: Path of the ``<bundesland>-latest.osm.pbf`` file inside the cache
        directory.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # Determine the Bundesland from the regkey
    bundesland = area.regkey_to_bundesland(regkey)
    bl_file_name = f"{bundesland.name.lower()}-latest.osm.pbf"
    bl_file_path = f"{osm_cache_path}{bl_file_name}"

    # Cache hit: nothing to download
    if os.path.exists(bl_file_path):
        return bl_file_path

    url = build_geofabrik_url(bundesland)
    print(f"Downloading {bl_file_name} file from download.geofabrik.de")

    # Stream into a temporary ".part" file and only move it into place once the
    # download is complete. Otherwise an interrupted download would leave a
    # truncated file that the existence check above treats as a valid cache.
    part_path = f"{bl_file_path}.part"
    try:
        # (connect, read) timeouts so that a stalled connection cannot hang the
        # notebook forever; the context manager releases the connection.
        with req.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()

            # Total size for the progress bar; None (unknown) if the server
            # does not send a content-length header.
            total_size = int(response.headers.get("content-length", 0)) or None

            with open(part_path, "wb") as file, tqdm(
                total=total_size,
                unit="B",
                unit_scale=True,
                desc=f"{bl_file_name}",
            ) as pbar:
                # 1 MiB chunks: the extracts are large, tiny chunks only add
                # per-iteration overhead.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    # Filter out keep-alive chunks
                    if chunk:
                        file.write(chunk)
                        pbar.update(len(chunk))

        os.replace(part_path, bl_file_path)
    finally:
        # After a successful os.replace the part file is gone and this is a
        # no-op; after a failure it removes the incomplete download.
        if os.path.exists(part_path):
            os.remove(part_path)

    return bl_file_path

In [None]:
# Build the Area object for the regional key "09663" and show it as cell output.
# NOTE(review): the variable name suggests this key belongs to Würzburg — confirm.
wü = area.Area("09663")
wü

In [None]:
# Read the OSM file
class RegKeyToOSMID(osm.SimpleHandler):
    """Collect the ids of relations tagged with one of the given regional keys.

    After the handler has been applied to a file, ``osm_ids`` holds the id of
    every relation whose ``de:regionalschluessel`` tag value is contained in
    ``regkeys``.

    Args:
        regkeys: Iterable of regional key strings to search for.
    """

    def __init__(self, regkeys):
        super().__init__()
        self.regkeys = regkeys
        # Hashed snapshot used for the per-relation membership test. The test
        # runs once for every relation in the file, so a list scan here would
        # cost time proportional to the number of keys on each call.
        self._regkey_lookup = frozenset(regkeys)
        self.osm_ids = []
        self.key = "de:regionalschluessel"

    def relation(self, r):
        """Record the id of ``r`` if its regional key is one of the searched keys."""
        # A relation without the tag yields None, which never matches.
        if r.tags.get(self.key) in self._regkey_lookup:
            self.osm_ids.append(r.id)


# The regional keys are the index of the regkey table
regkey_list = area.get_regkey_list()
areas = regkey_list.index.to_list()

# Build search list of regkeys
# Due to an oddity in osm "de:regionalschluessel" tags, we need to search
# for both the short (5-digit) and the long (12-digit) regkey: the short
# forms (first five characters) come first, followed by the full keys.
rk_search_list = [full_key[:5] for full_key in areas]
rk_search_list.extend(areas)

file = osm_cache_path + "germany-latest.osm.pbf"

# Scan the file; untagged objects and objects without the regional key tag
# are filtered out before they reach the handler.
osm_id_handler = RegKeyToOSMID(rk_search_list)
osm_id_handler.apply_file(
    file,
    filters=[
        osm.filter.EmptyTagFilter(),
        osm.filter.KeyFilter("de:regionalschluessel"),
    ],
)
osm_ids = osm_id_handler.osm_ids

In [None]:
class RelationGeometry(osm.SimpleHandler):
    """Collect regional key and geometry of every area carrying a regional key.

    After the handler has been applied, ``areas`` is a list of dicts with the
    keys ``"rk"`` (value of the ``de:regionalschluessel`` tag) and
    ``"geometry"`` (shapely geometry loaded from the area's WKB multipolygon).

    Args:
        area_ids: Stored on the instance as ``area_ids``. It is currently not
            used to restrict which areas are collected.
    """

    def __init__(self, area_ids: int):
        super().__init__()

        self.area_ids = area_ids
        self.areas = []
        self.key = "de:regionalschluessel"
        # The handler owns its geometry factory instead of relying on a
        # notebook-level global that is only defined after this class.
        self._wkb_factory = osm.geom.WKBFactory()

    def area(self, a):
        """Store regional key and multipolygon of ``a`` if it has a regional key."""
        regkey = a.tags.get(self.key)
        if regkey:
            # The WKB is loaded as a hex string (hex=True).
            wkb_shape = self._wkb_factory.create_multipolygon(a)
            shape = wkb.loads(wkb_shape, hex=True)

            self.areas.append({"rk": regkey, "geometry": shape})


# Area of interest and the Bundesland extract that contains it
regkey = area.Area("09663")
file = get_bundesland_file(regkey)

# Relation id and the matching osmium area id
# (osmium numbers areas built from relations as 2 * relation id + 1)
osm_id = 62464
area_id = 2 * osm_id + 1
area_ids = area_id

# Geometry factory, tag filter and the collecting handler
wkbfab = osm.geom.WKBFactory()
area_filter = osm.filter.KeyFilter("de:regionalschluessel", "name")
handler = RelationGeometry(area_id)

# Run the handler over the OSM file; node locations are kept in a
# file-backed index inside the cache directory.
handler.apply_file(
    file,
    filters=[area_filter],
    locations=True,
    idx="sparse_file_array,pharmalink/code/cache/osm/nodelocation",
)

In [None]:
# Collected areas as a GeoDataFrame in WGS84 ...
gdf = gpd.GeoDataFrame(handler.areas, crs="EPSG:4326")
# ... reprojected to the estimated UTM zone and indexed by regional key
# (set_index moves the "rk" column into the index).
gdf = gdf.to_crs(gdf.estimate_utm_crs()).set_index("rk")

In [None]:
# Drop the row with index "09" (Bayern).
# errors="ignore" keeps the cell re-runnable: once the row is gone, a second
# execution would otherwise fail with a KeyError.
gdf = gdf.drop(index="09", errors="ignore")


gdf.sort_index()

In [None]:
def filter_areas(osmfile):
    """Return the tags of the relation whose id equals the notebook-level ``osm_id``.

    Args:
        osmfile: OSM file handed to ``osm.FileProcessor``.

    Returns:
        The tag list of the first matching relation, or ``None`` if the file
        contains no relation with that id.
    """
    # OSM ids are only unique per object type. Reading all entity types would
    # let a node or way with the same number match before the relation is
    # reached, so only relations are read.
    # NOTE(review): the returned tags belong to the processor's current object;
    # copy them into a dict if they must outlive the iteration — confirm.
    for relation in osm.FileProcessor(osmfile, osm.osm.RELATION):
        if relation.id == osm_id:
            return relation.tags

    return None

In [None]:
wkbfactory = osm.geom.WKBFactory()


class AreaPrinter(osm.SimpleHandler):
    """Debug handler that prints the tags of every area it receives."""

    def __init__(self):
        super().__init__()

    def area(self, a):
        """Print the tag list of area ``a``."""
        print(a.tags)

In [None]:
# AreaPrinter takes no constructor arguments (the former argument `l` was an
# undefined name, so this cell failed on a fresh kernel).
handler = AreaPrinter()
# Print the tags of every area in the file; node locations are kept in memory.
handler.apply_file(file, locations=True, idx="flex_mem")

In [None]:
class WayHandler:
    """Print id and node count of every way, looking up each node in an index.

    Args:
        idx: Node location index; its ``get(ref)`` method is called once for
            every node reference of a way.
    """

    def __init__(self, idx):
        self.idx = idx

    def way(self, w):
        """Look up all nodes of ``w`` in the index and print ``"<id> <node count>"``."""
        for n in w.nodes:
            # Use the index handed to the constructor rather than a global of
            # the same name; note that the cache is used here.
            self.idx.get(n.ref)
        print("%d %s" % (w.id, len(w.nodes)))


# Read only the ways of the file
reader = osm.io.Reader(file, osm.osm.osm_entity_bits.WAY)

try:
    # File-backed node location index stored in the cache directory
    idx = osm.index.create_map("sparse_file_array,pharmalink/code/cache/osm/nodelocation")
    lh = osm.NodeLocationsForWays(idx)
    # lh.ignore_errors()

    osm.apply(reader, lh, WayHandler(idx))
finally:
    # Close the reader even if processing fails
    reader.close()