In [4]:
import os
import requests
import subprocess

def download_geofabrik_pbf(years, base_url="https://download.geofabrik.de/europe/", timeout=60):
    """Download the 1 January Germany snapshot for each requested year.

    Files are stored in the ``osm_geofabrik_pbf`` folder (created if missing)
    under the name ``germany-<year>0101.osm.pbf``. A file that already exists
    is not downloaded again.

    Each download is streamed into ``<name>.part`` and only renamed to its
    final name once the whole body has been written. An interrupted download
    therefore never leaves a truncated ``.osm.pbf`` behind that a later run
    would mistake for a complete file.

    Args:
        years: Iterable of year suffixes as they appear in the file name
            (e.g. ``18`` for ``germany-180101.osm.pbf``).
        base_url: URL prefix the file name is appended to.
        timeout: Timeout in seconds passed to ``requests.get`` (a single
            number or a ``(connect, read)`` tuple). ``None`` waits forever.

    Raises:
        Whatever ``requests`` or the file system raises while transferring;
        the partial file is removed before the exception propagates. A
        non-200 response is only reported with a message, not raised.
    """
    folder = "osm_geofabrik_pbf"
    os.makedirs(folder, exist_ok=True)

    for year in years:
        filename = f"germany-{year}0101.osm.pbf"
        file_path = os.path.join(folder, filename)
        file_url = base_url + filename

        if os.path.exists(file_path):
            print(f"File already exists: {file_path}, skipping download.")
            continue

        print(f"Downloading: {file_url}")
        partial_path = file_path + ".part"
        # The context manager releases the connection even on early exit.
        with requests.get(file_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download {file_url} (Status code: {response.status_code})")
                continue
            try:
                with open(partial_path, "wb") as f:
                    # 1 MiB chunks: the snapshots are several GB, so tiny
                    # chunks only add per-iteration overhead.
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)
                # Atomic rename: the final name only ever holds a full file.
                os.replace(partial_path, file_path)
            except BaseException:
                # Includes KeyboardInterrupt (kernel interrupt in a notebook).
                if os.path.exists(partial_path):
                    os.remove(partial_path)
                raise
        print(f"Downloaded: {file_path}")

def run_osmium(year):
    """Extract the primary/secondary/tertiary road ways for one snapshot.

    Runs ``osmium tags-filter`` on
    ``osm_geofabrik_pbf/germany-<year>0101.osm.pbf`` and writes the result to
    ``processed_osm_files/processed_roads_germany_<year>.pbf``. The output
    folder is created if missing. Nothing is done when the output file
    already exists.

    Failures are reported with a message instead of raising, so a loop over
    several years keeps going: a missing input file, a missing ``osmium``
    executable, and a non-zero osmium exit status are all handled this way.
    After a failed run any output file is removed so the next call does not
    treat it as a finished result.

    Args:
        year: Year suffix as a string, as used in the file names (e.g. "18").
    """
    folder = "processed_osm_files"
    os.makedirs(folder, exist_ok=True)

    input_pbf = os.path.join("osm_geofabrik_pbf", f"germany-{year}0101.osm.pbf")
    filtered_pbf = os.path.join(folder, f"processed_roads_germany_{year}.pbf")

    if os.path.exists(filtered_pbf):
        print(f"Processed file already exists: {filtered_pbf}, skipping processing.")
        return

    # A failed download leaves no input; say so instead of letting osmium fail.
    if not os.path.exists(input_pbf):
        print(f"❌ Input file not found: {input_pbf}, skipping processing.")
        return

    filter_command = [
        "osmium", "tags-filter",
        input_pbf,
        "w/highway=primary,primary_link,secondary,secondary_link,tertiary,tertiary_link",
        "-o", filtered_pbf
    ]
    print("🔹 Running: ", " ".join(filter_command))
    try:
        subprocess.run(filter_command, check=True)
    except FileNotFoundError as e:
        # Raised by subprocess when the osmium binary itself cannot be found.
        print("❌ Osmium executable not found (is osmium-tool installed and on PATH?):", e)
    except subprocess.CalledProcessError as e:
        print("❌ Error running Osmium:", e)
    else:
        print("✅ Osmium processing complete! Files saved in 'processed_osm_files/'")
        return

    # Failure path: the output did not exist before this call (checked above),
    # so anything present now comes from the failed run and may be incomplete.
    if os.path.exists(filtered_pbf):
        os.remove(filtered_pbf)

if __name__ == "__main__":
    # Two-digit year suffixes of the snapshots to fetch and filter.
    years = [18, 24]  # Configure the years you need

    # Fetch every snapshot first, then filter them one by one.
    download_geofabrik_pbf(years)
    for year in years:
        run_osmium(f"{year}")


File already exists: osm_geofabrik_pbf\germany-180101.osm.pbf, skipping download.
File already exists: osm_geofabrik_pbf\germany-240101.osm.pbf, skipping download.
Processed file already exists: processed_osm_files\processed_roads_germany_18.pbf, skipping processing.
Processed file already exists: processed_osm_files\processed_roads_germany_24.pbf, skipping processing.


In [9]:
from pyrosm import OSM
import geopandas as gpd
import pandas as pd

#import warnings
#warnings.simplefilter(action='ignore', category=FutureWarning)




# Read the filtered road extracts of all configured years into a single
# GeoDataFrame (one "year" column value per snapshot) and cache the result as
# Parquet. This can take some time (~3 min) and requires a lot of memory.

osm_roads_all = gpd.GeoDataFrame()

years = ["18", "24"]

years_str = "_".join(["20" + str(y) for y in years])
output_file = f"processed_osm_files/germany_roads_{years_str}.parquet"

# NOTE(review): on a cache hit osm_roads_all stays an empty GeoDataFrame; the
# cached data is not loaded here. Confirm that later cells read output_file
# themselves rather than relying on this variable.
if os.path.exists(output_file):
    print(f"Processed file already exists: {output_file}, skipping processing.")
else:
    # Collect the per-year frames and concatenate once at the end. Growing the
    # frame inside the loop re-copies all earlier rows on every iteration, and
    # starting from an empty frame triggers pandas' empty-entry FutureWarning.
    yearly_frames = []
    for y in years:
        osm = OSM(r"processed_osm_files/processed_roads_germany_" + y + ".pbf")

        osm_roads = osm.get_data_by_custom_criteria(
            custom_filter={
                'highway': ['primary', 'primary_link', 'secondary', 'secondary_link', 'tertiary', 'tertiary_link'],
            },
            # Keep data matching the criteria above
            filter_type="keep",
            keep_nodes=False,
            keep_ways=True,
            keep_relations=False,
            extra_attributes=["maxspeed", "maxspeed:conditional"],
        )
        # presumably pyrosm always returns these columns for this query — TODO
        # confirm; a missing one raises KeyError here.
        osm_roads_clean = osm_roads[['highway', 'maxspeed', 'maxspeed:conditional', 'id', 'geometry']].copy()
        osm_roads_clean["year"] = "20" + y

        yearly_frames.append(osm_roads_clean)

    osm_roads_all = pd.concat(yearly_frames)

    osm_roads_all.to_parquet(output_file)

Processed file already exists: processed_osm_files/germany_roads_2018_2024.parquet, skipping processing.
