# Matching BRON accidents to OSM roads (2022)

This notebook demonstrates how to load BRON accident data for the year 2022, match each accident location to the nearest OSM road segment with the same street name, and enrich the accidents with attributes from the corresponding road segments. The enriched data is saved to a CSV file.

You can adjust the file paths in the code cells below to point to your own datasets. The default paths are set for a Windows environment. Replace them with your own if necessary.

In [None]:
import os
import re

import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
from tqdm.notebook import tqdm

# One hstore pair: "key"=>"value". Inside the quotes a backslash escapes the
# next character, so an escaped quote (\") does not terminate the key or value.
_HSTORE_PAIR = re.compile(r'"((?:[^"\\]|\\.)*)"=>"((?:[^"\\]|\\.)*)"', re.DOTALL)


def _hstore_unescape(text):
    """Turn the escape sequences \\" and \\\\ back into a quote / a backslash."""
    return re.sub(r'\\(["\\])', r'\1', text)


def parse_hstore(hstore_string):
    """Parse an hstore-style tag string into a dictionary.

    Parameters
    ----------
    hstore_string : str or None
        Text of the form '"key"=>"value","key2"=>"value2"'. Anything that is
        not a string (None, NaN from pandas, ...) is treated as "no tags".

    Returns
    -------
    dict
        Mapping of tag key to tag value, both as strings. Empty when the input
        is missing or contains no pairs. Escaped quotes and backslashes inside
        keys or values are unescaped.
    """
    # Missing values arrive as None or as float NaN; neither carries tags.
    if not isinstance(hstore_string, str):
        return {}
    return {
        _hstore_unescape(key): _hstore_unescape(value)
        for key, value in _HSTORE_PAIR.findall(hstore_string)
    }


In [None]:
# File paths
# All three files live under one project folder; change `project_dir` to
# relocate them together, or overwrite an individual path below.
project_dir = r"C:\Users\nicol\Documents\TrafficOntology_Project\TrafficOntology"

# Input: cleaned BRON accident table for 2022.
bron_csv_file = project_dir + r"\data_processed\BRON_cleaned\ongevallen_2022_clean.csv"
# Input: GeoPackage holding the filtered OSM data.
gpkg_file = project_dir + r"\OSM_data_filtered.gpkg"
# Output: accidents enriched with the attributes of their matched road.
out_path = project_dir + r"\data_rdf\accidents_enriched_with_osm.csv"


In [None]:
# Read the BRON accident table and keep only rows that can be matched later:
# those with both coordinates and a street name.
df_bron = (
    pd.read_csv(bron_csv_file)
    .dropna(subset=['longitude', 'latitude', 'straatnaam'])
)
print(f'Loaded {len(df_bron)} accidents with valid locations and street names.')


In [None]:
# Read the 'lines' layer of the OSM GeoPackage, restricted to the attributes
# used further down (identifier, road class, street name, raw tag string).
data_columns = ['osm_id', 'highway', 'name', 'other_tags', 'geometry']
gdf_osm_all = gpd.read_file(
    gpkg_file,
    layer='lines',
    columns=data_columns,
)
print(f'Loaded {len(gdf_osm_all)} OSM road segments.')


In [None]:
# Prepare data: convert to GeoDataFrames and parse tags
#
# NOTE: this cell overwrites `gdf_osm_all` and drops its 'other_tags' column,
# so re-run the OSM loading cell before running this cell a second time.

# Build the accident points in one vectorised call and declare their CRS
# (longitude/latitude, EPSG:4326) directly in the constructor.
gdf_bron = gpd.GeoDataFrame(
    df_bron,
    geometry=gpd.points_from_xy(df_bron['longitude'], df_bron['latitude']),
    crs='EPSG:4326',
)
gdf_osm_all = gdf_osm_all.to_crs(epsg=4326)

# Columns wanted in the final road table; the ones that are not regular
# columns of the layer are looked up in the parsed 'other_tags' dictionaries.
columns_to_keep = ['geometry', 'osm_id', 'name', 'highway', 'maxspeed', 'surface', 'zone:traffic']

# Parse 'other_tags'
parsed_tags = gdf_osm_all['other_tags'].apply(parse_hstore)

# Expand only the tag keys that are actually kept. Expanding every key would
# create one column per distinct OSM tag, and `join` would raise if a tag key
# happened to match an existing column name.
wanted_tag_keys = [col for col in columns_to_keep if col not in gdf_osm_all.columns]
osm_tags_df = pd.DataFrame(
    [{key: tags[key] for key in wanted_tag_keys if key in tags} for tags in parsed_tags],
    index=gdf_osm_all.index,
)
gdf_osm_all = gdf_osm_all.join(osm_tags_df)

# Keep selected columns (a tag that never occurs yields no column, so filter
# on what is really present).
final_osm_columns = [col for col in columns_to_keep if col in gdf_osm_all.columns]
gdf_osm_all = gdf_osm_all[final_osm_columns]


In [None]:
# Match accidents to roads by street name and nearest geometry
#
# For every street name that occurs in both datasets, each accident is joined
# to the nearest road segment carrying that same name.

# Street names present in both datasets, in order of first appearance among
# the accidents. Roads without a name can never match and are ignored.
road_names = set(gdf_osm_all['name'].dropna())
unique_names = [name for name in gdf_bron['straatnaam'].unique() if name in road_names]

if not unique_names:
    raise RuntimeError('No matches were found between accidents and roads.')

# "Nearest" must be measured in metres, not degrees: in a geographic CRS one
# degree of longitude and one degree of latitude cover different distances,
# which can select the wrong segment. Project both layers to the UTM zone
# estimated from the accident locations for the duration of the join.
accidents_wgs84 = gdf_bron[gdf_bron['straatnaam'].isin(unique_names)]
metric_crs = accidents_wgs84.estimate_utm_crs()
accidents_metric = accidents_wgs84.to_crs(metric_crs)
roads_metric = gdf_osm_all[gdf_osm_all['name'].isin(unique_names)].to_crs(metric_crs)

# Group each layer once (street name -> row positions) instead of scanning
# the full frames with a boolean mask for every street.
accident_rows = accidents_metric.groupby('straatnaam').indices
road_rows = roads_metric.groupby('name').indices

matched_data_list = []
for name in tqdm(unique_names, desc='Matching streets'):
    matched = gpd.sjoin_nearest(
        accidents_metric.iloc[accident_rows[name]],
        roads_metric.iloc[road_rows[name]],
        how='left',
    )
    # Equidistant segments produce one output row each; keep a single road per
    # accident so the result has at most one row per accident.
    matched_data_list.append(matched[~matched.index.duplicated(keep='first')])

# Reassemble and return the accident points to the original CRS. The
# round trip can change coordinates in the geometry column by a negligible
# floating-point amount; the longitude/latitude columns are untouched.
final_matched_gdf = pd.concat(matched_data_list).to_crs(gdf_bron.crs)
final_matched_gdf = final_matched_gdf.dropna(subset=['index_right'])
final_matched_gdf = final_matched_gdf.rename(columns={'name': 'osm_road_name'})
print(f'Successfully matched {len(final_matched_gdf)} out of {len(gdf_bron)} accidents.')
final_matched_gdf.head()


In [None]:
# Save enriched data to CSV
# Create the destination folder first so the result of the (slow) matching
# step is not lost to a missing directory. `dirname` is empty when `out_path`
# has no directory part, in which case there is nothing to create.
out_dir = os.path.dirname(out_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
final_matched_gdf.to_csv(out_path, index=False)
print(f'Saved enriched data to {out_path}')
