Cell 1: Imports and path setup

In [1]:
# If you're running the notebook from /project_root/notebooks,
# make sure Python can see the ../ndw package:
import sys
from pathlib import Path

# The project root is taken to be the parent of the current working directory;
# register it on the import path once so `ndw` can be imported from here.
project_root = Path.cwd().parent
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.append(root_entry)

%load_ext autoreload
%autoreload 2

import pandas as pd

from ndw.incidents import load_incidents, load_active_incidents

Cell 2: Load all incidents

In [2]:
# Load every incident returned by the project helper, report how many rows
# came back, and preview the first few of them.
df_all = load_incidents()
print("Total incidents:", df_all.shape[0])

df_all.head()

Total incidents: 12


Unnamed: 0,id,version,creation_time,observation_time,version_time,probability_of_occurrence,validity_status,overall_start_time,overall_severity,description,location_text,lat,lon,loc_col2,loc_col3,loc_col4,loc_col5,carriageway,direction_ref
0,RWS03_532566_1,2,2025-12-02T13:49:38Z,2025-12-02T13:49:38Z,2025-12-02T13:49:38Z,certain,active,2025-12-02T13:49:46Z,unknown,,51.553474 | 4.9186454 | 8 | 6.13 | A | positiv...,51.553474,4.918645,8,6.13,A,positive,A,positive
1,RWS03_532565_1,2,2025-12-02T13:47:01Z,2025-12-02T13:47:01Z,2025-12-02T13:47:01Z,certain,active,2025-12-02T13:47:09Z,unknown,,52.363976 | 5.5269537 | 8 | 6.13 | A | negativ...,52.363976,5.526954,8,6.13,A,negative,A,negative
2,RWS03_532564_1,2,2025-12-02T13:46:14Z,2025-12-02T13:46:14Z,2025-12-02T13:46:14Z,certain,active,2025-12-02T13:46:23Z,unknown,,52.06729 | 5.063491 | 8 | 6.13 | A | negative ...,52.06729,5.063491,8,6.13,A,negative,A,negative
3,RWS03_532563_1,2,2025-12-02T13:45:26Z,2025-12-02T13:45:26Z,2025-12-02T13:45:26Z,certain,active,2025-12-02T13:45:34Z,unknown,,52.508724 | 5.532909 | 8 | 6.13 | A | negative...,52.508724,5.532909,8,6.13,A,negative,A,negative
4,RWS03_532561_1,2,2025-12-02T13:41:42Z,2025-12-02T13:41:42Z,2025-12-02T13:41:42Z,certain,active,2025-12-02T13:41:50Z,unknown,,52.59523 | 4.989107 | 8 | 6.13 | A | positive ...,52.59523,4.989107,8,6.13,A,positive,A,positive


In [3]:
# Sanity checks on the loaded incidents: how the validity statuses are
# distributed (missing values included) and how many rows carry coordinates.
status_counts = df_all["validity_status"].value_counts(dropna=False)
print("Validity status counts:")
print(status_counts)

# Series.count() tallies the non-missing entries of each coordinate column.
n_with_lat = df_all["lat"].count()
n_with_lon = df_all["lon"].count()
print("\nRows with coordinates:")
print(n_with_lat, "with lat,", n_with_lon, "with lon")

Validity status counts:
validity_status
active    12
Name: count, dtype: int64

Rows with coordinates:
12 with lat, 12 with lon


In [4]:
# Load the active incidents via the project helper. Per the label printed
# below these are expected to have coordinates, but the helper's filtering
# is not visible from this notebook.
df_active = load_active_incidents()
print("Active incidents with coordinates:", df_active.shape[0])

# Preview only the columns that matter for locating an incident.
preview_columns = [
    "id", "creation_time", "lat", "lon",
    "carriageway", "direction_ref", "description",
]
df_active.loc[:, preview_columns].head()

Active incidents with coordinates: 12


Unnamed: 0,id,creation_time,lat,lon,carriageway,direction_ref,description
0,RWS03_532566_1,2025-12-02T13:49:38Z,51.553474,4.918645,A,positive,
1,RWS03_532565_1,2025-12-02T13:47:01Z,52.363976,5.526954,A,negative,
2,RWS03_532564_1,2025-12-02T13:46:14Z,52.06729,5.063491,A,negative,
3,RWS03_532563_1,2025-12-02T13:45:26Z,52.508724,5.532909,A,negative,
4,RWS03_532561_1,2025-12-02T13:41:42Z,52.59523,4.989107,A,positive,


In [5]:
import geopandas as gpd
from shapely.geometry import Point  # not needed in this cell any more; kept in case later cells build points by hand

# Build a GeoDataFrame from the active incidents.
# points_from_xy is geopandas' vectorised point constructor (x = longitude,
# y = latitude) and replaces the per-row Python loop over Point(...).
# The coordinates are plain lon/lat values, hence CRS EPSG:4326 (WGS84).
gdf_inc = gpd.GeoDataFrame(
    df_active.copy(),  # copy so df_active itself is left untouched
    geometry=gpd.points_from_xy(df_active["lon"], df_active["lat"]),
    crs="EPSG:4326",
)

gdf_inc.head()

Unnamed: 0,id,version,creation_time,observation_time,version_time,probability_of_occurrence,validity_status,overall_start_time,overall_severity,description,location_text,lat,lon,loc_col2,loc_col3,loc_col4,loc_col5,carriageway,direction_ref,geometry
0,RWS03_532566_1,2,2025-12-02T13:49:38Z,2025-12-02T13:49:38Z,2025-12-02T13:49:38Z,certain,active,2025-12-02T13:49:46Z,unknown,,51.553474 | 4.9186454 | 8 | 6.13 | A | positiv...,51.553474,4.918645,8,6.13,A,positive,A,positive,POINT (4.91865 51.55347)
1,RWS03_532565_1,2,2025-12-02T13:47:01Z,2025-12-02T13:47:01Z,2025-12-02T13:47:01Z,certain,active,2025-12-02T13:47:09Z,unknown,,52.363976 | 5.5269537 | 8 | 6.13 | A | negativ...,52.363976,5.526954,8,6.13,A,negative,A,negative,POINT (5.52695 52.36398)
2,RWS03_532564_1,2,2025-12-02T13:46:14Z,2025-12-02T13:46:14Z,2025-12-02T13:46:14Z,certain,active,2025-12-02T13:46:23Z,unknown,,52.06729 | 5.063491 | 8 | 6.13 | A | negative ...,52.06729,5.063491,8,6.13,A,negative,A,negative,POINT (5.06349 52.06729)
3,RWS03_532563_1,2,2025-12-02T13:45:26Z,2025-12-02T13:45:26Z,2025-12-02T13:45:26Z,certain,active,2025-12-02T13:45:34Z,unknown,,52.508724 | 5.532909 | 8 | 6.13 | A | negative...,52.508724,5.532909,8,6.13,A,negative,A,negative,POINT (5.53291 52.50872)
4,RWS03_532561_1,2,2025-12-02T13:41:42Z,2025-12-02T13:41:42Z,2025-12-02T13:41:42Z,certain,active,2025-12-02T13:41:50Z,unknown,,52.59523 | 4.989107 | 8 | 6.13 | A | positive ...,52.59523,4.989107,8,6.13,A,positive,A,positive,POINT (4.98911 52.59523)


In [6]:
from ndw.ndw_shapefile_utils import load_shapefile_from_url

# Enrich each active incident (gdf_inc, built in the previous cell) with the
# attributes of its nearest NDW MSI feature.

# 2) Load the NDW MSI shapefile via the project helper; called without
#    arguments it uses the default MSI URL defined inside the function.
gdf_msi = load_shapefile_from_url()

# 3) Reproject both layers to RD New (EPSG:28992, metres) so the distance
#    reported by the nearest join is in metres rather than degrees.
gdf_inc_rd = gdf_inc.to_crs(epsg=28992)
gdf_msi_rd = gdf_msi.to_crs(epsg=28992)

# 4) Nearest spatial join: each incident -> nearest MSI feature.
#    NOTE: the result can contain several rows per incident when more than one
#    MSI row is equally near (e.g. one row per lane at the same location), so
#    the incident index is not unique afterwards.
gdf_join = gpd.sjoin_nearest(
    gdf_inc_rd,
    gdf_msi_rd,
    how="left",
    distance_col="distance_m",
)

# 5) Back to WGS84 so the geometry matches the lat/lon columns again.
#    distance_m was computed in RD New and is unaffected by this reprojection.
gdf_join = gdf_join.to_crs(epsg=4326)

# 6) Keep only reasonably close matches; incidents whose nearest MSI feature
#    is farther away than this threshold are dropped from the result.
MAX_DISTANCE_M = 500.0
gdf_join = gdf_join[gdf_join["distance_m"] <= MAX_DISTANCE_M].copy()

# 7) Show a compact table with both incident and shapefile information.
cols_to_show = [
    "id",
    "creation_time",
    "lat",
    "lon",
    "carriageway",
    "direction_ref",
    "road",        # from MSI shapefile
    "carriagew0",  # from MSI shapefile
    "lane",
    "km",
    "wegvak",
    "bearing",
    "distance_m",
]
# Keep only the columns that are really present. The original comprehension
# discarded its result, so a missing MSI column still raised KeyError below.
cols_to_show = [col for col in cols_to_show if col in gdf_join.columns]

gdf_join[cols_to_show].head()

Unnamed: 0,id,creation_time,lat,lon,carriageway,direction_ref,road,carriagew0,lane,km,wegvak,bearing,distance_m
0,RWS03_532566_1,2025-12-02T13:49:38Z,51.553474,4.918645,A,positive,A58,L,1,49.85,244192008,101.62154,148.468502
0,RWS03_532566_1,2025-12-02T13:49:38Z,51.553474,4.918645,A,positive,A58,L,2,49.85,244192008,101.62154,148.468502
2,RWS03_532564_1,2025-12-02T13:46:14Z,52.06729,5.063491,A,negative,A2,g,1,63.943,266306029,181.60489,194.11077
2,RWS03_532564_1,2025-12-02T13:46:14Z,52.06729,5.063491,A,negative,A2,g,2,63.943,266306029,181.60489,194.11077
2,RWS03_532564_1,2025-12-02T13:46:14Z,52.06729,5.063491,A,negative,A2,g,3,63.943,266306029,181.60489,194.11077


Save data

In [7]:
from pathlib import Path

# Resolve the output folder: <parent of the working directory>/data.
# Adjust project_root if the notebook is run from somewhere else.
# NOTE(review): the recorded output path ends in ".../data/data/...", i.e. the
# parent folder is itself called "data" — confirm this is the intended target.
project_root = Path.cwd().parent
data_dir = project_root.joinpath("data")
data_dir.mkdir(exist_ok=True)  # creates only the last path component

# Export ONLY the active incidents, without the DataFrame index column.
active_csv = data_dir.joinpath("ndw_incidents_active.csv")
df_active.to_csv(active_csv, index=False)

print("Saved active incidents to:", active_csv)

Saved active incidents to: /Users/Bruno/Library/CloudStorage/OneDrive-TUEindhoven/IGNITE/data/data/ndw_incidents_active.csv
