Cell 1: Import settings

In [16]:
# If you're running the notebook from /project_root/notebooks,
# make sure Python can see the ../ndw package:
import sys
from pathlib import Path

project_root = Path.cwd().parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

%load_ext autoreload
%autoreload 2

import pandas as pd

from ndw.incidents import load_incidents, load_active_incidents

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Cell 2: Load all incidents

In [17]:
# Fetch every incident record through the project loader, report the row
# count, and preview the first rows as the cell's rich output.
df_all = load_incidents()
print("Total incidents:", df_all.shape[0])

df_all.head()

Total incidents: 16


Unnamed: 0,id,version,creation_time,observation_time,version_time,probability_of_occurrence,validity_status,overall_start_time,overall_severity,description,location_text,lat,lon,loc_col2,loc_col3,loc_col4,loc_col5,carriageway,direction_ref
0,RWS03_531861_1,7,2025-12-01T12:44:48Z,2025-12-01T12:44:48Z,2025-12-01T12:44:48Z,certain,active,2025-12-01T12:43:23Z,unknown,,51.53602 | 4.297271 | 8 | 6.13 | A | negative ...,51.53602,4.297271,8,6.13,A,negative,A,negative
1,RWS03_531860_1,2,2025-12-01T12:41:00Z,2025-12-01T12:41:00Z,2025-12-01T12:41:00Z,certain,active,2025-12-01T12:41:08Z,unknown,,51.579582 | 5.4909525 | 8 | 6.13 | A | negativ...,51.579582,5.490952,8,6.13,A,negative,A,negative
2,RWS03_531867_1,2,2025-12-01T12:45:56Z,2025-12-01T12:45:56Z,2025-12-01T12:45:56Z,certain,active,2025-12-01T12:46:04Z,unknown,,51.942184 | 4.485742 | 8 | 6.13 | A | negative...,51.942184,4.485742,8,6.13,A,negative,A,negative
3,RWS03_531865_1,2,2025-12-01T12:45:16Z,2025-12-01T12:45:16Z,2025-12-01T12:45:16Z,certain,active,2025-12-01T12:45:24Z,unknown,,52.358032 | 4.735061 | 8 | 6.13 | A | negative...,52.358032,4.735061,8,6.13,A,negative,A,negative
4,RWS03_531866_1,2,2025-12-01T12:45:53Z,2025-12-01T12:45:53Z,2025-12-01T12:45:53Z,certain,active,2025-12-01T12:45:55Z,unknown,,52.72432 | 6.922222 | 8 | 6.13 | A | negative ...,52.72432,6.922222,8,6.13,A,negative,A,negative


In [18]:
# Frequency of each validity_status value; missing values get their own bucket.
print(
    "Validity status counts:",
    df_all["validity_status"].value_counts(dropna=False),
    sep="\n",
)

# Number of rows that carry a non-null latitude / longitude.
print("\nRows with coordinates:")
print(df_all["lat"].count(), "with lat,", df_all["lon"].count(), "with lon")

Validity status counts:
validity_status
active    16
Name: count, dtype: int64

Rows with coordinates:
16 with lat, 16 with lon


In [19]:
# Load the active-incident subset via the project loader and preview the
# identifying, positional and descriptive columns.
df_active = load_active_incidents()
print("Active incidents with coordinates:", df_active.shape[0])

df_active.loc[
    :,
    [
        "id",
        "creation_time",
        "lat",
        "lon",
        "carriageway",
        "direction_ref",
        "description",
    ],
].head()

Active incidents with coordinates: 16


Unnamed: 0,id,creation_time,lat,lon,carriageway,direction_ref,description
0,RWS03_531861_1,2025-12-01T12:44:48Z,51.53602,4.297271,A,negative,
1,RWS03_531860_1,2025-12-01T12:41:00Z,51.579582,5.490952,A,negative,
2,RWS03_531867_1,2025-12-01T12:45:56Z,51.942184,4.485742,A,negative,
3,RWS03_531865_1,2025-12-01T12:45:16Z,52.358032,4.735061,A,negative,
4,RWS03_531866_1,2025-12-01T12:45:53Z,52.72432,6.922222,A,negative,


In [20]:
import geopandas as gpd
from shapely.geometry import Point  # no longer needed here; kept in case other cells use Point

# Build a GeoDataFrame from the active incidents.
# points_from_xy builds the point geometries in one vectorised call
# (x = longitude, y = latitude) instead of one Python-level Point per row.
# The coordinates are declared as WGS84 lon/lat (EPSG:4326).
gdf_inc = gpd.GeoDataFrame(
    df_active.copy(),
    geometry=gpd.points_from_xy(df_active["lon"], df_active["lat"]),
    crs="EPSG:4326",
)

gdf_inc.head()

Unnamed: 0,id,version,creation_time,observation_time,version_time,probability_of_occurrence,validity_status,overall_start_time,overall_severity,description,location_text,lat,lon,loc_col2,loc_col3,loc_col4,loc_col5,carriageway,direction_ref,geometry
0,RWS03_531861_1,7,2025-12-01T12:44:48Z,2025-12-01T12:44:48Z,2025-12-01T12:44:48Z,certain,active,2025-12-01T12:43:23Z,unknown,,51.53602 | 4.297271 | 8 | 6.13 | A | negative ...,51.53602,4.297271,8,6.13,A,negative,A,negative,POINT (4.29727 51.53602)
1,RWS03_531860_1,2,2025-12-01T12:41:00Z,2025-12-01T12:41:00Z,2025-12-01T12:41:00Z,certain,active,2025-12-01T12:41:08Z,unknown,,51.579582 | 5.4909525 | 8 | 6.13 | A | negativ...,51.579582,5.490952,8,6.13,A,negative,A,negative,POINT (5.49095 51.57958)
2,RWS03_531867_1,2,2025-12-01T12:45:56Z,2025-12-01T12:45:56Z,2025-12-01T12:45:56Z,certain,active,2025-12-01T12:46:04Z,unknown,,51.942184 | 4.485742 | 8 | 6.13 | A | negative...,51.942184,4.485742,8,6.13,A,negative,A,negative,POINT (4.48574 51.94218)
3,RWS03_531865_1,2,2025-12-01T12:45:16Z,2025-12-01T12:45:16Z,2025-12-01T12:45:16Z,certain,active,2025-12-01T12:45:24Z,unknown,,52.358032 | 4.735061 | 8 | 6.13 | A | negative...,52.358032,4.735061,8,6.13,A,negative,A,negative,POINT (4.73506 52.35803)
4,RWS03_531866_1,2,2025-12-01T12:45:53Z,2025-12-01T12:45:53Z,2025-12-01T12:45:53Z,certain,active,2025-12-01T12:45:55Z,unknown,,52.72432 | 6.922222 | 8 | 6.13 | A | negative ...,52.72432,6.922222,8,6.13,A,negative,A,negative,POINT (6.92222 52.72432)


In [21]:
from ndw.ndw_shapefile_utils import load_shapefile_from_url

# Match every active incident (gdf_inc) to its nearest NDW MSI feature and
# show the combined incident + MSI attributes.

# 2) Load NDW MSI shapefile via your helper script
gdf_msi = load_shapefile_from_url()  # default MSI URL inside the function

# 3) Reproject both to RD New (meters) for distance-based nearest join
gdf_inc_rd = gdf_inc.to_crs(epsg=28992)
gdf_msi_rd = gdf_msi.to_crs(epsg=28992)

# 4) Nearest spatial join: each incident -> nearest MSI point.
#    MSI features that are equally near each produce their own output row,
#    so a single incident can appear several times (e.g. once per lane).
gdf_join = gpd.sjoin_nearest(
    gdf_inc_rd,
    gdf_msi_rd,
    how="left",
    distance_col="distance_m",
)

# 5) Back to WGS84 if you want to keep using lat/lon.
#    distance_m was computed in the projected CRS above and is not altered
#    by this reprojection.
gdf_join = gdf_join.to_crs(epsg=4326)

# 6) Keep only reasonably close matches (within MAX_DISTANCE_M metres);
#    incidents whose nearest MSI feature is farther away are dropped.
MAX_DISTANCE_M = 500.0
gdf_join = gdf_join[gdf_join["distance_m"] <= MAX_DISTANCE_M].copy()

# 7) Show a nice table with both incident + shapefile info
cols_to_show = [
    "id",
    "creation_time",
    "lat",
    "lon",
    "carriageway",
    "direction_ref",
    "road",        # from MSI shapefile
    "carriagew0",  # from MSI shapefile
    "lane",
    "km",
    "wegvak",
    "bearing",
    "distance_m",
]
# Keep only the columns that actually exist in the join result. The filtered
# list must be assigned back: as a bare expression it was discarded, so a
# missing column would still raise KeyError in the selection below.
cols_to_show = [col for col in cols_to_show if col in gdf_join.columns]

gdf_join[cols_to_show].head()

Unnamed: 0,id,creation_time,lat,lon,carriageway,direction_ref,road,carriagew0,lane,km,wegvak,bearing,distance_m
2,RWS03_531867_1,2025-12-01T12:45:56Z,51.942184,4.485742,A,negative,A20,L,2,32.8,600113695,243.45404,298.009618
2,RWS03_531867_1,2025-12-01T12:45:56Z,51.942184,4.485742,A,negative,A20,L,1,32.8,600113695,243.45404,298.009618
2,RWS03_531867_1,2025-12-01T12:45:56Z,51.942184,4.485742,A,negative,A20,L,3,32.8,600113695,243.45404,298.009618
3,RWS03_531865_1,2025-12-01T12:45:16Z,52.358032,4.735061,A,negative,A9,r,1,39.125,600122330,311.79515,162.638155
5,RWS03_531850_1,2025-12-01T12:44:29Z,52.117996,4.470255,A,positive,A4,R,1,37.11,600424215,221.45965,95.683211


Save data

In [22]:
from pathlib import Path

# Output folder: "data" under the parent of the current working directory
# (adjust project_root if the notebook lives elsewhere). Created on demand.
project_root = Path.cwd().parents[0]
data_dir = project_root.joinpath("data")
data_dir.mkdir(exist_ok=True)

# Only the active incidents are written; the DataFrame index is left out.
active_csv = data_dir.joinpath("ndw_incidents_active.csv")
df_active.to_csv(active_csv, index=False)

print("Saved active incidents to:", active_csv)

Saved active incidents to: /Users/Bruno/Library/CloudStorage/OneDrive-TUEindhoven/IGNITE/data/data/ndw_incidents_active.csv
