Cell 1: Import settings

In [8]:
# If you're running the notebook from /project_root/notebooks,
# make sure Python can see the ../ndw package:
import sys
from pathlib import Path

# The notebook's working directory is expected to sit one level below the
# project root, so the parent directory is what has to be importable.
project_root = Path.cwd().parent
# Register the root only once, so re-running this cell does not pile up
# duplicate sys.path entries.
if not any(entry == str(project_root) for entry in sys.path):
    sys.path.append(str(project_root))

%load_ext autoreload
%autoreload 2

import pandas as pd

from ndw.incidents import load_incidents, load_active_incidents

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Cell 2: Load all incidents

In [9]:
# Fetch every incident record through the project loader (no filtering here).
df_all = load_incidents()

# Report the row count, then preview the first five rows as the cell output.
print("Total incidents:", len(df_all))
df_all.head(n=5)

Total incidents: 11


Unnamed: 0,id,version,creation_time,observation_time,version_time,probability_of_occurrence,validity_status,overall_start_time,overall_severity,description,location_text,lat,lon,loc_col2,loc_col3,loc_col4,loc_col5,carriageway,direction_ref
0,RWS03_532731_1,2,2025-12-02T17:12:49Z,2025-12-02T17:12:49Z,2025-12-02T17:12:49Z,certain,active,2025-12-02T17:12:57Z,unknown,,52.103516 | 4.4507623 | 8 | 6.13 | A | positiv...,52.103516,4.450762,8,6.13,A,positive,A,positive
1,RWS03_532730_1,2,2025-12-02T17:10:43Z,2025-12-02T17:10:43Z,2025-12-02T17:10:43Z,certain,active,2025-12-02T17:10:51Z,unknown,,51.854782 | 5.7071495 | 8 | 6.13 | A | negativ...,51.854782,5.707149,8,6.13,A,negative,A,negative
2,RWS03_532736_1,2,2025-12-02T17:25:25Z,2025-12-02T17:25:25Z,2025-12-02T17:25:25Z,certain,active,2025-12-02T17:25:33Z,unknown,,52.352093 | 4.9636025 | 8 | 6.13 | A | negativ...,52.352093,4.963603,8,6.13,A,negative,A,negative
3,RWS03_532735_1,2,2025-12-02T17:17:53Z,2025-12-02T17:17:53Z,2025-12-02T17:17:53Z,certain,active,2025-12-02T17:17:58Z,unknown,,51.893143 | 4.5596347 | 8 | 6.13 | A | positiv...,51.893143,4.559635,8,6.13,A,positive,A,positive
4,RWS03_532734_1,2,2025-12-02T17:16:50Z,2025-12-02T17:16:50Z,2025-12-02T17:16:50Z,certain,active,2025-12-02T17:16:58Z,unknown,,52.223423 | 4.6338983 | 8 | 6.13 | A | negativ...,52.223423,4.633898,8,6.13,A,negative,A,negative


In [10]:
# Frequency of each validity status; dropna=False keeps missing values visible
# as their own bucket.
print("Validity status counts:")
print(df_all["validity_status"].value_counts(dropna=False))

# Series.count() tallies non-null entries, i.e. rows that carry a coordinate.
print("\nRows with coordinates:")
print(
    df_all["lat"].count(), "with lat,",
    df_all["lon"].count(), "with lon",
)

Validity status counts:
validity_status
active    11
Name: count, dtype: int64

Rows with coordinates:
11 with lat, 11 with lon


In [11]:
# Load the active-incident subset through the project loader.
df_active = load_active_incidents()

print("Active incidents with coordinates:", len(df_active))
# Preview the first rows, restricted to the identifying and location columns.
df_active.head()[
    ["id", "creation_time", "lat", "lon", "carriageway", "direction_ref", "description"]
]

Active incidents with coordinates: 11


Unnamed: 0,id,creation_time,lat,lon,carriageway,direction_ref,description
0,RWS03_532731_1,2025-12-02T17:12:49Z,52.103516,4.450762,A,positive,
1,RWS03_532730_1,2025-12-02T17:10:43Z,51.854782,5.707149,A,negative,
2,RWS03_532736_1,2025-12-02T17:25:25Z,52.352093,4.963603,A,negative,
3,RWS03_532735_1,2025-12-02T17:17:53Z,51.893143,4.559635,A,positive,
4,RWS03_532734_1,2025-12-02T17:16:50Z,52.223423,4.633898,A,negative,


In [12]:
import geopandas as gpd
from shapely.geometry import Point

# Build a GeoDataFrame from the active incidents.
# points_from_xy builds the point geometries in one vectorised call (x = lon,
# y = lat) instead of constructing a shapely Point per row in a Python loop.
# The coordinates are plain lon/lat degrees, hence EPSG:4326 (WGS84).
gdf_inc = gpd.GeoDataFrame(
    df_active.copy(),  # copy so the source frame is not tied to the geo frame
    geometry=gpd.points_from_xy(df_active["lon"], df_active["lat"]),
    crs="EPSG:4326",
)

gdf_inc.head()

Unnamed: 0,id,version,creation_time,observation_time,version_time,probability_of_occurrence,validity_status,overall_start_time,overall_severity,description,location_text,lat,lon,loc_col2,loc_col3,loc_col4,loc_col5,carriageway,direction_ref,geometry
0,RWS03_532731_1,2,2025-12-02T17:12:49Z,2025-12-02T17:12:49Z,2025-12-02T17:12:49Z,certain,active,2025-12-02T17:12:57Z,unknown,,52.103516 | 4.4507623 | 8 | 6.13 | A | positiv...,52.103516,4.450762,8,6.13,A,positive,A,positive,POINT (4.45076 52.10352)
1,RWS03_532730_1,2,2025-12-02T17:10:43Z,2025-12-02T17:10:43Z,2025-12-02T17:10:43Z,certain,active,2025-12-02T17:10:51Z,unknown,,51.854782 | 5.7071495 | 8 | 6.13 | A | negativ...,51.854782,5.707149,8,6.13,A,negative,A,negative,POINT (5.70715 51.85478)
2,RWS03_532736_1,2,2025-12-02T17:25:25Z,2025-12-02T17:25:25Z,2025-12-02T17:25:25Z,certain,active,2025-12-02T17:25:33Z,unknown,,52.352093 | 4.9636025 | 8 | 6.13 | A | negativ...,52.352093,4.963603,8,6.13,A,negative,A,negative,POINT (4.9636 52.35209)
3,RWS03_532735_1,2,2025-12-02T17:17:53Z,2025-12-02T17:17:53Z,2025-12-02T17:17:53Z,certain,active,2025-12-02T17:17:58Z,unknown,,51.893143 | 4.5596347 | 8 | 6.13 | A | positiv...,51.893143,4.559635,8,6.13,A,positive,A,positive,POINT (4.55963 51.89314)
4,RWS03_532734_1,2,2025-12-02T17:16:50Z,2025-12-02T17:16:50Z,2025-12-02T17:16:50Z,certain,active,2025-12-02T17:16:58Z,unknown,,52.223423 | 4.6338983 | 8 | 6.13 | A | negativ...,52.223423,4.633898,8,6.13,A,negative,A,negative,POINT (4.6339 52.22342)


In [13]:
from ndw.ndw_shapefile_utils import load_shapefile_from_url

# 2) Load NDW MSI shapefile via your helper script
gdf_msi = load_shapefile_from_url()  # default MSI URL inside the function

# 3) Reproject both to RD New (meters) for distance-based nearest join
gdf_inc_rd = gdf_inc.to_crs(epsg=28992)
gdf_msi_rd = gdf_msi.to_crs(epsg=28992)

# 4) Nearest spatial join: each incident -> nearest MSI point.
#    NOTE: when several MSI features are equally near (e.g. one row per lane
#    at the same location), the join returns one output row per tie, so an
#    incident can appear more than once in the result.
gdf_join = gpd.sjoin_nearest(
    gdf_inc_rd,
    gdf_msi_rd,
    how="left",
    distance_col="distance_m",
)

# 5) Back to WGS84 if you want to keep using lat/lon
#    (distance_m was computed in the metric CRS above and is kept as-is).
gdf_join = gdf_join.to_crs(epsg=4326)

# 6) (Optional) keep only reasonably close matches (e.g. within 500 m)
MAX_DISTANCE_M = 500.0
gdf_join = gdf_join[gdf_join["distance_m"] <= MAX_DISTANCE_M].copy()

# 7) Show a nice table with both incident + shapefile info
cols_to_show = [
    "id",
    "creation_time",
    "lat",
    "lon",
    "carriageway",
    "direction_ref",
    "road",        # from MSI shapefile
    "carriagew0",  # from MSI shapefile
    "lane",
    "km",
    "wegvak",
    "bearing",
    "distance_m",
]
# Keep only the columns that actually exist in the joined frame. The filtered
# list must be assigned back, otherwise the selection below still raises a
# KeyError when the shapefile lacks one of the expected attributes.
cols_to_show = [col for col in cols_to_show if col in gdf_join.columns]

gdf_join[cols_to_show].head()

Unnamed: 0,id,creation_time,lat,lon,carriageway,direction_ref,road,carriagew0,lane,km,wegvak,bearing,distance_m
0,RWS03_532731_1,2025-12-02T17:12:49Z,52.103516,4.450762,A,positive,A4,R,2,39.2,600424221,228.3819,98.162758
0,RWS03_532731_1,2025-12-02T17:12:49Z,52.103516,4.450762,A,positive,A4,R,3,39.2,600424221,228.3819,98.162758
0,RWS03_532731_1,2025-12-02T17:12:49Z,52.103516,4.450762,A,positive,A4,R,4,39.2,600424221,228.3819,98.162758
0,RWS03_532731_1,2025-12-02T17:12:49Z,52.103516,4.450762,A,positive,A4,R,1,39.2,600424221,228.3819,98.162758
1,RWS03_532730_1,2025-12-02T17:10:43Z,51.854782,5.707149,A,negative,A73,v,1,115.88,353258014,211.39374,285.598452


Save data

In [14]:
from pathlib import Path

# Output folder: "<parent of the working directory>/data"; created if missing.
project_root = Path.cwd().parent   # adjust if notebook is elsewhere
data_dir = project_root.joinpath("data")
data_dir.mkdir(exist_ok=True)

# Persist ONLY the active incidents, without the DataFrame index column.
active_csv = data_dir.joinpath("ndw_incidents_active.csv")
df_active.to_csv(active_csv, index=False)

print("Saved active incidents to:", active_csv)

Saved active incidents to: /Users/Bruno/Library/CloudStorage/OneDrive-TUEindhoven/IGNITE/data/data/ndw_incidents_active.csv
