In [9]:
# ---------------------------------------------------------------
# 0 ▸ paths & imports
# ---------------------------------------------------------------
from pathlib import Path
import pandas as pd
import numpy as np

ROOT      = Path().resolve().parents[0]
RAW_DIR   = ROOT / "data" / "raw"
CLEAN_DIR = ROOT / "data" / "clean"

RUNS_FILE   = CLEAN_DIR   / "fire_incidents_cleaned_sorted.csv"
PANEL_IN    = CLEAN_DIR / "exposure_panel_plus_nh.csv"
PANEL_OUT   = CLEAN_DIR / "fire_parcel_panel_monthly.csv"

In [10]:
# ------------------------------------------------------------------
# 1 ▸ Load parcel+NH panel
# ------------------------------------------------------------------
panel = pd.read_csv(PANEL_IN)
panel["snapshot_month"] = pd.PeriodIndex(panel["snapshot_month"], freq="M")

In [11]:
# ------------------------------------------------------------------
# 2 ▸ Load & correctly aggregate NFIRS runs
# ------------------------------------------------------------------
runs = pd.read_csv(
    RUNS_FILE,
    parse_dates=["basic_incident_date_original_fd1.3"],
    low_memory=False,
)

runs["snapshot_month"] = runs["basic_incident_date_original_fd1.3"].dt.to_period("M")

# Map leading digit of incident code to broad class
RUN_MAP = {
    "1": "fire",
    "2": "overpressure",
    "3": "ems",
    "4": "hazmat",
    "5": "service",
    "6": "good_intent",
    "7": "false_alarm",
    "8": "severe_weather",
}
runs["run_class"] = runs["basic_incident_type_code_fd1.21"].astype(str).str[0].map(RUN_MAP)

# ---- aggregate ----
pivot = (
    runs
    .groupby(["snapshot_month", "run_class"])
    .size()
    .unstack(fill_value=0)        # one column per class, zeros where absent
    .add_prefix("runs_")          # e.g. runs_fire
    .reset_index()
)

total = (
    runs
    .groupby("snapshot_month")
    .size()
    .rename("runs_total")
    .reset_index()
)

agg_runs = total.merge(pivot, on="snapshot_month", how="left")

In [12]:
# ------------------------------------------------------------------
# 3 ▸ Merge with exposure panel
# ------------------------------------------------------------------
full_panel = (
    panel
    .merge(agg_runs, on="snapshot_month", how="left")
    .sort_values("snapshot_month")
)

# (optional) mark future months after last NFIRS data as NaN
latest = agg_runs["snapshot_month"].max()
run_cols = [c for c in full_panel.columns if c.startswith("runs_")]
full_panel.loc[full_panel["snapshot_month"] > latest, run_cols] = np.nan

In [14]:
# ------------------------------------------------------------------
# 4 ▸ Save & done
# ------------------------------------------------------------------
full_panel.to_csv(PANEL_OUT, index=False)
print(f"✓ Final panel: {full_panel.shape[0]} rows × {full_panel.shape[1]} cols → {PANEL_OUT}")
full_panel.tail()

✓ Final panel: 84 rows × 29 cols → C:\Repositories\jefferson-township-run-forecasting\data\clean\fire_parcel_panel_monthly.csv


Unnamed: 0,snapshot_month,parcels_total,sqft_total,land_value_total,bldg_value_total,total_value,tif_value_total,pct_multistory,pct_old_40y,pct_high_grade,...,nh_facilities,runs_total,runs_ems,runs_false_alarm,runs_fire,runs_good_intent,runs_hazmat,runs_overpressure,runs_service,runs_severe_weather
79,2025-03,7265.0,15238545.0,47245700.0,100927900.0,148173600.0,467650900.0,0.894976,0.145217,0.33214,...,1.0,170.0,98.0,16.0,7.0,23.0,7.0,2.0,16.0,0.0
80,2025-04,7265.0,15237668.0,47245700.0,100927900.0,148173600.0,467650900.0,0.894701,0.144804,0.33214,...,1.0,170.0,96.0,17.0,4.0,30.0,5.0,0.0,17.0,1.0
81,2025-05,7268.0,15231551.0,47245700.0,100927900.0,148173600.0,467650900.0,0.893919,0.144469,0.331866,...,1.0,158.0,97.0,18.0,2.0,22.0,6.0,0.0,13.0,0.0
82,2025-06,7268.0,15253136.0,47245700.0,100927900.0,148173600.0,467650900.0,0.895157,0.144331,0.331728,...,0.0,165.0,87.0,21.0,4.0,23.0,5.0,0.0,25.0,0.0
83,2025-07,7273.0,15409832.0,47245700.0,100927900.0,148173600.0,467650900.0,0.901416,0.144232,0.331775,...,0.0,141.0,85.0,14.0,1.0,20.0,1.0,0.0,20.0,0.0
