In [1]:
# ------------------------------------------------------------------
# 0 ▸ Paths & file names
# ------------------------------------------------------------------
from pathlib import Path
import pandas as pd
import numpy as np

ROOT      = Path().resolve().parents[0]
RAW_DIR   = ROOT / "data" / "raw"
CLEAN_DIR = ROOT / "data" / "clean"

# existing parcel-only panel
PANEL_IN   = CLEAN_DIR / "parcel_exposure_panel_monthly.csv"
# nursing-home raw CSV you just added
NH_FILE    = RAW_DIR   / "nh_data.csv"
# output with NH metrics merged in
PANEL_OUT  = CLEAN_DIR / "exposure_panel_plus_nh.csv"

In [2]:
# ------------------------------------------------------------------
# 1 ▸ Load datasets
# ------------------------------------------------------------------
panel = pd.read_csv(PANEL_IN)
panel["snapshot_month"] = pd.PeriodIndex(panel["snapshot_month"], freq="M")

nh = pd.read_csv(NH_FILE, dtype=str)
nh["processing_date"] = pd.to_datetime(nh["processing_date"])
nh["snapshot_month"]  = nh["processing_date"].dt.to_period("M")

# numeric coercions
num_cols = ["number_of_certified_beds",
            "average_number_of_residents_per_day"]
nh[num_cols] = nh[num_cols].apply(pd.to_numeric, errors="coerce")

In [3]:
# ------------------------------------------------------------------
# 2 ▸ Monthly NH aggregation (sum across all facilities)
# ------------------------------------------------------------------
nh_monthly = (
    nh
    .groupby("snapshot_month")
    .agg(
        nh_beds_total      = ("number_of_certified_beds", "sum"),
        nh_residents_total = ("average_number_of_residents_per_day", "sum"),
        nh_facilities      = ("cms_certification_number", "nunique"),
    )
    .reset_index()
)

In [4]:
# ------------------------------------------------------------------
# 3 ▸ Merge with parcel panel
# ------------------------------------------------------------------
panel_enriched = (
    panel
      .merge(nh_monthly, on="snapshot_month", how="left")
      .sort_values("snapshot_month")
)

# fill months before first NH record with zeros so models don’t choke
panel_enriched[["nh_beds_total",
                "nh_residents_total",
                "nh_facilities"]] = (
    panel_enriched[["nh_beds_total",
                    "nh_residents_total",
                    "nh_facilities"]]
      .fillna(0)
)

In [5]:
# ------------------------------------------------------------------
# 4 ▸ Save & quick preview
# ------------------------------------------------------------------
panel_enriched.to_csv(PANEL_OUT, index=False)
print(f"✓ Enriched panel ({panel_enriched.shape[0]} rows) → {PANEL_OUT}")
panel_enriched.tail()

✓ Enriched panel (84 rows) → C:\Repositories\jefferson-township-run-forecasting\data\clean\exposure_panel_plus_nh.csv


Unnamed: 0,snapshot_month,parcels_total,sqft_total,land_value_total,bldg_value_total,total_value,tif_value_total,pct_multistory,pct_old_40y,pct_high_grade,parcels_residential,parcels_commercial,sqft_residential,sqft_commercial,month_idx,month_sin,month_cos,nh_beds_total,nh_residents_total,nh_facilities
79,2025-03,7265.0,15238545.0,47245700.0,100927900.0,148173600.0,467650900.0,0.894976,0.145217,0.33214,6829.0,171.0,15134776.0,9133.0,79,-0.5,-0.8660254,50.0,45.4,1.0
80,2025-04,7265.0,15237668.0,47245700.0,100927900.0,148173600.0,467650900.0,0.894701,0.144804,0.33214,6829.0,171.0,15133899.0,9133.0,80,-0.866025,-0.5,58.0,44.3,1.0
81,2025-05,7268.0,15231551.0,47245700.0,100927900.0,148173600.0,467650900.0,0.893919,0.144469,0.331866,6824.0,171.0,15127782.0,9133.0,81,-1.0,-3.42963e-15,58.0,44.3,1.0
82,2025-06,7268.0,15253136.0,47245700.0,100927900.0,148173600.0,467650900.0,0.895157,0.144331,0.331728,6824.0,171.0,15126516.0,9133.0,82,-0.866025,0.5,0.0,0.0,0.0
83,2025-07,7273.0,15409832.0,47245700.0,100927900.0,148173600.0,467650900.0,0.901416,0.144232,0.331775,6824.0,171.0,15283427.0,9133.0,83,-0.5,0.8660254,0.0,0.0,0.0
