In [146]:
# Fire Incident Cleaning Script

import pandas as pd
import re

# ── 1. Load raw file ─────────────────────────────────────────────
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run on a machine with this exact directory layout.
file_path = r"C:\Users\JosephWhite\Documents\GitHub\jefferson-township-run-forecasting\data\raw\fire_and_ems_runs_2018_2025.csv"

# Read the raw export with pandas' default parsing (dtypes are inferred).
df = pd.read_csv(filepath_or_buffer=file_path)

In [147]:
# ── 2. Standardize column names ─────────────────────────────────
# Trim surrounding whitespace, lower-case, and turn spaces into underscores.
normalized_columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

# Then delete parentheses and periods outright (literal matches, not regex).
for unwanted_char in ("(", ")", "."):
    normalized_columns = normalized_columns.str.replace(
        unwanted_char, "", regex=False
    )

df.columns = normalized_columns

In [148]:
# ── 3. Create standardized incident number ──────────────────────
# Keep a text copy of the raw incident number. When pandas has inferred the
# column as float, astype(str) renders whole numbers as "123.0"; strip that
# suffix so the later slice / zero-pad step works on the digits themselves.
# Missing values still come through as the text "nan", exactly as before.
incident_raw = df["basic_incident_number_fd1"]
incident_text = incident_raw.astype(str)
if pd.api.types.is_float_dtype(incident_raw):
    incident_text = incident_text.str.replace(r"\.0$", "", regex=True)
df["incident_number_original"] = incident_text

# Drop rows with no incident year so the int cast below cannot fail on NaN.
# .copy() makes the filtered result an independent frame for the column
# assignments that follow.
df = df.dropna(subset=["basic_incident_year_fd13"]).copy()
df["incident_year"] = df["basic_incident_year_fd13"].astype(int)

def fix_incident_number(row):
    """Return a year-prefixed incident number for one record.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``"incident_number_original"`` (the incident number as
        text) and ``"incident_year"`` (an integer year).

    Returns
    -------
    str
        ``f"{year}{last six characters, zero-padded to six}"`` when the
        number is shorter than 10 characters and the year is before 2022;
        otherwise the incident number itself (whitespace-trimmed, and with a
        float-style ``".0"`` suffix removed). Missing-value placeholders
        (``"nan"``, ``""``, ``"None"``, ``"<NA>"``, ``"NaT"``) are returned
        unchanged instead of being turned into a fabricated number.
    """
    inc = row["incident_number_original"]
    year = row["incident_year"]

    text = str(inc).strip()

    # astype(str) turns missing values into placeholder text; building an ID
    # from it would yield values like "2018000nan", so pass it through as-is.
    if text.lower() in {"", "nan", "none", "<na>", "nat"}:
        return inc

    # A numeric ID read as float arrives as e.g. "123.0"; keep only the digits
    # so the ".0" is not mistaken for part of the incident number.
    if text.endswith(".0") and text[:-2].isdigit():
        text = text[:-2]

    if len(text) < 10 and year < 2022:
        # Short pre-2022 number: prefix the year and pad the tail to 6 chars.
        return f"{year}{text[-6:].zfill(6)}"
    return text

# Run the fixer once per row (axis=1) and store the result as a new column.
cleaned_incident_numbers = df.apply(fix_incident_number, axis=1)
df["incident_number_cleaned"] = cleaned_incident_numbers

In [149]:
# ── 4. Convert numeric codes to Int64 ───────────────────────────
# errors="coerce" turns anything unparseable into NaN; the nullable Int64
# dtype then stores those gaps as <NA> while keeping the rest as integers.
property_use_numeric = pd.to_numeric(
    df["basic_property_use_code_fd146"], errors="coerce"
)
df["basic_property_use_code_fd146"] = property_use_numeric.astype("Int64")

In [150]:
# ── 5. Parse incident date column to datetime ───────────────────
# Values that cannot be parsed become NaT (errors="coerce") instead of raising.
incident_date_col = "basic_incident_date_original_fd13"
df[incident_date_col] = pd.to_datetime(df[incident_date_col], errors="coerce")

In [151]:
# ── 6. Save cleaned dataset ─────────────────────────────────────
out_path = r"C:\Users\JosephWhite\Documents\GitHub\jefferson-township-run-forecasting\data\clean\fire_incidents_general_cleaned.csv"

# incident_number_original holds text, so this is a descending string
# (lexicographic) sort rather than a numeric one.
df = df.sort_values("incident_number_original", ascending=False)

# Write without the index so the CSV contains only the data columns.
df.to_csv(out_path, index=False)
print("✓ Cleaned fire run data saved to:", out_path)

✓ Cleaned fire run data saved to: C:\Users\JosephWhite\Documents\GitHub\jefferson-township-run-forecasting\data\clean\fire_incidents_general_cleaned.csv
