# Jefferson Township Fire Run Forecasting Panel
This notebook constructs a yearly panel dataset combining fire run data and parcel data.

In [3]:
import pandas as pd
import numpy as np
from pathlib import Path

fire_path = Path(r"C:\Users\JosephWhite\Documents\GitHub\jefferson-township-run-forecasting\data\clean\fire_incidents_cleaned_sorted.csv")
parcel_path = Path(r"C:\Users\JosephWhite\Documents\GitHub\jefferson-township-run-forecasting\data\clean\parcels_jefferson_cleaned.csv")

fire = pd.read_csv(fire_path)
parcels = pd.read_csv(parcel_path)

In [4]:
# Clean and prepare fire data
fire = fire.rename(columns={
    'basic_incident_date_original_fd1.3': 'date',
    'basic_incident_type_fd1.21': 'incident_type',
    'basic_incident_type_code_fd1.21': 'incident_type_code',
    'basic_incident_year_fd1.3': 'year',
    'basic_property_use_code_fd1.46': 'property_use_code',
})

fire['date'] = pd.to_datetime(fire['date'], errors='coerce')
fire['year'] = fire['date'].dt.year.astype("Int64")
fire = fire[fire['year'].between(2018, 2025)]

# 2018 observed slice and scaling
fire_aug_dec_2018 = fire[(fire['year'] == 2018) & (fire['date'].dt.month >= 8)]
observed_aug_dec_total = fire_aug_dec_2018.shape[0]

# Scaled proportions
incident_type_dist = fire_aug_dec_2018['incident_type'].value_counts(normalize=True).to_frame('proportion')
incident_type_scaled = (incident_type_dist * 2499).round(0).astype(int).T
incident_type_scaled['year'] = 2018

property_use_dist = fire_aug_dec_2018['property_use_code'].dropna().astype(str).value_counts(normalize=True).to_frame('proportion')
property_use_scaled = (property_use_dist * 2499).round(0).astype(int).T
property_use_scaled['year'] = 2018

# Yearly totals with corrected 2018 and scaled 2025
fire_counts_by_year = fire.groupby('year').size().reset_index(name='raw_total_runs')
fire_counts_by_year.loc[fire_counts_by_year['year'] == 2018, 'total_runs'] = 2499
latest_2025_month = fire[fire['year'] == 2025]['date'].dt.month.max()
scale_factor = 12 / latest_2025_month
fire_counts_by_year.loc[fire_counts_by_year['year'] == 2025, 'total_runs'] = round(fire[fire['year'] == 2025].shape[0] * scale_factor)
fire_counts_by_year['total_runs'] = fire_counts_by_year['total_runs'].fillna(fire_counts_by_year['raw_total_runs'])
fire_counts_by_year = fire_counts_by_year.drop(columns='raw_total_runs')


In [5]:
# Full-year run breakdowns
runs_by_incident_type = fire.groupby(['year', 'incident_type']).size().unstack(fill_value=0).reset_index()
runs_by_incident_type = runs_by_incident_type.set_index('year')
runs_by_incident_type.loc[2018] = incident_type_scaled.loc[2018]
runs_by_incident_type = runs_by_incident_type.reset_index()

runs_by_property_use_code = fire.dropna(subset=['property_use_code']).copy()
runs_by_property_use_code['property_use_code'] = runs_by_property_use_code['property_use_code'].astype(str)
runs_by_property_use_code = runs_by_property_use_code.groupby(['year', 'property_use_code']).size().unstack(fill_value=0).reset_index()
runs_by_property_use_code = runs_by_property_use_code.set_index('year')
property_use_scaled.columns = property_use_scaled.columns.astype(str)
runs_by_property_use_code.loc[2018] = property_use_scaled.loc[2018]
runs_by_property_use_code = runs_by_property_use_code.reset_index()


KeyError: 2018

In [None]:
# Classify and summarize parcel data
parcels['landuse'] = parcels['landuse'].astype(str)

def classify_property_type(code):
    if code.startswith('4'):
        return 'residential'
    elif code.startswith('5'):
        return 'commercial'
    elif code.startswith('6'):
        return 'industrial'
    elif code.startswith(('1', '2', '3')):
        return 'agricultural'
    elif code.startswith('9'):
        return 'special'
    else:
        return 'other'

parcels['property_type'] = parcels['landuse'].apply(classify_property_type)
parcels['square_footage'] = parcels['aexmbld'].fillna(0)

parcel_counts_by_type = parcels.groupby(['year', 'property_type']).size().unstack(fill_value=0).reset_index()
sqft_by_type = parcels.groupby(['year', 'property_type'])['square_footage'].sum().unstack(fill_value=0).reset_index()


In [None]:
# Start panel with yearly totals
panel = fire_counts_by_year.set_index('year')

# Add incident types
panel = panel.merge(runs_by_incident_type.set_index('year'), left_index=True, right_index=True, how='left')

# Add parcel metrics for each year
for col in parcel_counts_by_type.columns[1:]:
    panel[f'parcel_count_{col}'] = panel.index.map(dict(zip(parcel_counts_by_type['year'], parcel_counts_by_type[col])))

for col in sqft_by_type.columns[1:]:
    panel[f'total_sqft_{col}'] = panel.index.map(dict(zip(sqft_by_type['year'], sqft_by_type[col])))

panel = panel.reset_index()
panel.to_csv("outputs/fire_forecasting_panel.csv", index=False)
panel.head()
