In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import patsy

In [2]:
# NYC 311 x heat exposure x ACS socioeconomic status: block-group-by-day panel.

# Read the final modeling table. GEOID_BG is forced to string so the block
# group identifier is kept as text rather than parsed as a number.
DATA_FILE = "data/model/nyc311_heat_acs_bg_2018_2025_final_regression_cleaned.csv"
df = pd.read_csv(DATA_FILE, dtype={"GEOID_BG": str})

In [3]:
# Parse the date column into pandas datetimes, replacing the column in place.
df["date"] = df["date"].pipe(pd.to_datetime)

# Report the frame's dimensions, then preview the first rows.
print("Dataset loaded:", df.shape)
df.head()

Dataset loaded: (0, 50)


Unnamed: 0,GEOID_BG,date,total_calls,qol_calls,qol_pct,MAX_BAT,MAX_CP,MAX_JFK,MAX_LGA,MAX_PORT,...,poverty_rate,dow,year,log_total_calls,tmax_city_f,tmean_city_f,poverty_rate_c,medhhinc_c,extreme_x_poverty,extreme_x_no_vehicle


In [4]:
# Missing values per column of the loaded panel. This check runs on `df`
# because `df_sub` is only created in the subsetting cell further down;
# referencing it here fails with a NameError on a fresh Restart & Run All.
df.isna().sum()

In [None]:
# Work on a reduced window of years so the model can be timed quickly.
test_years = [2019, 2020]
in_test_window = df["year"].isin(test_years)

# Copy the selection so df_sub is independent of the full frame.
df_sub = df.loc[in_test_window].copy()

print(df_sub.shape)

In [None]:
# Negative binomial GLM specification with fixed effects, written as a Patsy
# formula. C(...) marks a variable as categorical, which is how the
# day-of-week, year, and block-group fixed effects are declared.
_rhs_terms = [
    "extreme_heat",
    "tmax_city_f",
    "poverty_rate_c",
    "medhhinc_c",
    "extreme_x_poverty",
    "extreme_x_no_vehicle",
    "C(dow)",
    "C(year)",
    "C(GEOID_BG)",
]

# One right-hand-side term per indented line, separated by " +" line breaks.
formula = "\nqol_calls ~\n    " + " +\n    ".join(_rhs_terms) + "\n"

In [None]:
# Build the response (y) and predictor (X) design matrices from the formula,
# returned as pandas DataFrames.
design = patsy.dmatrices(formula, data=df_sub, return_type="dataframe")
y, X = design

print("Design matrices ready:")
print("X:", X.shape, "y:", y.shape)

In [None]:
# Fit the negative binomial GLM, using the log_total_calls column as the offset.
#
# Patsy drops rows containing missing values by default, so X and y can have
# fewer rows than df_sub. The offset is therefore selected by X.index, which
# keeps it aligned row for row with the design matrices instead of failing
# (or misaligning) when rows were dropped.
#
# NOTE(review): sm.families.NegativeBinomial() is created with its default
# dispersion parameter (alpha); it is not estimated by this fit. Confirm that
# a fixed alpha is intended.
log_exposure = df_sub.loc[X.index, "log_total_calls"]

model = sm.GLM(
    y,
    X,
    family=sm.families.NegativeBinomial(),
    offset=log_exposure,
)

res = model.fit()
print(res.summary())

In [None]:
# Cluster-robust standard errors by block group.
#
# GLM results do not provide a public get_robustcov_results() method (that is
# part of the linear regression results API), so the clustered covariance is
# requested through fit(cov_type=..., cov_kwds=...). This runs the fit again:
# the coefficients match `res`; only the covariance and standard errors differ.
#
# Cluster labels are taken at X.index so they correspond to the rows actually
# used in the model (Patsy may have dropped rows with missing values).
clusters = df_sub.loc[X.index, "GEOID_BG"]
cluster_codes = pd.factorize(clusters)[0]  # one integer label per block group

robust_res = model.fit(
    cov_type="cluster",
    cov_kwds={"groups": cluster_codes},
)

print(robust_res.summary())

In [None]:
# Incidence rate ratios: exponentiated coefficients from the cluster-robust fit.
irr = np.exp(robust_res.params).rename("IRR").to_frame()

# StdErr and p-value are on the log (coefficient) scale, exactly as reported
# by the fitted results; StdErr is NOT a standard error of the IRR itself.
irr["StdErr"] = robust_res.bse
irr["p-value"] = robust_res.pvalues

# Uncertainty on the IRR scale: exponentiate the coefficient confidence limits
# (conf_int() defaults to a 95% interval) so each ratio carries its own range.
irr_ci = np.exp(robust_res.conf_int())
irr["CI_lower"] = irr_ci.iloc[:, 0]
irr["CI_upper"] = irr_ci.iloc[:, 1]

print("\nIncidence Rate Ratios (IRR):\n")
print(irr)

In [None]:
# Persist the cluster-robust regression summary as plain text.
output_path = "data/model/nb_regression_results_robust_2019_2020.txt"

with open(output_path, mode="w") as summary_file:
    report_text = robust_res.summary().as_text()
    summary_file.write(report_text)

print("Saved regression summary to:", output_path)