Original Replication Script for Hemauer, Saunders, and Desmarais

Note: This file does not include any grid search or hyperparameter tuning; it is only a basic inference replication script.

Last updated: 06/02/2025

In [None]:
### Boehmke et al. 2017 Replication
# Coefficient estimates match the original study exactly; the constant estimate does not.

import statsmodels.api as sm
import pandas as pd
import random
import time
import warnings

# Silence every warning for the rest of the session. Note that this also hides
# any warnings emitted while fitting, so rely on the optimizer status that
# fit() prints to confirm convergence.
warnings.filterwarnings('ignore')

# Seed Python's built-in RNG for reproducibility.
random.seed(1337)

# Data
# Forward slashes keep this relative path valid on Windows, macOS, and Linux
# alike, and match the path style used by the Boushey cell below.
boehmke_2017_full = pd.read_stata(r"boehmke_analysis/replication_data/boehmke2017.dta")

covariates = ["srcs_decay","nbrs_lag","rpcpinc","totpop","legp_squire",
                "citi6010","unif_rep","unif_dem","time","time_sq","time_cube"]
# Keep only the identifiers, the outcome, and the model covariates, then apply
# listwise deletion so every remaining row is complete on those columns.
boehmke_2017 = boehmke_2017_full[["state", "year", "statepol", "adopt"] + covariates].dropna()

# Define X and y
# `year` and `statepol` are not regressors; `statepol` is used only as the
# clustering variable when fitting.
X = boehmke_2017.drop(columns = ['adopt', 'year', 'statepol']).copy()
X = pd.get_dummies(X, columns = ['state'], drop_first = True)  # drop_first avoids perfect multicollinearity
X = sm.add_constant(X)
y = boehmke_2017['adopt']

# Fit Logistic Regression model
# Standard errors are cluster-robust, grouped by `statepol`.
# perf_counter() is a monotonic clock, so the elapsed time cannot be distorted
# by system clock adjustments.
start_time = time.perf_counter()
logistic = sm.Logit(y.astype(float), X.astype(float)).fit(cov_type = "cluster", cov_kwds = {'groups': boehmke_2017['statepol']})
end_time = time.perf_counter()

print(f"Logistic Regression took {end_time - start_time:.2f} seconds")

# Extract summary table
summary_df = logistic.summary2().tables[1]

# Filter out state dummy variables
# get_dummies names the state fixed effects "state_<value>"; dropping them
# leaves only the constant and the substantive covariates in the printout.
summary_filtered = summary_df[~summary_df.index.str.startswith("state_")]

print(summary_filtered)

Optimization terminated successfully.
         Current function value: 0.181244
         Iterations 8
Logistic Regression took 0.09 seconds
                Coef.  Std.Err.          z         P>|z|    [0.025    0.975]
const       -4.932003  0.379749 -12.987549  1.439637e-38 -5.676296 -4.187709
srcs_decay   8.526663  0.438523  19.444071  3.271493e-84  7.667175  9.386151
nbrs_lag     0.392840  0.022265  17.643892  1.133760e-69  0.349202  0.436479
rpcpinc      0.573760  0.074898   7.660526  1.851727e-14  0.426962  0.720558
totpop       0.090543  0.028298   3.199597  1.376197e-03  0.035080  0.146007
legp_squire -1.088974  0.687671  -1.583569  1.132918e-01 -2.436784  0.258836
citi6010     0.009835  0.003520   2.793907  5.207549e-03  0.002936  0.016734
unif_rep    -0.020446  0.076089  -0.268708  7.881541e-01 -0.169578  0.128687
unif_dem     0.062910  0.066440   0.946871  3.437047e-01 -0.067310  0.193131
time        -0.135390  0.017594  -7.695380  1.410739e-14 -0.169872 -0.100907
time_sq      

In [None]:
### Boushey 2016 Replication

# Re-seed Python's built-in RNG with the same seed used in the Boehmke cell so
# this replication starts from a known random state.
random.seed(1337)

# Data: load the Boushey (2016) replication file from Stata format.
boushey_2016_full = pd.read_stata(r"boushey_analysis/replication_data/boushey2016.dta")

# Covariates: column names for this model. This rebinds the `covariates` name
# that the Boehmke cell also assigns.
covariates = ["policycongruent","gub_election","elect2", "hvd_4yr", "fedcrime",
                "leg_dem_per_2pty","dem_governor","insession","propneighpol",
                "citidist","squire_prof86","citi6008","crimespendpc","crimespendpcsq",
                "violentthousand","pctwhite","stateincpercap","logpop","counter","counter2","counter3"]
# Listwise deletion across EVERY column of the full frame, not just the
# covariates listed above.
# NOTE(review): the Boehmke cell subsets to the model columns before dropna();
# confirm that dropping rows with a missing value in any column is intended here.
boushey_2016 = boushey_2016_full.dropna()