Original Replication Script for Hemauer, Saunders, and Desmarais

Note: This file does not include any gridsearch or hyperparameter tuning. This is just a basic inference replication script.

Last updated: 06/04/2025

In [1]:
### Preprocessing

import statsmodels.api as sm
import pandas as pd
import random
import time
import warnings

warnings.filterwarnings('ignore')

random.seed(1337)

In [None]:
### Boehmke et al. 2017 Replication
# Coef estimates are exact, constant estimate is not.

# Data
boehmke_2017_full = pd.read_stata(r"data/boehmke2017.dta")

covariates = ["srcs_decay","nbrs_lag","rpcpinc","totpop","legp_squire",
                "citi6010","unif_rep","unif_dem","time","time_sq","time_cube"]
boehmke_2017 = boehmke_2017_full[["state", "year", "statepol", "adopt"] + covariates].dropna()

# Define X and y
X = boehmke_2017.drop(columns = ['adopt', 'year', 'statepol']).copy()
X = pd.get_dummies(X, columns = ['state'], drop_first = True)  # drop_first avoids perfect multicollinearity
X = sm.add_constant(X)
y = boehmke_2017['adopt']

# Fit Logistic Regression model
start_time = time.time()
logistic = sm.Logit(y.astype(float), X.astype(float)).fit(cov_type = "cluster", cov_kwds = {'groups': boehmke_2017['statepol']})
end_time = time.time()

print(f"Logistic Regression took {end_time - start_time:.2f} seconds")

# Extract summary table
summary_df = logistic.summary2().tables[1]

# Filter out state dummy variables
summary_filtered = summary_df[~summary_df.index.str.startswith("state_")]

print(summary_filtered)

Optimization terminated successfully.
         Current function value: 0.181244
         Iterations 8
Logistic Regression took 0.09 seconds
                Coef.  Std.Err.          z         P>|z|    [0.025    0.975]
const       -4.932003  0.379749 -12.987549  1.439637e-38 -5.676296 -4.187709
srcs_decay   8.526663  0.438523  19.444071  3.271493e-84  7.667175  9.386151
nbrs_lag     0.392840  0.022265  17.643892  1.133760e-69  0.349202  0.436479
rpcpinc      0.573760  0.074898   7.660526  1.851727e-14  0.426962  0.720558
totpop       0.090543  0.028298   3.199597  1.376197e-03  0.035080  0.146007
legp_squire -1.088974  0.687671  -1.583569  1.132918e-01 -2.436784  0.258836
citi6010     0.009835  0.003520   2.793907  5.207549e-03  0.002936  0.016734
unif_rep    -0.020446  0.076089  -0.268708  7.881541e-01 -0.169578  0.128687
unif_dem     0.062910  0.066440   0.946871  3.437047e-01 -0.067310  0.193131
time        -0.135390  0.017594  -7.695380  1.410739e-14 -0.169872 -0.100907
time_sq      

In [None]:
### Boushey 2016 Replication (Table 2: Model 2)
# Coef estimates are close, but not the same. N is the same.
# Maybe missing random effects (?)

# Data
boushey_2016_full = pd.read_stata(r"data/boushey2016.dta")

# Covariates
covariates = ["policycongruent","gub_election","elect2", "hvd_4yr", "fedcrime",
                "leg_dem_per_2pty","dem_governor","insession","propneighpol",
                "citidist","squire_prof86","citi6008","crimespendpc","crimespendpcsq",
                "violentthousand","pctwhite","stateincpercap","logpop","counter","counter2","counter3"]
boushey_2016 = boushey_2016_full[["state", "styear", "dvadopt"] + covariates].dropna()

# Define X and y
X = boushey_2016[['state'] + covariates].copy()
X = pd.get_dummies(X, columns = ['state'], drop_first = True)  # drop_first avoids perfect multicollinearity
X = sm.add_constant(X)
y = boushey_2016['dvadopt']

# Fit Logistic Regression model
start_time = time.time()
logistic = sm.Logit(y.astype(float), X.astype(float)).fit(cov_type = "cluster", cov_kwds = {'groups': boushey_2016['styear']})
end_time = time.time()

print(f"Logistic Regression took {end_time - start_time:.2f} seconds")

# Extract summary table
summary_df = logistic.summary2().tables[1]

# Filter out state dummy variables
summary_filtered = summary_df[~summary_df.index.str.startswith("state_")]

print(summary_filtered)

Optimization terminated successfully.
         Current function value: 0.219523
         Iterations 8
Logistic Regression took 0.07 seconds
                     Coef.  Std.Err.          z          P>|z|    [0.025  \
const            -1.193708       NaN        NaN            NaN       NaN   
policycongruent   0.372078  0.061058   6.093815   1.102512e-09  0.252406   
gub_election      0.064567  0.143689   0.449351   6.531784e-01 -0.217058   
elect2            0.092987  0.104155   0.892780   3.719747e-01 -0.111152   
hvd_4yr           0.001258  0.006413   0.196132   8.445071e-01 -0.011311   
fedcrime          1.228812  0.535981   2.292640   2.186874e-02  0.178308   
leg_dem_per_2pty -0.001555  0.004704  -0.330524   7.410041e-01 -0.010774   
dem_governor      0.023219  0.075940   0.305754   7.597916e-01 -0.125621   
insession         2.172243  0.325451   6.674560   2.479746e-11  1.534370   
propneighpol      2.429220  0.105420  23.043276  1.717934e-117  2.222601   
citidist         -0.0487

In [None]:
### Bricker & Lacombe 2021 (Table 3: Model 3)(Monadic Model)

# Data
bricker_lacombe_2021_full = pd.read_stata(r"data/bricker_lacombe2021.dta")

# Create State Year Variable (Cluster)
# Figure out fixed/random effect specification

# Covariates
covariates = ["std_score","initiative","init_sigs","std_population",
                "std_citideology","unified","std_income","std_legp_squire",
                "duration","durationsq","durationcb", "year"]
bricker_lacombe_2021 = bricker_lacombe_2021_full[["state", "stateyear", "adoption"] + covariates].dropna()

# Define X and y
X = bricker_lacombe_2021[['state'] + covariates].copy()
X = pd.get_dummies(X, columns = ['state'], drop_first = True)  # drop_first avoids perfect multicollinearity
X = sm.add_constant(X)
y = boushey_2016['adoption']

# Fit Logistic Regression model
start_time = time.time()
logistic = sm.Logit(y.astype(float), X.astype(float)).fit(cov_type = "cluster", cov_kwds = {'groups': bricker_lacombe_2021['stateyear']})
end_time = time.time()

print(f"Logistic Regression took {end_time - start_time:.2f} seconds")

# Extract summary table
summary_df = logistic.summary2().tables[1]

# Filter out state dummy variables
summary_filtered = summary_df[~summary_df.index.str.startswith("state_")]

print(summary_filtered)

KeyError: "['stateyear'] not in index"