In [2]:

# Install dependencies
%pip install --quiet pyreadstat statsmodels seaborn SyntheticControlMethods zstd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import pyreadstat
from SyntheticControlMethods import Synth
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.optimize import minimize, basinhopping
from scipy.stats import gaussian_kde
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant
from statsmodels.stats.sandwich_covariance import cov_hc1
from sklearn.linear_model import LogisticRegression
import warnings
import pyreadstat


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [4]:
def zstd(x, gelman=False):
    """
    Center a variable on its mean and scale it by its standard deviation.

    The mean and standard deviation are computed with ``np.nanmean`` and
    ``np.nanstd``, so NaN entries are ignored for the statistics and remain
    NaN in the result. ``np.nanstd`` is called with its defaults, i.e. the
    population standard deviation.

    Parameters:
    -----------
    x : array-like
        Values to standardize
    gelman : bool
        When True, scale by two standard deviations instead of one
        (Gelman standardization)

    Returns:
    --------
    array-like : Centered and scaled values
    """
    scale = np.nanstd(x)
    if gelman:
        # Gelman's convention: divide by two standard deviations
        scale = 2 * scale

    centered = x - np.nanmean(x)
    return centered / scale


def wild_bootstrap_se(model, y, X, interactions, n_iter=5000, random_state=None):
    """
    Compute wild bootstrap standard errors (Rademacher weights).

    Each iteration flips the sign of every residual with probability 1/2,
    rebuilds the outcome as ``fitted + weight * residual``, refits OLS on
    ``X`` and records the coefficient in the LAST position of the parameter
    vector. The standard deviation (``np.std`` default, ddof=0) of those
    coefficients is returned.

    Parameters:
    -----------
    model : fitted OLS model
        Must expose ``resid`` and ``fittedvalues``
    y : array-like
        Dependent variable (only its length is used)
    X : array-like
        Independent variables; the term of interest must be the last column
    interactions : array-like
        Interaction term column. NOTE: not used by the computation; kept so
        existing callers do not break.
    n_iter : int
        Number of bootstrap iterations
    random_state : int or None
        Seed for a dedicated ``np.random.RandomState``. When None (default)
        the draws come from NumPy's global random state, exactly as before,
        so an earlier ``np.random.seed(...)`` call still applies.

    Returns:
    --------
    float : Bootstrap standard error for the last coefficient
    """
    n = len(y)
    residuals = model.resid
    fitted = model.fittedvalues

    # np.random (module) and RandomState both provide .choice with the same API
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    coefs_boot = []
    for _ in range(n_iter):
        # Rademacher weights (-1 or 1 with equal probability)
        weights = rng.choice([-1, 1], size=n)
        y_boot = fitted + weights * residuals

        # Refit model on the perturbed outcome
        model_boot = OLS(y_boot, X).fit()

        # Positional access via ndarray: when X is a DataFrame, params is a
        # label-indexed Series and params[-1] is a (deprecated) label lookup.
        coefs_boot.append(np.asarray(model_boot.params)[-1])

    return np.std(coefs_boot)


def logit_pdf(x, theta):
    """
    Logistic PDF (derivative of the logistic CDF) evaluated at x @ theta.

    For z = x @ theta the density is

        f(z) = exp(z) / (1 + exp(z))**2

    The density is symmetric in z, so it is computed as
    exp(-|z|) / (1 + exp(-|z|))**2, which never overflows for large |z|.

    Parameters:
    -----------
    x : array-like
        Design matrix
    theta : array-like
        Parameters

    Returns:
    --------
    array : PDF values, one per row of x
    """
    xb = x @ theta
    # exp of a non-positive number lies in (0, 1], so no overflow is possible
    decay = np.exp(-np.abs(xb))
    return decay / ((1 + decay) ** 2)


def panel_to_cs(data, y_name, id_name, time_name):
    """
    Transform panel data to cross-sectional by computing first differences.

    Rows are sorted by (id, time), the within-unit first difference of the
    outcome is stored in a new 'dy' column, and only the second observation
    of each unit is kept. Units with a single observation therefore drop out.
    The input frame is not modified.

    The second row per unit is selected with ``cumcount() == 1`` rather than
    ``groupby().nth(1).reset_index()``: the latter returns differently shaped
    frames across pandas versions (from pandas 2.0 ``nth`` acts as a filter,
    so ``reset_index()`` would add a stray 'index' column).

    Parameters:
    -----------
    data : DataFrame
        Panel data
    y_name : str
        Outcome variable name
    id_name : str
        ID variable name
    time_name : str
        Time variable name

    Returns:
    --------
    DataFrame : One row per unit (its second observation), all original
        columns in their original order plus 'dy' (first difference of y),
        with a fresh 0..n-1 index
    """
    # sort_values returns a new frame, so adding 'dy' leaves the caller's data untouched
    panel = data.sort_values([id_name, time_name])
    panel['dy'] = panel.groupby(id_name)[y_name].diff()

    # cumcount numbers rows within each unit from 0; 1 marks the second observation
    is_second_obs = panel.groupby(id_name).cumcount() == 1
    return panel.loc[is_second_obs].reset_index(drop=True)


# Confirmation message: printed once the helper definitions above have run.
print("Helper functions defined")

Helper functions defined


In [5]:
# Constants used below (previously inline magic numbers)
EXCLUDED_CCODES = [5, 9, 12]   # country codes dropped from the sample
EXCLUDED_ELECTION = 2019       # EP election dropped from the sample
TREATED_CCODE = 11             # UK = 11
TREATMENT_YEAR = 1999          # first treated EP election

# Load the main dataset
dat = pd.read_stata('replication/CountryEPData.dta')

# Exclude certain countries (5, 9, 12) and 2019 election
dat = dat[~dat['ccode'].isin(EXCLUDED_CCODES)]
dat = dat[dat['ep_election'] != EXCLUDED_ELECTION]

# Load covariate data with interpolation
datIP, meta = pyreadstat.read_dta('replication/CountryEPData_covIP.dta')

# Merge datasets
dat = dat.merge(datIP, on=['ccode', 'year', 'ep_election'], how='left')

# Keep only rows whose year is one of the EP election years.
# .copy() makes the filtered frame independent, so the column assignments
# below are not chained assignments on a slice of the merged frame.
dat = dat[dat['year'].isin(dat['ep_election'].unique())].copy()

# Create treatment indicators
dat['D'] = (dat['ccode'] == TREATED_CCODE).astype(int)
dat['fD'] = np.where(dat['ccode'] == TREATED_CCODE, TREATMENT_YEAR, 0)  # First treatment year
dat['post'] = (dat['ep_election'] >= TREATMENT_YEAR).astype(int)

# Adjust opencIP (from percentage)
dat['opencIP'] = dat['opencIP'] / 100

# Standardize covariates
covariates = ['unempIP', 'S_uegenIP', 'opencIP', 'ch_imp_aec', 'RAIIP', 
              'E_eubadIP', 'ch_imp_fns', 'eu_pos_meanIP', 'migr_inflIP']

# Covariates absent from the merged frame are still skipped, but no longer silently
missing_covariates = [name for name in covariates if name not in dat.columns]
if missing_covariates:
    warnings.warn(
        f"Covariates not found after merge (left unstandardized): {missing_covariates}"
    )

for cov in covariates:
    if cov in dat.columns:
        dat[cov] = zstd(dat[cov].values)

print(f"Data loaded: {len(dat)} observations")
print(f"Countries: {dat['country'].nunique()}")
print(f"Time periods: {sorted(dat['ep_election'].unique())}")
print("\nFirst few rows:")
dat.head()

Data loaded: 72 observations
Countries: 9
Time periods: [np.float64(1979.0), np.float64(1984.0), np.float64(1989.0), np.float64(1994.0), np.float64(1999.0), np.float64(2004.0), np.float64(2009.0), np.float64(2014.0)]

First few rows:


Unnamed: 0,ccode,country,year,ep_election,rr_votes,openc,unemp,pop,S_uegen,E_eubad,...,E_NatDem_satisfiedIP,unempIP,opencIP,S_uegenIP,effpar_eleIP,rightgovIP,RAIIP,D,fD,post
5,1,Belgium,1979,1979.0,0.0,100.8315,7.0,9848.381836,13.4,0.026752,...,0.457645,-0.373865,0.542941,1.226065,7.546885,0.0,0.772152,0,0,0
10,1,Belgium,1984,1984.0,1.28,127.2862,10.8,9855.37207,13.9,0.055122,...,0.469118,0.735566,1.198721,1.391971,9.040284,1.0,1.033949,0,0,0
15,1,Belgium,1989,1989.0,4.09,124.0117,7.4,9937.697266,13.7,0.041469,...,0.609894,-0.257083,1.11755,1.325609,8.167466,0.0,1.681966,0,0,0
20,1,Belgium,1994,1994.0,10.73,112.3887,9.8,10115.599609,13.6,0.084512,...,0.56,0.44361,0.82943,1.292428,9.842326,0.0,1.681966,0,0,0
25,1,Belgium,1999,1999.0,10.91,123.997,8.4,10226.419922,13.3,0.06966,...,0.525166,0.034872,1.117186,1.192884,10.288595,0.0,1.315196,0,0,1


In [7]:
# Read the westminster CSV into a DataFrame.
# NOTE(review): the same file is read again in the next code cell (In [22]).
westminster = pd.read_csv('replication/westminster.csv')

In [22]:
# NOTE(review): this cell rebinds `dat` to a fresh load of the raw file, which
# replaces the merged and standardized `dat` built in the earlier load cell.
# Its execution count (22) is also out of sequence. Confirm which version of
# `dat` the downstream cells expect.
dat = pd.read_stata('replication/CountryEPData.dta')
westminster = pd.read_csv('replication/westminster.csv')

# One combined mask: drop France rows and rows belonging to the 2019 election
not_france = dat['country'] != 'France'
not_2019 = dat['ep_election'] != 2019
dat = dat.loc[not_france & not_2019]

dat

Unnamed: 0,ccode,country,year,ep_election,rr_votes,openc,unemp,pop,S_uegen,E_eubad,RAI,E_NatDem_satisfied,enep_ep,ch_imp_aec,ch_imp_fns,rightgov,eu_pos_mean,eu_pos_meanIP,migr_infl,migr_inflIP
0,1,Belgium,1974,,,104.49560,2.3,9772.418945,,0.046499,26.035097,,,,,0.0,,,,
1,1,Belgium,1975,1979.0,,90.84992,4.2,9800.700195,12.8,0.034266,26.035097,,,,,1.0,,6.427861,,
2,1,Belgium,1976,1979.0,,96.10307,5.5,9818.226562,13.0,0.038414,26.035097,0.532901,,,,1.0,,6.427861,,
3,1,Belgium,1977,1979.0,,94.97614,6.3,9830.357422,13.1,0.046106,26.035097,0.531076,,,,0.0,,6.427861,,
4,1,Belgium,1978,1979.0,,91.63636,6.8,9839.533203,13.2,0.045521,26.035097,0.487478,,,,0.0,,6.427861,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
487,12,Portugal,2010,2014.0,,67.30485,12.0,10573.099609,10.6,0.205078,3.803829,0.283630,,,,0.0,,,,
488,12,Portugal,2011,2014.0,,72.86388,12.9,10557.559570,10.5,0.261450,,0.293700,,,,0.0,,,,
489,12,Portugal,2012,2014.0,,75.92896,15.8,10514.839844,,,,0.230700,,,,1.0,,,,
490,12,Portugal,2013,2014.0,,78.02734,16.4,10457.290039,,,,0.145795,,,,1.0,,,,
