In [None]:
!pip install linearmodels

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from linearmodels.panel import PanelOLS

# Show every column when a DataFrame is displayed (no "..." truncation).
pd.options.display.max_columns = None

In [None]:
# Mount Google Drive inside the Colab VM; this import only resolves when running on Colab.
# NOTE(review): the CSV path read further down is not under this mount point —
# confirm whether the mount is still required.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Path of the unified CSV that every later cell works from.
PATH_TO_UNIFIED = "/content/corrected_unified_data.csv"

# Read the whole file into `df`.
df = pd.read_csv(filepath_or_buffer=PATH_TO_UNIFIED)

# Report (rows, columns), then preview the first five rows.
print(df.shape)
df.head(5)

(3178, 9)


Unnamed: 0,year,Country,type,CV_kast,Turnout_kast,in_kast,CV_idea,Turnout_idea,in_idea
0,1945,Canada,Legislative,0.0,75.3,1.0,0.0,76.31,1.0
1,1945,Denmark,Legislative,0.0,86.3,1.0,0.0,86.29,1.0
2,1945,Finland,Legislative,0.0,74.9,1.0,0.0,74.87,1.0
3,1945,Ireland,Presidential,0.0,63.0,1.0,0.0,60.24,1.0
4,1945,Norway,Legislative,0.0,76.4,1.0,0.0,76.36,1.0


In [None]:
# Number of distinct countries next to the total number of rows.
n_countries = df["Country"].nunique()
n_rows = len(df)
print(n_countries, n_rows)

214 3178


In [None]:
# Frequency of each distinct CV_idea value (missing values are not counted).
cv_idea = df["CV_idea"]
cv_idea.value_counts()

CV_idea
0.00    1035
1.00     338
0.35       6
0.01       6
0.27       4
0.28       4
0.25       3
0.26       3
0.34       2
0.31       2
0.07       2
0.42       1
0.13       1
0.08       1
Name: count, dtype: int64

In [None]:
# Keep only rows where BOTH CV_idea and CV_kast are exactly 0 or 1.
# Rows with a fractional value, or with a missing value in either column, are
# dropped as well (NaN never compares equal to 0 or 1).
# NOTE(review): this restricts both analyses below to rows coded in both
# sources — confirm that this is intended.
binary_cv = df['CV_idea'].isin([0.0, 1.0]) & df['CV_kast'].isin([0, 1])
df = df[binary_cv]
len(df)

1203

## 1. Kastelka

In [None]:
# Plain regression
# ================

# Sample: rows flagged in_kast == 1 that have both regression variables present.
in_kast_rows = df["in_kast"] == 1
tmp_1 = df[in_kast_rows].dropna(subset=["CV_kast", "Turnout_kast"]).copy()
print(tmp_1.shape)

# Ordinary least squares of Turnout_kast on an intercept and CV_kast.
model = smf.ols(formula="Turnout_kast ~ 1 + CV_kast", data=tmp_1)
results_1 = model.fit()

# Display only the coefficient table of the summary.
results_1.summary().tables[1]

(1408, 9)


0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,67.5147,0.425,158.754,0.000,66.680,68.349
CV_kast,14.5934,0.865,16.872,0.000,12.897,16.290


In [None]:
# Distinct CV_kast values present in the regression sample, in order of appearance.
cv_kast = tmp_1["CV_kast"]
cv_kast.unique()

array([0.  , 1.  , 0.42, 0.25, 0.34, 0.26, 0.35, 0.27, 0.28, 0.31, 0.07,
       0.01, 0.13, 0.08])

In [None]:
# Number of distinct countries in the Kastelka regression sample.
countries_1 = tmp_1["Country"]
countries_1.nunique()

118

In [None]:
# First and last year covered by the Kastelka regression sample.
years_1 = tmp_1["year"]
(years_1.min(), years_1.max())

(1945, 2017)

In [None]:
# Account for fixed effects
# =========================

# PanelOLS expects an (entity, time) MultiIndex. The guard keeps this cell
# re-runnable: an unconditional set_index raises KeyError on a second run
# because "Country" and "year" are no longer columns by then.
if {"Country", "year"}.issubset(tmp_1.columns):
    tmp_1 = tmp_1.set_index(["Country", "year"])

# `EntityEffects` is what adds the country fixed effects. Without that term
# PanelOLS fits a pooled regression and simply reproduces the plain OLS
# coefficients. drop_absorbed=True drops any regressor that the effects absorb.
formula = 'Turnout_kast ~ 1 + CV_kast + EntityEffects'
fe_model = PanelOLS.from_formula(formula, data=tmp_1, drop_absorbed=True)
# Standard errors clustered by entity (country).
results_2 = fe_model.fit(cov_type='clustered', cluster_entity=True)

# Display only the coefficient table of the summary.
results_2.summary.tables[1]

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Intercept,67.515,1.5993,42.215,0.0000,64.377,70.652
CV_kast,14.593,2.7070,5.3909,0.0000,9.2831,19.904


## 2. Idea

In [None]:
# Plain regression
# ================

# Sample: rows flagged in_idea == 1 that have both regression variables present.
in_idea_rows = df["in_idea"] == 1
tmp_2 = df[in_idea_rows].dropna(subset=["CV_idea", "Turnout_idea"]).copy()
print(tmp_2.shape)

# Ordinary least squares of Turnout_idea on an intercept and CV_idea.
model = smf.ols(formula="Turnout_idea ~ 1 + CV_idea", data=tmp_2)
results_3 = model.fit()

# Display only the coefficient table of the summary.
results_3.summary().tables[1]

(3008, 9)


0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,68.4246,0.322,212.359,0.000,67.793,69.056
CV_idea,9.2758,0.675,13.748,0.000,7.953,10.599


In [None]:
# Distinct CV_idea values present in the regression sample, in order of appearance.
cv_idea_sample = tmp_2["CV_idea"]
cv_idea_sample.unique()

array([0., 1.])

In [None]:
# Number of distinct countries in the Idea regression sample.
countries_2 = tmp_2["Country"]
countries_2.nunique()

198

In [None]:
# First and last year covered by the Idea regression sample.
years_2 = tmp_2["year"]
(years_2.min(), years_2.max())

(1945, 2023)

In [None]:
# Account for fixed effects
# =========================

# PanelOLS expects an (entity, time) MultiIndex. The guard keeps this cell
# re-runnable: an unconditional set_index raises KeyError on a second run
# because "Country" and "year" are no longer columns by then.
if {"Country", "year"}.issubset(tmp_2.columns):
    tmp_2 = tmp_2.set_index(["Country", "year"])

# `EntityEffects` is what adds the country fixed effects. Without that term
# PanelOLS fits a pooled regression rather than a fixed-effects one.
# drop_absorbed=True drops any regressor that the effects absorb.
formula = 'Turnout_idea ~ 1 + CV_idea + EntityEffects'
fe_model = PanelOLS.from_formula(formula, data=tmp_2, drop_absorbed=True)
# Standard errors clustered by entity (country).
results_4 = fe_model.fit(cov_type='clustered', cluster_entity=True)

# Display only the coefficient table of the summary.
results_4.summary.tables[1]

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Intercept,67.880,1.6489,41.167,0.0000,64.645,71.115
CV_idea,14.492,2.7032,5.3610,0.0000,9.1885,19.795


In [None]:
# Rebuild tmp_2 from df so that Country and year are ordinary columns again
# (the fixed-effects cell moved them into the index).
in_idea_rows = df["in_idea"] == 1
tmp_2 = df[in_idea_rows].dropna(subset=["CV_idea", "Turnout_idea"]).copy()

In [None]:
# Number of rows in the rebuilt Idea sample, as a 1-tuple.
cv_idea_rebuilt = tmp_2["CV_idea"]
cv_idea_rebuilt.shape

(1238,)

In [None]:
# Show the year column of the rebuilt Idea sample.
# (The original line ended in a dangling "." and did not parse.)
tmp_2["year"]

0       1945
1       1945
2       1945
3       1945
4       1945
        ... 
1401    2017
1402    2017
1403    2017
1404    2017
1407    2017
Name: year, Length: 1238, dtype: int64