In [46]:
# import sys
# !{sys.executable} -m pip install pandas
# !{sys.executable} -m pip install statsmodels
# !{sys.executable} -m pip install tabulate
# !{sys.executable} -m pip install httpimport

import pandas as pd
import statsmodels.api as sm
import numpy as np
from statsmodels.iolib.summary2 import summary_col
from tabulate import tabulate

In [47]:
# Source CSV (data_election_crime_merged.csv) hosted in the pol_viol GitHub repository.
DATA_URL = r'https://raw.githubusercontent.com/mariusgruenewald/pol_viol/main/data_election_crime_merged.csv'
data_panel_diff = pd.read_csv(DATA_URL)

In [48]:
# One-hot encode 'party' and 'bl_kuerzel' (presumably the federal-state abbreviation)
# and append the indicator columns, prefixed 'party_' and 'bl_', to the panel.
#
# dtype=int is requested explicitly: pandas >= 2.0 returns boolean dummies by default,
# and a design matrix that mixes float and bool columns is converted to an object
# array, which statsmodels' OLS refuses to fit. Integer 0/1 columns behave the same
# on every pandas version.
#
# NOTE: not idempotent -- re-running this cell appends the dummy columns a second time.
data_panel_diff = pd.concat(
    [
        data_panel_diff,
        pd.get_dummies(data_panel_diff['party'], prefix='party', dtype=int),
        pd.get_dummies(data_panel_diff['bl_kuerzel'], prefix='bl', dtype=int),
    ],
    axis=1,
)

### Let's consider the standard measure of crime (against a party in a city)

In [50]:
# Keep only rows whose 'cycle_1' equals 2014, then collapse rows that repeat the same
# (city, party, plz, state, p_female_diff, crime_count_party) combination to the first one.
is_2014_cycle = data_panel_diff['cycle_1'] == 2014
dedup_keys = ['city', 'party', 'plz', 'state', 'p_female_diff', 'crime_count_party']
data_panel_diff = (
    data_panel_diff
    .loc[is_2014_cycle]
    .drop_duplicates(subset=dedup_keys, ignore_index=True)
)
data_panel_diff

Unnamed: 0.1,Unnamed: 0,crime,city,law,date,background,suspects,party,state,plz,...,county_Uckermark,county_Ulm,county_Unstrut-Hainich-Kreis,county_Vogtlandkreis,county_Waldshut,county_Wartburgkreis,county_Weimar,county_Weimarer Land,county_Zollernalbkreis,county_Zwickau
0,17,Beleidigung,Stuttgart,185 StGB,2019-01-09,Links,1.0,AfD,BW,70173.0,...,0,0,0,0,0,0,0,0,0,0
1,19,,Stuttgart,,,,,CDU,BW,70173.0,...,0,0,0,0,0,0,0,0,0,0
2,20,,Stuttgart,,,,,DIE LINKE,BW,70173.0,...,0,0,0,0,0,0,0,0,0,0
3,21,,Stuttgart,,,,,FDP,BW,70173.0,...,0,0,0,0,0,0,0,0,0,0
4,22,,Stuttgart,,,,,GRÜNE,BW,70173.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9242,20588,,Pinnow,,,,,DIE LINKE,BB,16278.0,...,1,0,0,0,0,0,0,0,0,0
9243,20589,,Pinnow,,,,,CDU,BB,16278.0,...,1,0,0,0,0,0,0,0,0,0
9244,20590,,Pinnow,,,,,GRÜNE,BB,16278.0,...,1,0,0,0,0,0,0,0,0,0
9245,20594,,Passow,,,,,CDU,BB,16306.0,...,1,0,0,0,0,0,0,0,0,0


In [51]:
# Rescale the outcome by a factor of 100 (presumably share -> percentage points; TODO confirm).
# NOTE: not idempotent -- re-running this cell multiplies the column by 100 again.
data_panel_diff['p_female_diff'] = data_panel_diff['p_female_diff'].mul(100)

In [52]:
# Overall mean of the rescaled outcome column (pandas skips NaN values by default).
data_panel_diff['p_female_diff'].mean()

1.8368332446078404

In [53]:
# Show the outcome next to the city and the crime_count_party regressor for a visual sanity check.
data_panel_diff[['p_female_diff', 'city', 'crime_count_party']]

Unnamed: 0,p_female_diff,city,crime_count_party
0,6.666667,Stuttgart,14.0
1,-5.000000,Stuttgart,1.0
2,1.666667,Stuttgart,0.0
3,-1.666667,Stuttgart,4.0
4,0.000000,Stuttgart,1.0
...,...,...,...
9242,20.000000,Pinnow,0.0
9243,5.555556,Pinnow,0.0
9244,0.000000,Pinnow,0.0
9245,0.000000,Passow,0.0


In [54]:
print("Conditional means based on party (no restriction to wether a crime has happened or whether the party stood twice)")
# Select the two columns of interest *before* aggregating. Calling .mean() on the whole
# grouped frame also tries to average the string columns (crime, city, law, date, ...),
# which raises a TypeError on pandas >= 2.0, and needlessly aggregates every dummy column.
party_means = (
    data_panel_diff
    .groupby(['party'], as_index=False)[['crime_count_party', 'p_female_diff']]
    .mean()
)
# Number of rows with a non-missing outcome.
n_with_outcome = int(data_panel_diff['p_female_diff'].notna().sum())
party_means, n_with_outcome

Conditional means based on party (no restriction to wether a crime has happened or whether the party stood twice)


(       party  crime_count_party  p_female_diff
 0        AfD           0.346580       5.022525
 1        CDU           0.079655       1.393978
 2  DIE LINKE           0.000000       2.868497
 3        FDP           0.033910       1.560927
 4      GRÜNE           0.075260       3.091604
 5        SPD           0.158374       1.363334, 3924)

In [55]:
# Drop rows where either the outcome or the crime count is missing.
# Reassigning (instead of inplace=True) avoids mutating a frame that earlier cells displayed.
data_panel_diff = data_panel_diff.dropna(subset=['p_female_diff', 'crime_count_party'])
print("Conditional means based on party when they stood for office twice")
# Select the two columns of interest *before* aggregating. Calling .mean() on the whole
# grouped frame also tries to average the string columns (crime, city, law, date, ...),
# which raises a TypeError on pandas >= 2.0, and needlessly aggregates every dummy column.
party_means = (
    data_panel_diff
    .groupby(['party'], as_index=False)[['crime_count_party', 'p_female_diff']]
    .mean()
)
# Number of rows with a non-missing outcome (all remaining rows after the dropna above).
n_with_outcome = int(data_panel_diff['p_female_diff'].notna().sum())
party_means, n_with_outcome

Conditional means based on party when they stood for office twice


(       party  crime_count_party  p_female_diff
 0        AfD           4.962963       5.022525
 1        CDU           0.105023       1.393978
 2  DIE LINKE           0.000000       2.868497
 3        FDP           0.118110       1.560927
 4      GRÜNE           0.213542       3.091604
 5        SPD           0.262361       1.363334, 3924)

In [56]:
# Baseline bivariate OLS: p_female_diff on crime_count_party plus an intercept,
# with standard errors clustered on the 'party_lr' column.
# y and X are reused by the following model cells.
y = data_panel_diff['p_female_diff']
X = sm.add_constant(data_panel_diff['crime_count_party'])
party_lr_clusters = data_panel_diff['party_lr']
model1 = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds=dict(groups=party_lr_clusters))
model1.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,6.304
Date:,"Sat, 25 Sep 2021",Prob (F-statistic):,0.241
Time:,14:21:06,Log-Likelihood:,-16853.0
No. Observations:,3924,AIC:,33710.0
Df Residuals:,3922,BIC:,33720.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.8251,0.309,5.904,0.000,1.219,2.431
crime_count_party,0.0562,0.022,2.511,0.012,0.012,0.100

0,1,2,3
Omnibus:,624.04,Durbin-Watson:,2.016
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5828.169
Skew:,0.466,Prob(JB):,0.0
Kurtosis:,8.897,Cond. No.,2.06


In [57]:
# Same specification as model1 (y and X come from the model1 cell); only the clustering
# variable changes: standard errors are clustered on 'party'.
party_clusters = data_panel_diff['party']
model2 = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds=dict(groups=party_clusters))
model2.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,3.204
Date:,"Sat, 25 Sep 2021",Prob (F-statistic):,0.133
Time:,14:21:06,Log-Likelihood:,-16853.0
No. Observations:,3924,AIC:,33710.0
Df Residuals:,3922,BIC:,33720.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.8251,0.318,5.747,0.000,1.203,2.448
crime_count_party,0.0562,0.031,1.790,0.073,-0.005,0.118

0,1,2,3
Omnibus:,624.04,Durbin-Watson:,2.016
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5828.169
Skew:,0.466,Prob(JB):,0.0
Kurtosis:,8.897,Cond. No.,2.06


In [58]:
# Same specification as model1 (y and X come from the model1 cell); only the clustering
# variable changes: standard errors are clustered on 'state'.
state_clusters = data_panel_diff['state']
model3 = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds=dict(groups=state_clusters))
model3.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,3.157
Date:,"Sat, 25 Sep 2021",Prob (F-statistic):,0.174
Time:,14:21:06,Log-Likelihood:,-16853.0
No. Observations:,3924,AIC:,33710.0
Df Residuals:,3922,BIC:,33720.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.8251,0.703,2.597,0.009,0.448,3.203
crime_count_party,0.0562,0.032,1.777,0.076,-0.006,0.118

0,1,2,3
Omnibus:,624.04,Durbin-Watson:,2.016
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5828.169
Skew:,0.466,Prob(JB):,0.0
Kurtosis:,8.897,Cond. No.,2.06


In [59]:
print(summary_col([model1,model2, model3]))
# An increase of one crime against a party is associated with a 0.056 higher p_female_diff.
# Because p_female_diff was multiplied by 100 in an earlier cell, this is presumably
# 0.056 percentage points of the female share in the following period, not 0.06%.
# The point estimate is identical across the three columns; only the clustering of the
# standard errors differs (party_lr, party, state).


                  p_female_diff I p_female_diff II p_female_diff III
--------------------------------------------------------------------
const             1.8251          1.8251           1.8251           
                  (0.3092)        (0.3176)         (0.7029)         
crime_count_party 0.0562          0.0562           0.0562           
                  (0.0224)        (0.0314)         (0.0316)         
Standard errors in parentheses.


In [77]:
# Run the local helper script estout_func.py. The estout_ols function called further down
# is not defined anywhere in this notebook, so it presumably comes from this script (TODO confirm).
%run estout_func.py

In [82]:
# Extended specification: crime count plus party indicators (DIE LINKE is the party left
# out of the list) and the state indicators bl_BB, bl_SN, bl_TH.
# Standard errors are clustered on 'party_lr'. X2 is reused by model5 and model6.
party_and_state_dummies = ["party_GRÜNE", "party_SPD", "party_CDU", "party_FDP", "party_AfD", 'bl_BB', 'bl_SN', 'bl_TH']
X2 = sm.add_constant(data_panel_diff[["crime_count_party"] + party_and_state_dummies])
party_lr_clusters = data_panel_diff['party_lr']
model4 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds=dict(groups=party_lr_clusters))
model4.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,0.001599
Date:,"Sat, 25 Sep 2021",Prob (F-statistic):,0.975
Time:,14:38:31,Log-Likelihood:,-16849.0
No. Observations:,3924,AIC:,33720.0
Df Residuals:,3914,BIC:,33780.0
Df Model:,9,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,1.99e-14,1.44e+14,0.000,2.868,2.868
crime_count_party,-0.0369,0.042,-0.887,0.375,-0.118,0.045
party_GRÜNE,0.2295,0.015,14.874,0.000,0.199,0.260
party_SPD,-1.5042,0.009,-161.853,0.000,-1.522,-1.486
party_CDU,-1.4742,0.003,-543.754,0.000,-1.479,-1.469
party_FDP,-1.3032,0.005,-265.440,0.000,-1.313,-1.294
party_AfD,2.0915,0.515,4.060,0.000,1.082,3.101
bl_BB,-1.5629,5.992,-0.261,0.794,-13.307,10.181
bl_SN,3.2762,0.939,3.489,0.000,1.436,5.116

0,1,2,3
Omnibus:,597.582,Durbin-Watson:,2.015
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5762.065
Skew:,0.414,Prob(JB):,0.0
Kurtosis:,8.878,Cond. No.,75.3


In [83]:
# Extended specification (y and X2 from the cells above) with standard errors clustered on 'party'.
party_clusters = data_panel_diff['party']
model5 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds=dict(groups=party_clusters))
model5.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,530.4
Date:,"Sat, 25 Sep 2021",Prob (F-statistic):,9.37e-07
Time:,14:38:41,Log-Likelihood:,-16849.0
No. Observations:,3924,AIC:,33720.0
Df Residuals:,3914,BIC:,33780.0
Df Model:,9,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,7.46e-15,3.85e+14,0.000,2.868,2.868
crime_count_party,-0.0369,0.059,-0.621,0.534,-0.153,0.079
party_GRÜNE,0.2295,0.020,11.279,0.000,0.190,0.269
party_SPD,-1.5042,0.011,-132.460,0.000,-1.527,-1.482
party_CDU,-1.4742,0.004,-335.298,0.000,-1.483,-1.466
party_FDP,-1.3032,0.007,-186.011,0.000,-1.317,-1.289
party_AfD,2.0915,0.370,5.653,0.000,1.366,2.817
bl_BB,-1.5629,4.596,-0.340,0.734,-10.570,7.444
bl_SN,3.2762,1.829,1.791,0.073,-0.309,6.861

0,1,2,3
Omnibus:,597.582,Durbin-Watson:,2.015
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5762.065
Skew:,0.414,Prob(JB):,0.0
Kurtosis:,8.878,Cond. No.,75.3


In [81]:
# Extended specification (y and X2 from the cells above) with standard errors clustered on 'state'.
# NOTE(review): the saved output of this cell lists party_DIE LINKE instead of party_AfD and
# carries an earlier execution count than the cell that builds X2, so it appears to stem from
# an older X2 -- re-run the notebook top to bottom to refresh it.
state_clusters = data_panel_diff['state']
model6 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds=dict(groups=state_clusters))
model6.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,8.63
Date:,"Sat, 25 Sep 2021",Prob (F-statistic):,0.055
Time:,14:38:05,Log-Likelihood:,-16849.0
No. Observations:,3924,AIC:,33720.0
Df Residuals:,3914,BIC:,33780.0
Df Model:,9,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,4.9600,3.560,1.393,0.164,-2.018,11.938
crime_count_party,-0.0369,0.055,-0.664,0.507,-0.146,0.072
party_GRÜNE,-1.8619,3.692,-0.504,0.614,-9.097,5.374
party_SPD,-3.5957,3.435,-1.047,0.295,-10.329,3.138
party_CDU,-3.5656,3.115,-1.145,0.252,-9.670,2.539
party_FDP,-3.3947,3.074,-1.104,0.269,-9.419,2.630
party_DIE LINKE,-2.0915,3.352,-0.624,0.533,-8.661,4.478
bl_BB,-1.5629,2.460,-0.635,0.525,-6.385,3.259
bl_SN,3.2762,1.640,1.997,0.046,0.061,6.491

0,1,2,3
Omnibus:,597.582,Durbin-Watson:,2.015
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5762.065
Skew:,0.414,Prob(JB):,0.0
Kurtosis:,8.878,Cond. No.,75.8


In [89]:
# Export models 4-6 as a LaTeX table via estout_ols (not defined in this notebook;
# presumably provided by estout_func.py, which is executed with %run in an earlier cell).
# The previous argument X3 is not defined anywhere in this notebook and raises a NameError
# on a fresh kernel. All three models are fitted on the X2 design matrix, so X2 is passed
# as each model's regressor set (assumes list_regression_sets expects the design matrices --
# TODO confirm against estout_func.py).
estout_ols(
    modellist=[model4, model5, model6],
    modellist_str=["model4", "model5", "model6"],
    y='Share Female Candidates',
    caption="Basic Regression",
    label="Basic Regression",
    list_regression_sets=[X2, X2, X2],
    p_values=True,
)

\begin{table}[htbp] \caption{Basic Regression \label{Basic Regression}}
\resizebox{0.9\textwidth}{!}{ \centering
\begin{tabular}{lccc} \hline
  & (I) & (II) & (III) \\ 
Dependent Variable & \multicolumn{3}{c}{Share Female Candidates} \\ \hline \vspace{4pt} 
& \begin{footnotesize}\end{footnotesize} & \begin{footnotesize}\end{footnotesize} & \begin{footnotesize}\end{footnotesize}  \\ 
bl_BB & -1.5629 & -1.5629 & -1.5629 \\ 
 \vspace{4pt} & \begin{footnotesize}(5.9921) \end{footnotesize} & \begin{footnotesize}(4.5956) \end{footnotesize} & \begin{footnotesize}(2.4601) \end{footnotesize} \\ 
bl_SN & 3.2762*** & 3.2762* & 3.2762** \\ 
 \vspace{4pt} & \begin{footnotesize}(0.9389) \end{footnotesize} & \begin{footnotesize}(1.829) \end{footnotesize} & \begin{footnotesize}(1.6403) \end{footnotesize} \\ 
bl_TH & -1.1523 & -1.1523 & -1.1523 \\ 
 \vspace{4pt} & \begin{footnotesize}(4.4142) \end{footnotesize} & \begin{footnotesize}(2.9738) \end{footnotesize} & \begin{footnotesize}(0.9002) \end{footno

In [85]:
# Re-estimate the extended specification on the sample without AfD rows
# (the party_AfD indicator is therefore left out of the regressors).
# Standard errors are clustered on 'party_lr'.
not_afd = data_panel_diff['party_AfD'] != 1
data_panel_diff_balanced_noafd = data_panel_diff.loc[not_afd]
noafd_regressors = ["crime_count_party", "party_SPD", "party_FDP", "party_CDU", "party_GRÜNE", 'bl_BB', 'bl_SN', 'bl_TH']
y3 = data_panel_diff_balanced_noafd["p_female_diff"]
X4 = sm.add_constant(data_panel_diff_balanced_noafd[noafd_regressors])
model9 = sm.OLS(y3, X4).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff_balanced_noafd['party_lr']),
)
model9.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,0.1252
Date:,"Sat, 25 Sep 2021",Prob (F-statistic):,0.784
Time:,14:39:26,Log-Likelihood:,-16562.0
No. Observations:,3870,AIC:,33140.0
Df Residuals:,3861,BIC:,33200.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,1.47e-14,1.96e+14,0.000,2.868,2.868
crime_count_party,0.0736,0.067,1.106,0.269,-0.057,0.204
party_SPD,-1.5310,0.014,-106.667,0.000,-1.559,-1.503
party_FDP,-1.3163,0.008,-167.314,0.000,-1.332,-1.301
party_CDU,-1.4847,0.005,-318.525,0.000,-1.494,-1.476
party_GRÜNE,0.1868,0.003,56.470,0.000,0.180,0.193
bl_BB,6.7975,0.130,52.325,0.000,6.543,7.052
bl_SN,2.6516,0.484,5.481,0.000,1.703,3.600
bl_TH,-1.5307,4.546,-0.337,0.736,-10.440,7.379

0,1,2,3
Omnibus:,575.915,Durbin-Watson:,2.002
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5447.087
Skew:,0.4,Prob(JB):,0.0
Kurtosis:,8.757,Cond. No.,71.8


In [86]:
# Re-estimate the extended specification on the sample without GRÜNE rows
# (the party_GRÜNE indicator is therefore left out of the regressors).
# Standard errors are clustered on 'party_lr'.
# NOTE(review): despite its name, data_panel_diff_balanced_noafd holds the sample *without
# GRÜNE* (AfD rows are included) from here on; y3 is overwritten as well. Consider renaming.
not_gruene = data_panel_diff['party_GRÜNE'] != 1
data_panel_diff_balanced_noafd = data_panel_diff.loc[not_gruene]
nogruene_regressors = ["crime_count_party", "party_SPD", "party_FDP", "party_CDU", "party_AfD", 'bl_BB', 'bl_SN', 'bl_TH']
y3 = data_panel_diff_balanced_noafd["p_female_diff"]
X5 = sm.add_constant(data_panel_diff_balanced_noafd[nogruene_regressors])
model10 = sm.OLS(y3, X5).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff_balanced_noafd['party_lr']),
)
model10.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,0.1065
Date:,"Sat, 25 Sep 2021",Prob (F-statistic):,0.799
Time:,14:39:39,Log-Likelihood:,-15040.0
No. Observations:,3540,AIC:,30100.0
Df Residuals:,3531,BIC:,30150.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,1.08e-14,2.67e+14,0.000,2.868,2.868
crime_count_party,-0.0831,0.041,-2.011,0.044,-0.164,-0.002
party_SPD,-1.4964,0.007,-201.162,0.000,-1.511,-1.482
party_FDP,-1.2978,0.005,-266.052,0.000,-1.307,-1.288
party_CDU,-1.4714,0.004,-363.767,0.000,-1.479,-1.463
party_AfD,2.3917,0.069,34.519,0.000,2.256,2.528
bl_BB,-6.0863,0.111,-55.036,0.000,-6.303,-5.870
bl_SN,4.3187,2.918,1.480,0.139,-1.400,10.037
bl_TH,-0.0956,5.387,-0.018,0.986,-10.654,10.463

0,1,2,3
Omnibus:,526.952,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4826.384
Skew:,0.412,Prob(JB):,0.0
Kurtosis:,8.661,Cond. No.,91.3


In [None]:
# City-level variant: one row per (city, plz, state, p_female_city_diff, crime_count_city)
# combination, with rows lacking the city-level outcome or crime count removed.
# NOTE(review): X2, y and model4 are re-bound here and replace the party-level objects of the
# same names defined earlier; cells above that use them must not be re-run after this one.
city_keys = ['city', 'plz', 'state', 'p_female_city_diff', 'crime_count_city']
data_panel_diff2 = (
    data_panel_diff
    .drop_duplicates(subset=city_keys, ignore_index=True)
    .dropna(subset=['p_female_city_diff', 'crime_count_city'])
)

y = data_panel_diff2["p_female_city_diff"]
X2 = sm.add_constant(data_panel_diff2["crime_count_city"])
model4 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds=dict(groups=data_panel_diff2['party_lr']))
model4.summary()

In [None]:
# City-level regression of p_female_city_diff on crime_count_city, this time with
# standard errors clustered on 'party'. Re-binds X2, y and model5.
y = data_panel_diff2["p_female_city_diff"]
X2 = sm.add_constant(data_panel_diff2["crime_count_city"])
party_clusters = data_panel_diff2['party']
model5 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds=dict(groups=party_clusters))
model5.summary()

In [None]:
# Variant using the '*_party_lr' columns: de-duplicate on
# (city, party, plz, state, p_female_party_lr, crime_count_party_lr), drop rows missing
# either variable, and fit a bivariate OLS with HC3 heteroskedasticity-robust standard errors.
# Re-binds X2, y and model6.
lr_keys = ['city', 'party', 'plz', 'state', 'p_female_party_lr', 'crime_count_party_lr']
data_panel_lr = (
    data_panel_diff
    .drop_duplicates(subset=lr_keys, ignore_index=True)
    .dropna(subset=['p_female_party_lr', 'crime_count_party_lr'])
)

y = data_panel_lr["p_female_party_lr"]
X2 = sm.add_constant(data_panel_lr["crime_count_party_lr"])
model6 = sm.OLS(y, X2).fit(cov_type='HC3')
model6.summary()

---