In [1]:
# import sys
# !{sys.executable} -m pip install pandas
# !{sys.executable} -m pip install statsmodels
# !{sys.executable} -m pip install tabulate
# !{sys.executable} -m pip install httpimport

import pandas as pd
import statsmodels.api as sm
import numpy as np
from statsmodels.iolib.summary2 import summary_col
from tabulate import tabulate

  import pandas.util.testing as tm


In [184]:
# Read the merged election/crime CSV directly from the project's GitHub repository.
data_panel_diff = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/mariusgruenewald/pol_viol/main/data_election_crime_merged.csv'
)

In [185]:
# One-hot encode party, state and degree of urbanisation and append the indicator
# columns (prefixes `party_`, `bl_`, `du_`) to the frame, in that order.
# dtype=int pins the indicators to numeric 0/1: pandas >= 2.0 would otherwise create
# bool columns, and a design matrix mixing bool and float columns is cast to object
# dtype, which statsmodels rejects when the regressions below are fitted.
data_panel_diff = pd.concat(
    [data_panel_diff]
    + [
        pd.get_dummies(data_panel_diff[column], prefix=prefix, dtype=int)
        for column, prefix in (('party', 'party'), ('state', 'bl'), ('degree_of_urban', 'du'))
    ],
    axis=1,
)

### Let's consider the standard measure of crime (against a party in a city)

In [186]:
# Keep only rows whose `cycle_1` is 2014, then drop rows that repeat the same
# combination of city, party, plz, state, outcome and per-party crime count.
# The index is renumbered from 0 after de-duplication.
data_panel_diff = (
    data_panel_diff
    .loc[data_panel_diff['cycle_1'].eq(2014)]
    .drop_duplicates(
        subset=['city', 'party', 'plz', 'state', 'p_female_diff', 'crime_count_party'],
        ignore_index=True,
    )
)

Unnamed: 0.1,Unnamed: 0,crime,city,law,date,background,suspects,party,state,plz,...,party_GRÜNE,party_SPD,bl_BB,bl_BW,bl_HE,bl_SN,bl_TH,du_dicht besiedelt,du_gering besiedelt,du_mittlere Besiedlungsdichte
0,89,Korperverletzung,Stuttgart,223 StGB,2019-04-20,Links,0.0,AfD,BW,70173.0,...,0,0,0,1,0,0,0,1,0,0
1,102,,Stuttgart,,,,,CDU,BW,70173.0,...,0,0,0,1,0,0,0,1,0,0
2,103,,Stuttgart,,,,,DIE LINKE,BW,70173.0,...,0,0,0,1,0,0,0,1,0,0
3,104,Sachbeschadigung,Stuttgart,303 StGB,2019-04-17,Links,2.0,FDP,BW,70173.0,...,0,0,0,1,0,0,0,1,0,0
4,106,Beleidigung,Stuttgart,185 StGB,2019-02-27,Rechts,0.0,GRÜNE,BW,70173.0,...,1,0,0,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10414,24492,,Zierenberg,,,,,CDU,HE,34289.0,...,0,0,0,0,1,0,0,0,1,0
10415,24493,,Zierenberg,,,,,GRÜNE,HE,34289.0,...,1,0,0,0,1,0,0,0,1,0
10416,24494,,Zierenberg,,,,,SPD,HE,34289.0,...,0,1,0,0,1,0,0,0,1,0
10417,24497,,Zwingenberg,,,,,CDU,HE,64673.0,...,0,0,0,0,1,0,0,0,0,1


In [187]:
# Rescale the outcome by a factor of 100 and display its mean.
# NOTE: this cell is not idempotent: running it a second time without reloading
# the data multiplies the column by 100 again.
data_panel_diff['p_female_diff'] = data_panel_diff['p_female_diff'].mul(100)
data_panel_diff['p_female_diff'].mean()

1.9894823579394114

In [188]:
# Show the outcome next to the city and the per-party crime count.
data_panel_diff.loc[:, ['p_female_diff', 'city', 'crime_count_party']]

Unnamed: 0,p_female_diff,city,crime_count_party
0,6.666667,Stuttgart,13.0
1,-5.000000,Stuttgart,0.0
2,1.666667,Stuttgart,0.0
3,-1.666667,Stuttgart,2.0
4,0.000000,Stuttgart,1.0
...,...,...,...
10414,5.138340,Zierenberg,0.0
10415,,Zierenberg,0.0
10416,0.000000,Zierenberg,0.0
10417,10.219780,Zwingenberg,0.0


In [189]:
print("Conditional means based on party (no restriction to wether a crime has happened or whether the party stood twice)")
# Displays a tuple: (per-party means of crime count and outcome, number of rows with a
# non-missing outcome).
# The two numeric columns are selected *before* aggregating: a frame-wide
# groupby().mean() also hits the text columns (crime, city, law, ...) and raises
# TypeError since pandas 2.0, where non-numeric columns are no longer dropped silently.
(
    data_panel_diff.groupby('party', as_index=False)[['crime_count_party', 'p_female_diff']].mean(),
    int(data_panel_diff['p_female_diff'].notna().sum()),
)

Conditional means based on party (no restriction to wether a crime has happened or whether the party stood twice)


(       party  crime_count_party  p_female_diff
 0        AfD           0.326047       4.722508
 1        CDU           0.082792       1.522936
 2  DIE LINKE           0.000000       2.868497
 3        FDP           0.031882       1.616425
 4      GRÜNE           0.065382       3.132141
 5        SPD           0.132382       1.831817, 4948)

In [190]:
# Keep only rows where both the outcome and the per-party crime count are present.
# Reassigning (rather than dropna(inplace=True)) leaves the same rows in
# `data_panel_diff` without mutating the previous frame object.
data_panel_diff = data_panel_diff.dropna(subset=['p_female_diff', 'crime_count_party'])
print("Conditional means based on party when they stood for office twice")
# Displays a tuple: (per-party means of crime count and outcome, number of rows with a
# non-missing outcome). Columns are selected before aggregating because a frame-wide
# groupby().mean() raises TypeError on the text columns since pandas 2.0.
(
    data_panel_diff.groupby('party', as_index=False)[['crime_count_party', 'p_female_diff']].mean(),
    int(data_panel_diff['p_female_diff'].notna().sum()),
)

Conditional means based on party when they stood for office twice


(       party  crime_count_party  p_female_diff
 0        AfD           4.559322       4.722508
 1        CDU           0.104265       1.522936
 2  DIE LINKE           0.000000       2.868497
 3        FDP           0.083499       1.616425
 4      GRÜNE           0.151852       3.132141
 5        SPD           0.196193       1.831817, 4948)

In [191]:
# Add natural-log versions of the crime count and of the population/density columns.
# The crime count is shifted by +1 before taking the log, so a count of 0 maps to 0.
# NOTE(review): np.log returns -inf for a zero input and NaN for a negative one; the
# next cell only removes NaN values of log_pop. Confirm the inputs are strictly positive.
data_panel_diff = data_panel_diff.assign(
    log_crime_count_party=lambda frame: np.log(frame['crime_count_party'] + 1),
    log_pop=lambda frame: np.log(frame['pop']),
    log_pop_female=lambda frame: np.log(frame['pop_female']),
    log_dens=lambda frame: np.log(frame['density']),
)

In [192]:
# Discard rows whose log population is missing.
data_panel_diff = data_panel_diff.loc[data_panel_diff['log_pop'].notna()]

In [193]:
# Baseline specification: OLS of p_female_diff on log_crime_count_party plus an
# intercept, with standard errors clustered on `party_lr`.
# `X` and `y` are reused by the next two cells, which only change the cluster variable.
y = data_panel_diff['p_female_diff']
X = sm.add_constant(data_panel_diff['log_crime_count_party'])
model1 = sm.OLS(endog=y, exog=X).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party_lr']),
)
model1.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,2.283
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,0.372
Time:,18:05:08,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4865,BIC:,41210.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.9584,0.306,6.404,0.000,1.359,2.558
log_crime_count_party,0.9999,0.662,1.511,0.131,-0.297,2.297

0,1,2,3
Omnibus:,829.678,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8470.262
Skew:,0.501,Prob(JB):,0.0
Kurtosis:,9.385,Cond. No.,3.46


In [194]:
# Re-fit the current `y` on `X`, this time clustering the standard errors on `party`.
model2 = sm.OLS(endog=y, exog=X).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party']),
)
model2.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,4.13
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,0.0978
Time:,18:05:09,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4865,BIC:,41210.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.9584,0.280,6.984,0.000,1.409,2.508
log_crime_count_party,0.9999,0.492,2.032,0.042,0.036,1.964

0,1,2,3
Omnibus:,829.678,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8470.262
Skew:,0.501,Prob(JB):,0.0
Kurtosis:,9.385,Cond. No.,3.46


In [195]:
# Re-fit the current `y` on `X`, this time clustering the standard errors on `state`.
model3 = sm.OLS(endog=y, exog=X).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['state']),
)
model3.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,7.349
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,0.0535
Time:,18:05:10,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4865,BIC:,41210.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.9584,0.558,3.507,0.000,0.864,3.053
log_crime_count_party,0.9999,0.369,2.711,0.007,0.277,1.723

0,1,2,3
Omnibus:,829.678,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8470.262
Skew:,0.501,Prob(JB):,0.0
Kurtosis:,9.385,Cond. No.,3.46


In [196]:
# Side-by-side table of model1-model3. The three fits share one specification and differ
# only in the variable used to cluster the standard errors, so each column is labelled
# with its cluster variable instead of the default "p_female_diff I/II/III".
print(summary_col(
    [model1, model2, model3],
    model_names=['cluster: party_lr', 'cluster: party', 'cluster: state'],
))
# Interpretation (level-log model): the regressor is log(crime count + 1) and the outcome
# is p_female_diff after the x100 rescaling. A 1% increase in (crime count + 1) is
# therefore associated with a change of about coef/100 in the outcome; with the recorded
# coefficient of roughly 1.0 that is about 0.01 outcome units.
# Presumably the rescaled outcome is in percentage points; TODO confirm the units of
# p_female_diff before quoting this effect.


                      p_female_diff I p_female_diff II p_female_diff III
------------------------------------------------------------------------
const                 1.9584          1.9584           1.9584           
                      (0.3058)        (0.2804)         (0.5584)         
log_crime_count_party 0.9999          0.9999           0.9999           
                      (0.6617)        (0.4920)         (0.3688)         
Standard errors in parentheses.


In [15]:
# Runs the local script estout_func.py through the IPython %run magic.
# NOTE(review): the relative path means the script has to be reachable from the working
# directory, and none of the cells visible here appear to use a name it would define.
# Confirm it is still needed.
%run estout_func.py

In [197]:
# Add the two population controls (log_pop, log_pop_female) to the baseline regressor
# and fit with standard errors clustered on `party_lr`.
# NOTE(review): the recorded summary of this fit shows a negative F-statistic, possibly
# because `party_lr` has very few distinct clusters. Confirm before relying on the
# joint test.
Xp = sm.add_constant(
    data_panel_diff.loc[:, ["log_crime_count_party", "log_pop", "log_pop_female"]]
)
model4 = sm.OLS(endog=y, exog=Xp).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party_lr']),
)
model4.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.001
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,-5519000000.0
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,1.0
Time:,18:05:17,Log-Likelihood:,-20598.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4863,BIC:,41230.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.4759,9.974,1.151,0.250,-8.072,31.024
log_crime_count_party,1.1107,0.860,1.291,0.197,-0.575,2.796
log_pop,-12.0989,11.328,-1.068,0.286,-34.302,10.104
log_pop_female,11.9521,11.099,1.077,0.282,-9.802,33.706

0,1,2,3
Omnibus:,827.252,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8501.063
Skew:,0.496,Prob(JB):,0.0
Kurtosis:,9.398,Cond. No.,1020.0


In [198]:
# Re-fit the current `y` on `Xp`, clustering the standard errors on `party`.
model5 = sm.OLS(endog=y, exog=Xp).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party']),
)
model5.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.001
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,1.959
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,0.239
Time:,18:05:20,Log-Likelihood:,-20598.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4863,BIC:,41230.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.4759,6.987,1.642,0.100,-2.218,25.170
log_crime_count_party,1.1107,0.620,1.792,0.073,-0.104,2.325
log_pop,-12.0989,7.356,-1.645,0.100,-26.516,2.318
log_pop_female,11.9521,7.149,1.672,0.095,-2.060,25.964

0,1,2,3
Omnibus:,827.252,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8501.063
Skew:,0.496,Prob(JB):,0.0
Kurtosis:,9.398,Cond. No.,1020.0


In [199]:
# Re-fit the current `y` on `Xp`, clustering the standard errors on `state`.
model6 = sm.OLS(endog=y, exog=Xp).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['state']),
)
model6.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.001
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,6.025
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,0.0577
Time:,18:05:20,Log-Likelihood:,-20598.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4863,BIC:,41230.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.4759,7.180,1.598,0.110,-2.597,25.549
log_crime_count_party,1.1107,0.369,3.013,0.003,0.388,1.833
log_pop,-12.0989,11.100,-1.090,0.276,-33.854,9.656
log_pop_female,11.9521,11.112,1.076,0.282,-9.828,33.732

0,1,2,3
Omnibus:,827.252,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8501.063
Skew:,0.496,Prob(JB):,0.0
Kurtosis:,9.398,Cond. No.,1020.0


In [200]:
# Side-by-side table of model4-model6. The fits share one specification and differ only
# in the cluster variable of the standard errors, so each column is labelled with that
# variable instead of the default "p_female_diff I/II/III".
print(summary_col(
    [model4, model5, model6],
    model_names=['cluster: party_lr', 'cluster: party', 'cluster: state'],
))


                      p_female_diff I p_female_diff II p_female_diff III
------------------------------------------------------------------------
const                 11.4759         11.4759          11.4759          
                      (9.9736)        (6.9870)         (7.1804)         
log_crime_count_party 1.1107          1.1107           1.1107           
                      (0.8600)        (0.6197)         (0.3686)         
log_pop               -12.0989        -12.0989         -12.0989         
                      (11.3284)       (7.3556)         (11.0997)        
log_pop_female        11.9521         11.9521          11.9521          
                      (11.0991)       (7.1492)         (11.1124)        
Standard errors in parentheses.


In [201]:
# Population controls plus party indicators. DIE LINKE has no indicator in this list,
# so it serves as the reference party. Standard errors are clustered on `party_lr`.
X2 = sm.add_constant(
    data_panel_diff.loc[:, [
        "log_crime_count_party", "log_pop", "log_pop_female",
        "party_GRÜNE", "party_SPD", "party_CDU", "party_FDP", "party_AfD",
    ]]
)
model7 = sm.OLS(endog=y, exog=X2).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party_lr']),
)
model7.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,15890000000000.0
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,1.6e-07
Time:,18:05:23,Log-Likelihood:,-20592.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4858,BIC:,41260.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,12.6097,9.262,1.362,0.173,-5.543,30.762
log_crime_count_party,0.7655,1.491,0.513,0.608,-2.157,3.688
log_pop,-11.2614,10.748,-1.048,0.295,-32.327,9.804
log_pop_female,11.0136,10.516,1.047,0.295,-9.597,31.624
party_GRÜNE,0.3802,0.008,47.307,0.000,0.364,0.396
party_SPD,-1.0492,0.061,-17.219,0.000,-1.169,-0.930
party_CDU,-1.3950,0.062,-22.453,0.000,-1.517,-1.273
party_FDP,-1.1412,0.038,-30.102,0.000,-1.215,-1.067
party_AfD,4.4906,0.872,5.151,0.000,2.782,6.199

0,1,2,3
Omnibus:,781.796,Durbin-Watson:,2.026
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8257.33
Skew:,0.432,Prob(JB):,0.0
Kurtosis:,9.322,Cond. No.,1030.0


In [202]:
# Re-fit the current `y` on `X2`, clustering the standard errors on `party`.
# NOTE(review): the regressors include party indicators while the clusters are the
# parties themselves, and the recorded summary shows a negative F-statistic. Confirm
# the joint test is meaningful here.
model8 = sm.OLS(endog=y, exog=X2).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party']),
)
model8.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,-6271000000000.0
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,1.0
Time:,18:05:29,Log-Likelihood:,-20592.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4858,BIC:,41260.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,12.6097,6.324,1.994,0.046,0.216,25.004
log_crime_count_party,0.7655,0.898,0.852,0.394,-0.995,2.526
log_pop,-11.2614,7.102,-1.586,0.113,-25.181,2.658
log_pop_female,11.0136,6.947,1.585,0.113,-2.602,24.629
party_GRÜNE,0.3802,0.125,3.047,0.002,0.136,0.625
party_SPD,-1.0492,0.062,-16.835,0.000,-1.171,-0.927
party_CDU,-1.3950,0.048,-29.027,0.000,-1.489,-1.301
party_FDP,-1.1412,0.108,-10.564,0.000,-1.353,-0.929
party_AfD,4.4906,0.648,6.934,0.000,3.221,5.760

0,1,2,3
Omnibus:,781.796,Durbin-Watson:,2.026
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8257.33
Skew:,0.432,Prob(JB):,0.0
Kurtosis:,9.322,Cond. No.,1030.0


In [203]:
# Re-fit the current `y` on `X2`, clustering the standard errors on `state`.
model9 = sm.OLS(endog=y, exog=X2).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['state']),
)
model9.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,-802600000000.0
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,1.0
Time:,18:05:33,Log-Likelihood:,-20592.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4858,BIC:,41260.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,12.6097,7.134,1.767,0.077,-1.373,26.593
log_crime_count_party,0.7655,0.389,1.967,0.049,0.003,1.528
log_pop,-11.2614,11.182,-1.007,0.314,-33.177,10.655
log_pop_female,11.0136,11.232,0.981,0.327,-11.001,33.028
party_GRÜNE,0.3802,0.830,0.458,0.647,-1.246,2.006
party_SPD,-1.0492,0.471,-2.229,0.026,-1.972,-0.126
party_CDU,-1.3950,0.216,-6.461,0.000,-1.818,-0.972
party_FDP,-1.1412,0.480,-2.376,0.017,-2.082,-0.200
party_AfD,4.4906,1.956,2.296,0.022,0.657,8.324

0,1,2,3
Omnibus:,781.796,Durbin-Watson:,2.026
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8257.33
Skew:,0.432,Prob(JB):,0.0
Kurtosis:,9.322,Cond. No.,1030.0


In [204]:
# Side-by-side table of model7-model9. The fits share one specification and differ only
# in the cluster variable of the standard errors, so each column is labelled with that
# variable instead of the default "p_female_diff I/II/III".
print(summary_col(
    [model7, model8, model9],
    model_names=['cluster: party_lr', 'cluster: party', 'cluster: state'],
))


                      p_female_diff I p_female_diff II p_female_diff III
------------------------------------------------------------------------
const                 12.6097         12.6097          12.6097          
                      (9.2616)        (6.3235)         (7.1343)         
log_crime_count_party 0.7655          0.7655           0.7655           
                      (1.4913)        (0.8982)         (0.3892)         
log_pop               -11.2614        -11.2614         -11.2614         
                      (10.7481)       (7.1019)         (11.1818)        
log_pop_female        11.0136         11.0136          11.0136          
                      (10.5159)       (6.9467)         (11.2320)        
party_GRÜNE           0.3802          0.3802           0.3802           
                      (0.0080)        (0.1248)         (0.8295)         
party_SPD             -1.0492         -1.0492          -1.0492          
                      (0.0609)        (0.0623)    

In [205]:
# Population controls, density and state indicators. `bl_BB` has no indicator in this
# list, so it serves as the reference state. Standard errors are clustered on `party_lr`.
# NOTE(review): this uses the raw `density` column although a `log_dens` column was
# computed earlier. Confirm which one is intended.
X3 = sm.add_constant(
    data_panel_diff.loc[:, [
        "log_crime_count_party", "log_pop", "log_pop_female", "density",
        "bl_HE", "bl_SN", "bl_BW", "bl_TH",
    ]]
)
model10 = sm.OLS(endog=y, exog=X3).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party_lr']),
)
model10.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,-401600000000.0
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,1.0
Time:,18:05:39,Log-Likelihood:,-20587.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4858,BIC:,41250.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.7950,7.826,1.507,0.132,-3.544,27.134
log_crime_count_party,0.9769,0.671,1.457,0.145,-0.337,2.291
log_pop,-12.1181,9.354,-1.295,0.195,-30.452,6.216
log_pop_female,12.2191,9.121,1.340,0.180,-5.657,30.096
density,-0.0009,0.000,-1.828,0.068,-0.002,6.18e-05
bl_HE,-1.5189,0.986,-1.541,0.123,-3.451,0.413
bl_SN,-2.2982,1.288,-1.784,0.074,-4.823,0.227
bl_BW,-2.9302,0.327,-8.974,0.000,-3.570,-2.290
bl_TH,-3.1344,0.200,-15.684,0.000,-3.526,-2.743

0,1,2,3
Omnibus:,783.097,Durbin-Watson:,2.036
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8300.291
Skew:,0.432,Prob(JB):,0.0
Kurtosis:,9.339,Cond. No.,47300.0


In [206]:
# Re-fit the current `y` on `X3`, clustering the standard errors on `party`.
model11 = sm.OLS(endog=y, exog=X3).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party']),
)
model11.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,113800000.0
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,3.61e-20
Time:,18:05:44,Log-Likelihood:,-20587.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4858,BIC:,41250.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.7950,6.011,1.962,0.050,0.013,23.577
log_crime_count_party,0.9769,0.525,1.861,0.063,-0.052,2.006
log_pop,-12.1181,6.761,-1.792,0.073,-25.370,1.134
log_pop_female,12.2191,6.564,1.862,0.063,-0.645,25.083
density,-0.0009,0.001,-1.616,0.106,-0.002,0.000
bl_HE,-1.5189,0.690,-2.200,0.028,-2.872,-0.166
bl_SN,-2.2982,0.666,-3.449,0.001,-3.604,-0.992
bl_BW,-2.9302,0.484,-6.052,0.000,-3.879,-1.981
bl_TH,-3.1344,0.741,-4.227,0.000,-4.588,-1.681

0,1,2,3
Omnibus:,783.097,Durbin-Watson:,2.036
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8300.291
Skew:,0.432,Prob(JB):,0.0
Kurtosis:,9.339,Cond. No.,47300.0


In [207]:
# Re-fit the current `y` on `X3`, clustering the standard errors on `state`.
# NOTE(review): the regressors include state indicators while the clusters are the
# states themselves. Confirm the very small standard errors on the `bl_*` terms in the
# recorded output are not an artefact of that.
model12 = sm.OLS(endog=y, exog=X3).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['state']),
)
model12.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,13090.0
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,1.75e-08
Time:,18:05:46,Log-Likelihood:,-20587.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4858,BIC:,41250.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.7950,9.085,1.298,0.194,-6.011,29.601
log_crime_count_party,0.9769,0.367,2.660,0.008,0.257,1.697
log_pop,-12.1181,11.685,-1.037,0.300,-35.020,10.784
log_pop_female,12.2191,11.565,1.057,0.291,-10.448,34.886
density,-0.0009,0.000,-4.831,0.000,-0.001,-0.001
bl_HE,-1.5189,0.055,-27.849,0.000,-1.626,-1.412
bl_SN,-2.2982,0.067,-34.176,0.000,-2.430,-2.166
bl_BW,-2.9302,0.072,-40.439,0.000,-3.072,-2.788
bl_TH,-3.1344,0.074,-42.620,0.000,-3.279,-2.990

0,1,2,3
Omnibus:,783.097,Durbin-Watson:,2.036
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8300.291
Skew:,0.432,Prob(JB):,0.0
Kurtosis:,9.339,Cond. No.,47300.0


In [208]:
# Side-by-side table of model10-model12. The fits share one specification and differ
# only in the cluster variable of the standard errors, so each column is labelled with
# that variable instead of the default "p_female_diff I/II/III".
print(summary_col(
    [model10, model11, model12],
    model_names=['cluster: party_lr', 'cluster: party', 'cluster: state'],
))


                      p_female_diff I p_female_diff II p_female_diff III
------------------------------------------------------------------------
const                 11.7950         11.7950          11.7950          
                      (7.8262)        (6.0114)         (9.0850)         
log_crime_count_party 0.9769          0.9769           0.9769           
                      (0.6706)        (0.5249)         (0.3673)         
log_pop               -12.1181        -12.1181         -12.1181         
                      (9.3542)        (6.7615)         (11.6850)        
log_pop_female        12.2191         12.2191          12.2191          
                      (9.1208)        (6.5635)         (11.5648)        
density               -0.0009         -0.0009          -0.0009          
                      (0.0005)        (0.0005)         (0.0002)         
bl_HE                 -1.5189         -1.5189          -1.5189          
                      (0.9858)        (0.6905)    

In [209]:
# Fullest specification: population controls plus both state and party indicators
# (no indicator for `bl_BB` or for DIE LINKE, which act as the reference categories).
# Standard errors are clustered on `party_lr`.
X4 = sm.add_constant(
    data_panel_diff.loc[:, [
        "log_crime_count_party", "log_pop", "log_pop_female",
        "bl_HE", "bl_SN", "bl_BW", "bl_TH",
        "party_GRÜNE", "party_SPD", "party_CDU", "party_FDP", "party_AfD",
    ]]
)
model13 = sm.OLS(endog=y, exog=X4).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party_lr']),
)
model13.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,1.11
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,0.483
Time:,18:06:01,Log-Likelihood:,-20584.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4854,BIC:,41280.0
Df Model:,12,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,13.3920,7.864,1.703,0.089,-2.021,28.805
log_crime_count_party,0.6415,1.294,0.496,0.620,-1.894,3.177
log_pop,-11.0572,9.978,-1.108,0.268,-30.614,8.499
log_pop_female,10.8959,9.788,1.113,0.266,-8.288,30.080
bl_HE,-1.3428,1.006,-1.335,0.182,-3.314,0.628
bl_SN,-2.1684,1.120,-1.937,0.053,-4.363,0.026
bl_BW,-2.7773,0.366,-7.591,0.000,-3.494,-2.060
bl_TH,-2.9108,0.108,-26.941,0.000,-3.123,-2.699
party_GRÜNE,0.5779,0.064,9.031,0.000,0.453,0.703

0,1,2,3
Omnibus:,756.374,Durbin-Watson:,2.034
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8156.706
Skew:,0.391,Prob(JB):,0.0
Kurtosis:,9.294,Cond. No.,1050.0


In [210]:
# Re-fit the current `y` on `X4`, clustering the standard errors on `party`.
model14 = sm.OLS(endog=y, exog=X4).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party']),
)
model14.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,174400000000.0
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,3.93e-28
Time:,18:06:05,Log-Likelihood:,-20584.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4854,BIC:,41280.0
Df Model:,12,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,13.3920,5.999,2.233,0.026,1.635,25.149
log_crime_count_party,0.6415,0.764,0.840,0.401,-0.856,2.139
log_pop,-11.0572,7.482,-1.478,0.139,-25.721,3.606
log_pop_female,10.8959,7.352,1.482,0.138,-3.513,25.305
bl_HE,-1.3428,0.789,-1.701,0.089,-2.890,0.204
bl_SN,-2.1684,0.602,-3.604,0.000,-3.348,-0.989
bl_BW,-2.7773,0.581,-4.780,0.000,-3.916,-1.638
bl_TH,-2.9108,0.676,-4.306,0.000,-4.236,-1.586
party_GRÜNE,0.5779,0.288,2.004,0.045,0.013,1.143

0,1,2,3
Omnibus:,756.374,Durbin-Watson:,2.034
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8156.706
Skew:,0.391,Prob(JB):,0.0
Kurtosis:,9.294,Cond. No.,1050.0


In [211]:
# Re-fit the current `y` on `X4`, clustering the standard errors on `state`.
model15 = sm.OLS(endog=y, exog=X4).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['state']),
)
model15.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,14090000000.0
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,1.41e-20
Time:,18:06:06,Log-Likelihood:,-20584.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4854,BIC:,41280.0
Df Model:,12,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,13.3920,9.536,1.404,0.160,-5.298,32.082
log_crime_count_party,0.6415,0.536,1.197,0.231,-0.409,1.692
log_pop,-11.0572,12.267,-0.901,0.367,-35.100,12.986
log_pop_female,10.8959,12.191,0.894,0.371,-12.998,34.790
bl_HE,-1.3428,0.189,-7.097,0.000,-1.714,-0.972
bl_SN,-2.1684,0.121,-17.947,0.000,-2.405,-1.932
bl_BW,-2.7773,0.213,-13.059,0.000,-3.194,-2.360
bl_TH,-2.9108,0.093,-31.463,0.000,-3.092,-2.730
party_GRÜNE,0.5779,0.990,0.584,0.559,-1.362,2.517

0,1,2,3
Omnibus:,756.374,Durbin-Watson:,2.034
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8156.706
Skew:,0.391,Prob(JB):,0.0
Kurtosis:,9.294,Cond. No.,1050.0


# Ignore anything below

---

In [23]:
# Leave-one-party-out check without AfD: regress p_female_diff on the raw (unlogged)
# crime count plus the remaining party indicators, clustering on `party_lr`.
# NOTE: this cell rebinds `X4` and `model9`, which earlier cells also assign.
data_panel_diff_balanced_noafd = data_panel_diff.loc[data_panel_diff['party_AfD'].ne(1)]
y3 = data_panel_diff_balanced_noafd["p_female_diff"]
X4 = sm.add_constant(
    data_panel_diff_balanced_noafd[
        ["crime_count_party", "party_SPD", "party_FDP", "party_CDU", "party_GRÜNE"]
    ]
)
model9 = sm.OLS(endog=y3, exog=X4).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff_balanced_noafd['party_lr']),
)
model9.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,69.97
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0757
Time:,17:17:21,Log-Likelihood:,-20622.0
No. Observations:,4889,AIC:,41260.0
Df Residuals:,4883,BIC:,41290.0
Df Model:,5,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,2.36e-14,1.22e+14,0.000,2.868,2.868
crime_count_party,0.2310,0.073,3.172,0.002,0.088,0.374
party_SPD,-1.0820,0.014,-75.728,0.000,-1.110,-1.054
party_FDP,-1.2714,0.006,-209.074,0.000,-1.283,-1.259
party_CDU,-1.3696,0.008,-180.377,0.000,-1.385,-1.355
party_GRÜNE,0.2286,0.011,20.668,0.000,0.207,0.250

0,1,2,3
Omnibus:,762.588,Durbin-Watson:,2.011
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8121.818
Skew:,0.4,Prob(JB):,0.0
Kurtosis:,9.263,Cond. No.,7.9


In [27]:
# Leave-one-party-out check without SPD: regress p_female_diff on the raw (unlogged)
# crime count plus the remaining party indicators, clustering on `party_lr`.
# NOTE: this cell rebinds `y3` and `model10`, which other cells also assign.
data_panel_diff_balanced_nospd = data_panel_diff.loc[data_panel_diff['party_SPD'].ne(1)]
y3 = data_panel_diff_balanced_nospd["p_female_diff"]
X5 = sm.add_constant(
    data_panel_diff_balanced_nospd[
        ["crime_count_party", "party_GRÜNE", "party_FDP", "party_CDU", "party_AfD"]
    ]
)
model10 = sm.OLS(endog=y3, exog=X5).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff_balanced_nospd['party_lr']),
)
model10.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,80.71
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0706
Time:,17:21:40,Log-Likelihood:,-15286.0
No. Observations:,3582,AIC:,30580.0
Df Residuals:,3576,BIC:,30620.0
Df Model:,5,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,1.99e-14,1.44e+14,0.000,2.868,2.868
crime_count_party,0.0094,0.041,0.230,0.818,-0.071,0.090
party_GRÜNE,0.2622,0.006,42.103,0.000,0.250,0.274
party_FDP,-1.2529,0.003,-365.849,0.000,-1.260,-1.246
party_CDU,-1.3465,0.004,-314.892,0.000,-1.355,-1.338
party_AfD,1.8110,0.187,9.685,0.000,1.445,2.178

0,1,2,3
Omnibus:,577.408,Durbin-Watson:,1.993
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6632.99
Skew:,0.395,Prob(JB):,0.0
Kurtosis:,9.62,Cond. No.,18.1


In [28]:
# Leave-one-party-out check without GRÜNE: regress p_female_diff on the raw (unlogged)
# crime count plus the remaining party indicators, clustering on `party_lr`.
# NOTE: the result is stored in `model10` again, replacing the fit from the cell above.
data_panel_diff_balanced_nogr = data_panel_diff.loc[data_panel_diff['party_GRÜNE'].ne(1)]
y4 = data_panel_diff_balanced_nogr["p_female_diff"]
X6 = sm.add_constant(
    data_panel_diff_balanced_nogr[
        ["crime_count_party", "party_SPD", "party_FDP", "party_CDU", "party_AfD"]
    ]
)
model10 = sm.OLS(endog=y4, exog=X6).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff_balanced_nogr['party_lr']),
)
model10.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,46.03
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0932
Time:,17:21:52,Log-Likelihood:,-18467.0
No. Observations:,4408,AIC:,36950.0
Df Residuals:,4402,BIC:,36980.0
Df Model:,5,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,2.28e-14,1.26e+14,0.000,2.868,2.868
crime_count_party,0.0181,0.051,0.353,0.724,-0.083,0.119
party_SPD,-1.0402,0.010,-103.259,0.000,-1.060,-1.020
party_FDP,-1.2536,0.004,-292.383,0.000,-1.262,-1.245
party_CDU,-1.3474,0.005,-251.682,0.000,-1.358,-1.337
party_AfD,1.7715,0.234,7.567,0.000,1.313,2.230

0,1,2,3
Omnibus:,702.03,Durbin-Watson:,1.996
Prob(Omnibus):,0.0,Jarque-Bera (JB):,7247.664
Skew:,0.431,Prob(JB):,0.0
Kurtosis:,9.222,Cond. No.,19.1


In [43]:
# City-level variant: de-duplicate on the city keys and keep only rows where both the
# city-level outcome and the city-level crime count are present.
# dropna() is chained instead of being called in place on the de-duplicated frame; the
# recorded run of this cell emitted a SettingWithCopyWarning, most likely from that
# in-place call.
data_panel_diff2 = (
    data_panel_diff
    .drop_duplicates(['city', 'plz', 'state', 'p_female_city_diff', 'crime_count_city'], ignore_index=True)
    .dropna(subset=['p_female_city_diff', 'crime_count_city'])
)

# NOTE: `X2`, `y` and `model4` are rebound here, replacing the party-level objects that
# earlier cells assigned to the same names.
# NOTE(review): the rows are de-duplicated per city but clustered on `party_lr`, which
# is whatever party row survived de-duplication. Confirm this grouping is intended.
X2 = sm.add_constant(data_panel_diff2["crime_count_city"])
y = data_panel_diff2["p_female_city_diff"]
model4 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds={'groups': data_panel_diff2['party_lr']})
model4.summary()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


0,1,2,3
Dep. Variable:,p_female_city_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,3.577
Date:,"Wed, 05 Jan 2022",Prob (F-statistic):,0.31
Time:,00:35:46,Log-Likelihood:,1147.0
No. Observations:,1668,AIC:,-2290.0
Df Residuals:,1666,BIC:,-2279.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0033,0.006,0.598,0.550,-0.008,0.014
crime_count_city,0.0002,0.000,1.891,0.059,-7.9e-06,0.000

0,1,2,3
Omnibus:,289.411,Durbin-Watson:,1.928
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2736.752
Skew:,-0.515,Prob(JB):,0.0
Kurtosis:,9.19,Cond. No.,5.97


In [44]:
# City-level regression of p_female_city_diff on crime_count_city with an intercept,
# clustering the standard errors on `party`.
# NOTE: `X2`, `y` and `model5` are rebound here; earlier cells assign the same names.
y = data_panel_diff2["p_female_city_diff"]
X2 = sm.add_constant(data_panel_diff2["crime_count_city"])
model5 = sm.OLS(endog=y, exog=X2).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff2['party']),
)
model5.summary()

0,1,2,3
Dep. Variable:,p_female_city_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,1.442
Date:,"Wed, 05 Jan 2022",Prob (F-statistic):,0.284
Time:,00:35:55,Log-Likelihood:,1147.0
No. Observations:,1668,AIC:,-2290.0
Df Residuals:,1666,BIC:,-2279.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0033,0.004,0.813,0.416,-0.005,0.011
crime_count_city,0.0002,0.000,1.201,0.230,-0.000,0.001

0,1,2,3
Omnibus:,289.411,Durbin-Watson:,1.928
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2736.752
Skew:,-0.515,Prob(JB):,0.0
Kurtosis:,9.19,Cond. No.,5.97


In [45]:
# Variant on the `*_party_lr` measures: de-duplicate on the listed keys and keep only
# rows where both p_female_party_lr and crime_count_party_lr are present.
# dropna() is chained rather than called in place on the de-duplicated frame, which is
# the pattern that can raise pandas' SettingWithCopyWarning.
data_panel_lr = (
    data_panel_diff
    .drop_duplicates(['city', 'party', 'plz', 'state', 'p_female_party_lr', 'crime_count_party_lr'], ignore_index=True)
    .dropna(subset=['p_female_party_lr', 'crime_count_party_lr'])
)

# NOTE: `X2`, `y` and `model6` are rebound here; earlier cells assign the same names.
# This fit uses heteroskedasticity-robust HC3 standard errors instead of clustering.
X2 = sm.add_constant(data_panel_lr["crime_count_party_lr"])
y = data_panel_lr["p_female_party_lr"]
model6 = sm.OLS(y, X2).fit(cov_type='HC3')
model6.summary()

0,1,2,3
Dep. Variable:,p_female_party_lr,R-squared:,0.042
Model:,OLS,Adj. R-squared:,0.025
Method:,Least Squares,F-statistic:,9.118
Date:,"Wed, 05 Jan 2022",Prob (F-statistic):,0.00381
Time:,00:35:58,Log-Likelihood:,28.816
No. Observations:,58,AIC:,-53.63
Df Residuals:,56,BIC:,-49.51
Df Model:,1,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.2861,0.022,12.900,0.000,0.243,0.330
crime_count_party_lr,-0.0022,0.001,-3.020,0.003,-0.004,-0.001

0,1,2,3
Omnibus:,1.418,Durbin-Watson:,2.173
Prob(Omnibus):,0.492,Jarque-Bera (JB):,1.349
Skew:,-0.254,Prob(JB):,0.509
Kurtosis:,2.452,Cond. No.,16.7


---