In [2]:
# import sys
# !{sys.executable} -m pip install pandas
# !{sys.executable} -m pip install statsmodels
# !{sys.executable} -m pip install tabulate
# !{sys.executable} -m pip install httpimport

import pandas as pd
import statsmodels.api as sm
import numpy as np
from statsmodels.iolib.summary2 import summary_col
from tabulate import tabulate

In [89]:
# Load the merged election/crime data set directly from the project's GitHub repository.
data_panel_diff = pd.read_csv(
    filepath_or_buffer=r'https://raw.githubusercontent.com/mariusgruenewald/pol_viol/main/data/data_election_crime_merged.csv'
)

In [90]:
# One-hot encode party, state and degree of urbanisation and append the indicator
# columns (prefixes 'party_', 'bl_' and 'du_') to the panel, in that order.
# dtype=float keeps the indicators numeric: pandas >= 2.0 emits boolean dummies by
# default, and mixing those with float regressors yields an object-dtype design
# matrix that statsmodels rejects.
dummy_columns = pd.concat(
    [
        pd.get_dummies(data_panel_diff[source_col], prefix=prefix, dtype=float)
        for source_col, prefix in (('party', 'party'), ('state', 'bl'), ('degree_of_urban', 'du'))
    ],
    axis=1,
)
# Remove indicator columns left over from an earlier run of this cell so that
# re-running it does not produce duplicate column labels.
data_panel_diff = pd.concat(
    [data_panel_diff.drop(columns=dummy_columns.columns, errors='ignore'), dummy_columns],
    axis=1,
)

In [91]:
# List the distinct election cycles present in the data.
pd.unique(data_panel_diff["cycle_1"])

array([2019., 2014.])

### Let's consider the standard measure of crime (against a party in a city)

In [92]:
# Restrict the panel to the 2019 cycle, then keep one row per
# city/party/plz/state/outcome/crime-count combination with a fresh 0..n-1 index.
data_panel_diff = (
    data_panel_diff
    .loc[data_panel_diff['cycle_1'].eq(2019)]
    .drop_duplicates(
        subset=['city', 'party', 'plz', 'state', 'p_female_diff', 'crime_count_party'],
        ignore_index=True,
    )
)

In [93]:
# Put p_female_diff on a x100 scale and show its mean.
# The unscaled values are stored once in 'p_female_diff_raw' and the scaled column is
# always derived from them, so re-running this cell no longer multiplies by 100 again.
if 'p_female_diff_raw' not in data_panel_diff.columns:
    data_panel_diff['p_female_diff_raw'] = data_panel_diff['p_female_diff']
data_panel_diff['p_female_diff'] = data_panel_diff['p_female_diff_raw'] * 100
data_panel_diff['p_female_diff'].mean()

1.9964112455980585

In [94]:
# Log-transform the crime count (shifted by one so that zero counts stay defined)
# and the population, female population and density columns.
data_panel_diff['log_crime_count_party'] = np.log(1 + data_panel_diff['crime_count_party'])
for source_col, log_col in (
    ('pop', 'log_pop'),
    ('pop_female', 'log_pop_female'),
    ('density', 'log_dens'),
):
    data_panel_diff[log_col] = np.log(data_panel_diff[source_col])

In [95]:
# Discard rows where the outcome or log population is missing (index is not reset).
data_panel_diff = data_panel_diff.dropna(subset=['p_female_diff', 'log_pop'])

In [102]:
# Eyeball the cycle_2_y column of the filtered sample.
data_panel_diff.loc[:, "cycle_2_y"]

0        2019.0
1        2019.0
2        2019.0
3        2019.0
4        2019.0
          ...  
10946    2019.0
10947    2019.0
10948    2019.0
10949    2019.0
10950    2019.0
Name: cycle_2_y, Length: 4880, dtype: float64

In [97]:
# Baseline specification: regress p_female_diff on log_crime_count_party plus an
# intercept, using HC3 heteroskedasticity-robust standard errors.
y = data_panel_diff['p_female_diff']
X = sm.add_constant(data_panel_diff['log_crime_count_party'])
model1 = sm.OLS(endog=y, exog=X).fit(cov_type="HC3")
model1.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,0.5719
Date:,"Mon, 19 Dec 2022",Prob (F-statistic):,0.45
Time:,15:52:54,Log-Likelihood:,-20655.0
No. Observations:,4880,AIC:,41310.0
Df Residuals:,4878,BIC:,41330.0
Df Model:,1,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.0565,0.246,8.355,0.000,1.574,2.539
log_crime_count_party,-0.3830,0.507,-0.756,0.450,-1.376,0.610

0,1,2,3
Omnibus:,826.349,Durbin-Watson:,2.029
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8427.354
Skew:,0.495,Prob(JB):,0.0
Kurtosis:,9.361,Cond. No.,2.72


In [22]:
# Baseline regression again, now with standard errors clustered on party_lr.
# NOTE(review): this rebinds model1, replacing the HC3 fit from the preceding cell.
# NOTE(review): the saved output of this cell is a ValueError raised from the F-test
# while building the summary; presumably party_lr has too few distinct groups for a
# usable clustered covariance — confirm before relying on these standard errors.
model1 = sm.OLS(endog=y, exog=X).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party_lr']),
)
model1.summary()

ValueError: r_matrix performs f_test for using dimensions that are asymptotically non-normal

In [61]:
# Baseline regression with standard errors clustered by party.
model2 = sm.OLS(endog=y, exog=X).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party']),
)
model2.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,1.507
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,0.274
Time:,19:49:56,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4865,BIC:,41210.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.9707,0.281,7.007,0.000,1.419,2.522
log_crime_count_party,0.7147,0.582,1.228,0.220,-0.426,1.856

0,1,2,3
Omnibus:,829.245,Durbin-Watson:,2.028
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8469.223
Skew:,0.501,Prob(JB):,0.0
Kurtosis:,9.384,Cond. No.,3.4


In [62]:
# Baseline regression with standard errors clustered by state.
model3 = sm.OLS(endog=y, exog=X).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['state']),
)
model3.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,1.083
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,0.357
Time:,19:49:57,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4865,BIC:,41210.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.9707,0.586,3.364,0.001,0.823,3.119
log_crime_count_party,0.7147,0.687,1.041,0.298,-0.631,2.061

0,1,2,3
Omnibus:,829.245,Durbin-Watson:,2.028
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8469.223
Skew:,0.501,Prob(JB):,0.0
Kurtosis:,9.384,Cond. No.,3.4


In [63]:
# Side-by-side table of the three baseline fits (same coefficients, different
# standard errors).
# Reading the slope: the regressor is log(crime_count_party + 1) and the outcome is
# p_female_diff multiplied by 100, so a 1% increase in (crime count + 1) goes with a
# change of roughly coefficient/100 in the scaled outcome.
# NOTE(review): an earlier comment here quoted a 1.2% higher share of females in the
# following period per 1% more crime; that figure does not match the coefficient in
# the saved table — re-derive the interpretation from a fresh run.
print(summary_col(results=[model1, model2, model3]))


                      p_female_diff I p_female_diff II p_female_diff III
------------------------------------------------------------------------
const                 1.9707          1.9707           1.9707           
                      (0.3300)        (0.2813)         (0.5858)         
log_crime_count_party 0.7147          0.7147           0.7147           
                      (0.1899)        (0.5821)         (0.6867)         
R-squared             0.0002          0.0002           0.0002           
R-squared Adj.        -0.0000         -0.0000          -0.0000          
Standard errors in parentheses.


In [15]:
# Execute the local script estout_func.py inside the notebook namespace (IPython %run).
# NOTE(review): the script is not shown in this notebook, so it is unclear which names
# it defines or whether any later cell still needs them — confirm before keeping.
%run estout_func.py

In [33]:
# Add log population and log female population as controls; standard errors are
# clustered on party_lr.
Xp = sm.add_constant(
    data_panel_diff[["log_crime_count_party", "log_pop", "log_pop_female"]]
)
model4 = sm.OLS(endog=y, exog=Xp).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party_lr']),
)
model4.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,-59600000000.0
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,1.0
Time:,17:51:59,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41210.0
Df Residuals:,4863,BIC:,41230.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.1535,9.551,1.168,0.243,-7.567,29.874
log_crime_count_party,0.8005,0.363,2.206,0.027,0.089,1.512
log_pop,-11.8577,11.072,-1.071,0.284,-33.558,9.843
log_pop_female,11.7315,10.875,1.079,0.281,-9.583,33.046

0,1,2,3
Omnibus:,827.81,Durbin-Watson:,2.028
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8504.534
Skew:,0.497,Prob(JB):,0.0
Kurtosis:,9.399,Cond. No.,1020.0


In [34]:
# Population-controls specification with standard errors clustered by party.
model5 = sm.OLS(endog=y, exog=Xp).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party']),
)
model5.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,1.884
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,0.25
Time:,17:52:03,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41210.0
Df Residuals:,4863,BIC:,41230.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.1535,6.671,1.672,0.095,-1.922,24.229
log_crime_count_party,0.8005,0.568,1.410,0.159,-0.312,1.913
log_pop,-11.8577,7.177,-1.652,0.098,-25.923,2.208
log_pop_female,11.7315,6.998,1.676,0.094,-1.984,25.447

0,1,2,3
Omnibus:,827.81,Durbin-Watson:,2.028
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8504.534
Skew:,0.497,Prob(JB):,0.0
Kurtosis:,9.399,Cond. No.,1020.0


In [35]:
# Population-controls specification with standard errors clustered by state.
model6 = sm.OLS(endog=y, exog=Xp).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['state']),
)
model6.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,1.119
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,0.44
Time:,17:52:08,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41210.0
Df Residuals:,4863,BIC:,41230.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.1535,7.558,1.476,0.140,-3.659,25.966
log_crime_count_party,0.8005,0.649,1.233,0.218,-0.472,2.073
log_pop,-11.8577,11.360,-1.044,0.297,-34.123,10.408
log_pop_female,11.7315,11.341,1.034,0.301,-10.497,33.960

0,1,2,3
Omnibus:,827.81,Durbin-Watson:,2.028
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8504.534
Skew:,0.497,Prob(JB):,0.0
Kurtosis:,9.399,Cond. No.,1020.0


In [36]:
# Side-by-side table of the population-controls fits under the three clusterings.
print(summary_col(results=[model4, model5, model6]))


                      p_female_diff I p_female_diff II p_female_diff III
------------------------------------------------------------------------
const                 11.1535         11.1535          11.1535          
                      (9.5515)        (6.6711)         (7.5576)         
log_crime_count_party 0.8005          0.8005           0.8005           
                      (0.3629)        (0.5678)         (0.6493)         
log_pop               -11.8577        -11.8577         -11.8577         
                      (11.0718)       (7.1765)         (11.3601)        
log_pop_female        11.7315         11.7315          11.7315          
                      (10.8750)       (6.9978)         (11.3414)        
R-squared             0.0004          0.0004           0.0004           
R-squared Adj.        -0.0003         -0.0003          -0.0003          
Standard errors in parentheses.


In [37]:
# Population controls plus party indicator columns; standard errors clustered on
# party_lr.
X2 = sm.add_constant(
    data_panel_diff[
        [
            "log_crime_count_party", "log_pop", "log_pop_female",
            "party_GRÜNE", "party_SPD", "party_CDU", "party_FDP", "party_AfD",
        ]
    ]
)
model7 = sm.OLS(endog=y, exog=X2).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party_lr']),
)
model7.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,-1014000000000.0
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,1.0
Time:,17:52:18,Log-Likelihood:,-20593.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4858,BIC:,41260.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,12.0903,8.750,1.382,0.167,-5.059,29.239
log_crime_count_party,0.2506,0.894,0.280,0.779,-1.501,2.003
log_pop,-10.9991,10.556,-1.042,0.297,-31.689,9.691
log_pop_female,10.7897,10.359,1.042,0.298,-9.514,31.093
party_GRÜNE,0.4209,0.124,3.404,0.001,0.179,0.663
party_SPD,-0.9831,0.097,-10.090,0.000,-1.174,-0.792
party_CDU,-1.3409,0.061,-21.830,0.000,-1.461,-1.220
party_FDP,-1.1136,0.139,-8.036,0.000,-1.385,-0.842
party_AfD,4.8445,0.426,11.374,0.000,4.010,5.679

0,1,2,3
Omnibus:,782.486,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8258.196
Skew:,0.433,Prob(JB):,0.0
Kurtosis:,9.322,Cond. No.,1030.0


In [38]:
# Party-indicator specification with standard errors clustered by party.
model8 = sm.OLS(endog=y, exog=X2).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party']),
)
model8.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,26.37
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,0.00172
Time:,17:52:54,Log-Likelihood:,-20593.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4858,BIC:,41260.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,12.0903,5.944,2.034,0.042,0.440,23.741
log_crime_count_party,0.2506,0.790,0.317,0.751,-1.298,1.800
log_pop,-10.9991,6.954,-1.582,0.114,-24.629,2.630
log_pop_female,10.7897,6.832,1.579,0.114,-2.600,24.179
party_GRÜNE,0.4209,0.126,3.329,0.001,0.173,0.669
party_SPD,-0.9831,0.066,-14.856,0.000,-1.113,-0.853
party_CDU,-1.3409,0.063,-21.162,0.000,-1.465,-1.217
party_FDP,-1.1136,0.121,-9.235,0.000,-1.350,-0.877
party_AfD,4.8445,0.636,7.618,0.000,3.598,6.091

0,1,2,3
Omnibus:,782.486,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8258.196
Skew:,0.433,Prob(JB):,0.0
Kurtosis:,9.322,Cond. No.,1030.0


In [39]:
# Party-indicator specification with standard errors clustered by state.
model9 = sm.OLS(endog=y, exog=X2).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['state']),
)
model9.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,30910000000.0
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,2.79e-21
Time:,17:53:01,Log-Likelihood:,-20593.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4858,BIC:,41260.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,12.0903,7.514,1.609,0.108,-2.638,26.818
log_crime_count_party,0.2506,0.777,0.323,0.747,-1.272,1.773
log_pop,-10.9991,11.458,-0.960,0.337,-33.457,11.459
log_pop_female,10.7897,11.475,0.940,0.347,-11.700,33.280
party_GRÜNE,0.4209,0.834,0.504,0.614,-1.214,2.056
party_SPD,-0.9831,0.463,-2.123,0.034,-1.891,-0.075
party_CDU,-1.3409,0.182,-7.369,0.000,-1.697,-0.984
party_FDP,-1.1136,0.423,-2.635,0.008,-1.942,-0.285
party_AfD,4.8445,2.190,2.213,0.027,0.553,9.136

0,1,2,3
Omnibus:,782.486,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8258.196
Skew:,0.433,Prob(JB):,0.0
Kurtosis:,9.322,Cond. No.,1030.0


In [24]:
# Side-by-side table of the party-indicator fits under the three clusterings.
print(summary_col(results=[model7, model8, model9]))


                      p_female_diff I p_female_diff II p_female_diff III
------------------------------------------------------------------------
const                 12.0903         12.0903          12.0903          
                      (8.7496)        (5.9442)         (7.5145)         
log_crime_count_party 0.2506          0.2506           0.2506           
                      (0.8939)        (0.7903)         (0.7767)         
log_pop               -10.9991        -10.9991         -10.9991         
                      (10.5563)       (6.9539)         (11.4583)        
log_pop_female        10.7897         10.7897          10.7897          
                      (10.3592)       (6.8316)         (11.4747)        
party_GRÜNE           0.4209          0.4209           0.4209           
                      (0.1236)        (0.1264)         (0.8342)         
party_SPD             -0.9831         -0.9831          -0.9831          
                      (0.0974)        (0.0662)    

In [40]:
# Population controls, density and state indicator columns; standard errors
# clustered on party_lr.
# NOTE(review): raw 'density' enters here although a 'log_dens' column is computed
# earlier in the notebook and not used in any visible cell — confirm which one is
# intended.
X3 = sm.add_constant(
    data_panel_diff[
        [
            "log_crime_count_party", "log_pop", "log_pop_female", "density",
            "bl_HE", "bl_SN", "bl_BW", "bl_TH",
        ]
    ]
)
model10 = sm.OLS(endog=y, exog=X3).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party_lr']),
)
model10.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,-18670000000000.0
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,1.0
Time:,17:53:14,Log-Likelihood:,-20587.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4858,BIC:,41250.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.2523,7.304,1.541,0.123,-3.064,25.568
log_crime_count_party,0.5567,0.165,3.371,0.001,0.233,0.880
log_pop,-11.6406,8.931,-1.303,0.192,-29.145,5.864
log_pop_female,11.7705,8.731,1.348,0.178,-5.342,28.883
density,-0.0008,0.000,-1.655,0.098,-0.002,0.000
bl_HE,-1.5556,0.935,-1.664,0.096,-3.387,0.276
bl_SN,-2.2907,1.273,-1.800,0.072,-4.785,0.204
bl_BW,-2.9644,0.273,-10.863,0.000,-3.499,-2.430
bl_TH,-3.1269,0.191,-16.384,0.000,-3.501,-2.753

0,1,2,3
Omnibus:,782.98,Durbin-Watson:,2.037
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8302.782
Skew:,0.432,Prob(JB):,0.0
Kurtosis:,9.34,Cond. No.,47300.0


In [41]:
# State-indicator specification with standard errors clustered by party.
model11 = sm.OLS(endog=y, exog=X3).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party']),
)
model11.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,3741000000.0
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,5.83e-24
Time:,17:53:24,Log-Likelihood:,-20587.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4858,BIC:,41250.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.2523,5.595,2.011,0.044,0.286,22.218
log_crime_count_party,0.5567,0.541,1.028,0.304,-0.504,1.618
log_pop,-11.6406,6.497,-1.792,0.073,-24.374,1.093
log_pop_female,11.7705,6.333,1.859,0.063,-0.642,24.183
density,-0.0008,0.001,-1.486,0.137,-0.002,0.000
bl_HE,-1.5556,0.648,-2.401,0.016,-2.826,-0.286
bl_SN,-2.2907,0.659,-3.476,0.001,-3.582,-0.999
bl_BW,-2.9644,0.447,-6.629,0.000,-3.841,-2.088
bl_TH,-3.1269,0.742,-4.214,0.000,-4.581,-1.673

0,1,2,3
Omnibus:,782.98,Durbin-Watson:,2.037
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8302.782
Skew:,0.432,Prob(JB):,0.0
Kurtosis:,9.34,Cond. No.,47300.0


In [42]:
# State-indicator specification with standard errors clustered by state.
model12 = sm.OLS(endog=y, exog=X3).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['state']),
)
model12.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,8845.0
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,3.83e-08
Time:,17:53:53,Log-Likelihood:,-20587.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4858,BIC:,41250.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,11.2523,9.673,1.163,0.245,-7.706,30.211
log_crime_count_party,0.5567,0.909,0.612,0.540,-1.225,2.339
log_pop,-11.6406,12.222,-0.952,0.341,-35.596,12.315
log_pop_female,11.7705,12.064,0.976,0.329,-11.874,35.415
density,-0.0008,0.000,-4.724,0.000,-0.001,-0.000
bl_HE,-1.5556,0.117,-13.331,0.000,-1.784,-1.327
bl_SN,-2.2907,0.061,-37.663,0.000,-2.410,-2.172
bl_BW,-2.9644,0.106,-28.048,0.000,-3.172,-2.757
bl_TH,-3.1269,0.072,-43.180,0.000,-3.269,-2.985

0,1,2,3
Omnibus:,782.98,Durbin-Watson:,2.037
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8302.782
Skew:,0.432,Prob(JB):,0.0
Kurtosis:,9.34,Cond. No.,47300.0


In [43]:
# Side-by-side table of the state-indicator fits under the three clusterings.
print(summary_col(results=[model10, model11, model12]))


                      p_female_diff I p_female_diff II p_female_diff III
------------------------------------------------------------------------
const                 11.2523         11.2523          11.2523          
                      (7.3041)        (5.5951)         (9.6729)         
log_crime_count_party 0.5567          0.5567           0.5567           
                      (0.1652)        (0.5414)         (0.9092)         
log_pop               -11.6406        -11.6406         -11.6406         
                      (8.9310)        (6.4969)         (12.2223)        
log_pop_female        11.7705         11.7705          11.7705          
                      (8.7310)        (6.3329)         (12.0635)        
density               -0.0008         -0.0008          -0.0008          
                      (0.0005)        (0.0006)         (0.0002)         
bl_HE                 -1.5556         -1.5556          -1.5556          
                      (0.9347)        (0.6480)    

In [44]:
# Full specification: population controls plus state and party indicator columns;
# standard errors clustered on party_lr.
X4 = sm.add_constant(
    data_panel_diff[
        [
            "log_crime_count_party", "log_pop", "log_pop_female",
            "bl_HE", "bl_SN", "bl_BW", "bl_TH",
            "party_GRÜNE", "party_SPD", "party_CDU", "party_FDP", "party_AfD",
        ]
    ]
)
model13 = sm.OLS(endog=y, exog=X4).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party_lr']),
)
model13.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,1.14
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,0.479
Time:,17:55:09,Log-Likelihood:,-20584.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4854,BIC:,41280.0
Df Model:,12,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,12.5520,7.040,1.783,0.075,-1.246,26.350
log_crime_count_party,0.0761,0.708,0.107,0.914,-1.311,1.463
log_pop,-10.4928,9.427,-1.113,0.266,-28.969,7.983
log_pop_female,10.3854,9.283,1.119,0.263,-7.809,28.580
bl_HE,-1.4182,0.901,-1.575,0.115,-3.183,0.347
bl_SN,-2.1580,1.117,-1.931,0.053,-4.348,0.032
bl_BW,-2.8443,0.263,-10.804,0.000,-3.360,-2.328
bl_TH,-2.8927,0.101,-28.632,0.000,-3.091,-2.695
party_GRÜNE,0.6450,0.088,7.329,0.000,0.473,0.818

0,1,2,3
Omnibus:,755.529,Durbin-Watson:,2.034
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8151.752
Skew:,0.39,Prob(JB):,0.0
Kurtosis:,9.292,Cond. No.,1050.0


In [45]:
# Full specification with standard errors clustered by party.
model14 = sm.OLS(endog=y, exog=X4).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['party']),
)
model14.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,-558300000000.0
Date:,"Wed, 30 Nov 2022",Prob (F-statistic):,1.0
Time:,17:55:23,Log-Likelihood:,-20584.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4854,BIC:,41280.0
Df Model:,12,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,12.5520,5.384,2.332,0.020,2.000,23.104
log_crime_count_party,0.0761,0.663,0.115,0.909,-1.223,1.375
log_pop,-10.4928,7.101,-1.478,0.139,-24.410,3.424
log_pop_female,10.3854,7.015,1.480,0.139,-3.364,24.134
bl_HE,-1.4182,0.720,-1.970,0.049,-2.829,-0.007
bl_SN,-2.1580,0.605,-3.569,0.000,-3.343,-0.973
bl_BW,-2.8443,0.529,-5.381,0.000,-3.880,-1.808
bl_TH,-2.8927,0.683,-4.235,0.000,-4.231,-1.554
party_GRÜNE,0.6450,0.294,2.196,0.028,0.069,1.221

0,1,2,3
Omnibus:,755.529,Durbin-Watson:,2.034
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8151.752
Skew:,0.39,Prob(JB):,0.0
Kurtosis:,9.292,Cond. No.,1050.0


In [211]:
# Full specification with standard errors clustered by state.
model15 = sm.OLS(endog=y, exog=X4).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff['state']),
)
model15.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,14090000000.0
Date:,"Mon, 04 Apr 2022",Prob (F-statistic):,1.41e-20
Time:,18:06:06,Log-Likelihood:,-20584.0
No. Observations:,4867,AIC:,41190.0
Df Residuals:,4854,BIC:,41280.0
Df Model:,12,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,13.3920,9.536,1.404,0.160,-5.298,32.082
log_crime_count_party,0.6415,0.536,1.197,0.231,-0.409,1.692
log_pop,-11.0572,12.267,-0.901,0.367,-35.100,12.986
log_pop_female,10.8959,12.191,0.894,0.371,-12.998,34.790
bl_HE,-1.3428,0.189,-7.097,0.000,-1.714,-0.972
bl_SN,-2.1684,0.121,-17.947,0.000,-2.405,-1.932
bl_BW,-2.7773,0.213,-13.059,0.000,-3.194,-2.360
bl_TH,-2.9108,0.093,-31.463,0.000,-3.092,-2.730
party_GRÜNE,0.5779,0.990,0.584,0.559,-1.362,2.517

0,1,2,3
Omnibus:,756.374,Durbin-Watson:,2.034
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8156.706
Skew:,0.391,Prob(JB):,0.0
Kurtosis:,9.294,Cond. No.,1050.0


# Ignore anything below

---

In [23]:
# Leave-one-party-out check: drop AfD rows and regress p_female_diff on the raw
# crime count plus the remaining party indicators, clustering on party_lr.
# NOTE(review): X4 and model9 are also assigned by earlier cells in this notebook;
# running this cell rebinds both names.
data_panel_diff_balanced_noafd = data_panel_diff.loc[data_panel_diff['party_AfD'].ne(1)]
y3 = data_panel_diff_balanced_noafd["p_female_diff"]
X4 = sm.add_constant(
    data_panel_diff_balanced_noafd[
        ["crime_count_party", "party_SPD", "party_FDP", "party_CDU", "party_GRÜNE"]
    ]
)
model9 = sm.OLS(endog=y3, exog=X4).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff_balanced_noafd['party_lr']),
)
model9.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,69.97
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0757
Time:,17:17:21,Log-Likelihood:,-20622.0
No. Observations:,4889,AIC:,41260.0
Df Residuals:,4883,BIC:,41290.0
Df Model:,5,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,2.36e-14,1.22e+14,0.000,2.868,2.868
crime_count_party,0.2310,0.073,3.172,0.002,0.088,0.374
party_SPD,-1.0820,0.014,-75.728,0.000,-1.110,-1.054
party_FDP,-1.2714,0.006,-209.074,0.000,-1.283,-1.259
party_CDU,-1.3696,0.008,-180.377,0.000,-1.385,-1.355
party_GRÜNE,0.2286,0.011,20.668,0.000,0.207,0.250

0,1,2,3
Omnibus:,762.588,Durbin-Watson:,2.011
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8121.818
Skew:,0.4,Prob(JB):,0.0
Kurtosis:,9.263,Cond. No.,7.9


In [27]:
# Leave-one-party-out check: drop SPD rows and regress p_female_diff on the raw
# crime count plus the remaining party indicators, clustering on party_lr.
# NOTE(review): model10 is also assigned by an earlier cell in this notebook;
# running this cell rebinds it.
data_panel_diff_balanced_nospd = data_panel_diff.loc[data_panel_diff['party_SPD'].ne(1)]
y3 = data_panel_diff_balanced_nospd["p_female_diff"]
X5 = sm.add_constant(
    data_panel_diff_balanced_nospd[
        ["crime_count_party", "party_GRÜNE", "party_FDP", "party_CDU", "party_AfD"]
    ]
)
model10 = sm.OLS(endog=y3, exog=X5).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff_balanced_nospd['party_lr']),
)
model10.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,80.71
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0706
Time:,17:21:40,Log-Likelihood:,-15286.0
No. Observations:,3582,AIC:,30580.0
Df Residuals:,3576,BIC:,30620.0
Df Model:,5,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,1.99e-14,1.44e+14,0.000,2.868,2.868
crime_count_party,0.0094,0.041,0.230,0.818,-0.071,0.090
party_GRÜNE,0.2622,0.006,42.103,0.000,0.250,0.274
party_FDP,-1.2529,0.003,-365.849,0.000,-1.260,-1.246
party_CDU,-1.3465,0.004,-314.892,0.000,-1.355,-1.338
party_AfD,1.8110,0.187,9.685,0.000,1.445,2.178

0,1,2,3
Omnibus:,577.408,Durbin-Watson:,1.993
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6632.99
Skew:,0.395,Prob(JB):,0.0
Kurtosis:,9.62,Cond. No.,18.1


In [28]:
# Leave-one-party-out check: drop GRÜNE rows and regress p_female_diff on the raw
# crime count plus the remaining party indicators, clustering on party_lr.
# NOTE(review): the result is stored in model10, a name that other cells in this
# notebook also assign; running this cell rebinds it.
data_panel_diff_balanced_nogr = data_panel_diff.loc[data_panel_diff['party_GRÜNE'].ne(1)]
y4 = data_panel_diff_balanced_nogr["p_female_diff"]
X6 = sm.add_constant(
    data_panel_diff_balanced_nogr[
        ["crime_count_party", "party_SPD", "party_FDP", "party_CDU", "party_AfD"]
    ]
)
model10 = sm.OLS(endog=y4, exog=X6).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff_balanced_nogr['party_lr']),
)
model10.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,46.03
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0932
Time:,17:21:52,Log-Likelihood:,-18467.0
No. Observations:,4408,AIC:,36950.0
Df Residuals:,4402,BIC:,36980.0
Df Model:,5,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,2.28e-14,1.26e+14,0.000,2.868,2.868
crime_count_party,0.0181,0.051,0.353,0.724,-0.083,0.119
party_SPD,-1.0402,0.010,-103.259,0.000,-1.060,-1.020
party_FDP,-1.2536,0.004,-292.383,0.000,-1.262,-1.245
party_CDU,-1.3474,0.005,-251.682,0.000,-1.358,-1.337
party_AfD,1.7715,0.234,7.567,0.000,1.313,2.230

0,1,2,3
Omnibus:,702.03,Durbin-Watson:,1.996
Prob(Omnibus):,0.0,Jarque-Bera (JB):,7247.664
Skew:,0.431,Prob(JB):,0.0
Kurtosis:,9.222,Cond. No.,19.1


In [43]:
# City-level sample: one row per city/plz/state/outcome/crime-count combination,
# without rows that lack the city-level outcome or crime count.
# dropna is chained instead of called with inplace=True: the in-place call on the
# drop_duplicates result triggered pandas' SettingWithCopyWarning.
data_panel_diff2 = (
    data_panel_diff
    .drop_duplicates(['city', 'plz', 'state', 'p_female_city_diff', 'crime_count_city'], ignore_index=True)
    .dropna(subset=['p_female_city_diff', 'crime_count_city'])
)

# Regress the city-level outcome on the city-level crime count with an intercept,
# clustering standard errors on party_lr.
# NOTE(review): X2, y and model4 are also assigned by earlier cells in this notebook;
# running this cell rebinds them, so earlier model cells must not be re-run afterwards
# without first re-creating their own X/y.
X2 = data_panel_diff2["crime_count_city"]
y = data_panel_diff2["p_female_city_diff"]
X2 = sm.add_constant(X2)
model4 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds={'groups': data_panel_diff2['party_lr']})
model4.summary()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


0,1,2,3
Dep. Variable:,p_female_city_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,3.577
Date:,"Wed, 05 Jan 2022",Prob (F-statistic):,0.31
Time:,00:35:46,Log-Likelihood:,1147.0
No. Observations:,1668,AIC:,-2290.0
Df Residuals:,1666,BIC:,-2279.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0033,0.006,0.598,0.550,-0.008,0.014
crime_count_city,0.0002,0.000,1.891,0.059,-7.9e-06,0.000

0,1,2,3
Omnibus:,289.411,Durbin-Watson:,1.928
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2736.752
Skew:,-0.515,Prob(JB):,0.0
Kurtosis:,9.19,Cond. No.,5.97


In [44]:
# City-level regression again, with standard errors clustered by party.
# NOTE(review): X2, y and model5 are also assigned by earlier cells in this notebook;
# running this cell rebinds them.
y = data_panel_diff2["p_female_city_diff"]
X2 = sm.add_constant(data_panel_diff2["crime_count_city"])
model5 = sm.OLS(endog=y, exog=X2).fit(
    cov_type='cluster',
    cov_kwds=dict(groups=data_panel_diff2['party']),
)
model5.summary()

0,1,2,3
Dep. Variable:,p_female_city_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,1.442
Date:,"Wed, 05 Jan 2022",Prob (F-statistic):,0.284
Time:,00:35:55,Log-Likelihood:,1147.0
No. Observations:,1668,AIC:,-2290.0
Df Residuals:,1666,BIC:,-2279.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0033,0.004,0.813,0.416,-0.005,0.011
crime_count_city,0.0002,0.000,1.201,0.230,-0.000,0.001

0,1,2,3
Omnibus:,289.411,Durbin-Watson:,1.928
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2736.752
Skew:,-0.515,Prob(JB):,0.0
Kurtosis:,9.19,Cond. No.,5.97


In [45]:
# party_lr-level sample: deduplicate on city/party/plz/state plus the party_lr
# outcome and crime count, then drop rows missing either of the latter two.
# dropna is chained instead of called with inplace=True, which avoids mutating the
# drop_duplicates result in place (the pattern that makes pandas emit a
# SettingWithCopyWarning).
data_panel_lr = (
    data_panel_diff
    .drop_duplicates(['city', 'party', 'plz', 'state', 'p_female_party_lr', 'crime_count_party_lr'], ignore_index=True)
    .dropna(subset=['p_female_party_lr', 'crime_count_party_lr'])
)

# Regress p_female_party_lr on crime_count_party_lr with an intercept and HC3
# heteroskedasticity-robust standard errors.
# NOTE(review): X2, y and model6 are also assigned by earlier cells in this notebook;
# running this cell rebinds them.
X2 = data_panel_lr["crime_count_party_lr"]
y = data_panel_lr["p_female_party_lr"]
X2 = sm.add_constant(X2)
model6 = sm.OLS(y, X2).fit(cov_type='HC3')
model6.summary()

0,1,2,3
Dep. Variable:,p_female_party_lr,R-squared:,0.042
Model:,OLS,Adj. R-squared:,0.025
Method:,Least Squares,F-statistic:,9.118
Date:,"Wed, 05 Jan 2022",Prob (F-statistic):,0.00381
Time:,00:35:58,Log-Likelihood:,28.816
No. Observations:,58,AIC:,-53.63
Df Residuals:,56,BIC:,-49.51
Df Model:,1,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.2861,0.022,12.900,0.000,0.243,0.330
crime_count_party_lr,-0.0022,0.001,-3.020,0.003,-0.004,-0.001

0,1,2,3
Omnibus:,1.418,Durbin-Watson:,2.173
Prob(Omnibus):,0.492,Jarque-Bera (JB):,1.349
Skew:,-0.254,Prob(JB):,0.509
Kurtosis:,2.452,Cond. No.,16.7


---