In [1]:
# import sys
# !{sys.executable} -m pip install pandas
# !{sys.executable} -m pip install statsmodels
# !{sys.executable} -m pip install tabulate
# !{sys.executable} -m pip install httpimport

import pandas as pd
import statsmodels.api as sm
import numpy as np
from statsmodels.iolib.summary2 import summary_col
from tabulate import tabulate

  import pandas.util.testing as tm


In [2]:
# Merged election/crime panel, read directly from the raw CSV hosted on GitHub.
DATA_URL = r'https://raw.githubusercontent.com/mariusgruenewald/pol_viol/main/data_election_crime_merged.csv'
data_panel_diff = pd.read_csv(DATA_URL)

In [3]:
# Append one indicator column per party (`party_*`) and per value of
# `bl_kuerzel` (`bl_*`).
# dtype=int pins the indicators to numeric 0/1: recent pandas versions default
# to boolean dummies, which turn the design matrix into object dtype once they
# are combined with float regressors, and statsmodels then refuses to fit.
party_dummies = pd.get_dummies(data_panel_diff['party'], prefix='party', dtype=int)
state_dummies = pd.get_dummies(data_panel_diff['bl_kuerzel'], prefix='bl', dtype=int)
data_panel_diff = pd.concat([data_panel_diff, party_dummies, state_dummies], axis=1)

### Let's consider the standard measure of crime (against a party in a city)

In [4]:
# Keep only rows whose `cycle_1` equals 2014, then collapse rows that repeat the
# same (city, party, plz, state, p_female_diff, crime_count_party) combination.
# The index is renumbered from 0 after de-duplication.
dedup_keys = ['city', 'party', 'plz', 'state', 'p_female_diff', 'crime_count_party']
data_panel_diff = (
    data_panel_diff
    .loc[data_panel_diff['cycle_1'] == 2014]
    .drop_duplicates(subset=dedup_keys, ignore_index=True)
)
data_panel_diff

Unnamed: 0.1,Unnamed: 0,crime,city,law,date,background,suspects,party,state,plz,...,party_CDU,party_DIE LINKE,party_FDP,party_GRÜNE,party_SPD,bl_BB,bl_BW,bl_HE,bl_SN,bl_TH
0,89,Korperverletzung,Stuttgart,223 StGB,2019-04-20,Links,0.0,AfD,BW,70173.0,...,0,0,0,0,0,0,1,0,0,0
1,102,,Stuttgart,,,,,CDU,BW,70173.0,...,1,0,0,0,0,0,0,0,0,0
2,103,,Stuttgart,,,,,DIE LINKE,BW,70173.0,...,0,1,0,0,0,0,0,0,0,0
3,104,Sachbeschadigung,Stuttgart,303 StGB,2019-04-17,Links,2.0,FDP,BW,70173.0,...,0,0,1,0,0,0,1,0,0,0
4,106,Beleidigung,Stuttgart,185 StGB,2019-02-27,Rechts,0.0,GRÜNE,BW,70173.0,...,0,0,0,1,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10414,24492,,Zierenberg,,,,,CDU,HE,34289.0,...,1,0,0,0,0,0,0,0,0,0
10415,24493,,Zierenberg,,,,,GRÜNE,HE,34289.0,...,0,0,0,1,0,0,0,0,0,0
10416,24494,,Zierenberg,,,,,SPD,HE,34289.0,...,0,0,0,0,1,0,0,0,0,0
10417,24497,,Zwingenberg,,,,,CDU,HE,64673.0,...,1,0,0,0,0,0,0,0,0,0


In [5]:
# Rescale the outcome by a factor of 100 (presumably share -> percentage points;
# confirm against the data source).
# NOTE: this overwrites the column, so re-running the cell rescales it again.
data_panel_diff['p_female_diff'] = data_panel_diff['p_female_diff'].mul(100)

In [6]:
# Unconditional sample mean of the (rescaled) outcome; NaNs are skipped by pandas.
data_panel_diff.loc[:, 'p_female_diff'].mean()

1.9894823579394114

In [7]:
# Quick look at the outcome, the city and the per-party crime count side by side.
key_columns = ['p_female_diff', 'city', 'crime_count_party']
data_panel_diff.loc[:, key_columns]

Unnamed: 0,p_female_diff,city,crime_count_party
0,6.666667,Stuttgart,13.0
1,-5.000000,Stuttgart,0.0
2,1.666667,Stuttgart,0.0
3,-1.666667,Stuttgart,2.0
4,0.000000,Stuttgart,1.0
...,...,...,...
10414,5.138340,Zierenberg,0.0
10415,,Zierenberg,0.0
10416,0.000000,Zierenberg,0.0
10417,10.219780,Zwingenberg,0.0


In [8]:
# Per-party means of the crime count and the outcome, shown together with the
# number of rows that have a non-missing outcome.
# The two numeric columns are selected *before* aggregating: averaging every
# column of the frame fails on recent pandas versions because the frame also
# holds string columns (city, crime, law, ...).
print("Conditional means based on party (no restriction to wether a crime has happened or whether the party stood twice)")
party_means = data_panel_diff.groupby(['party'], as_index=False)[['crime_count_party', 'p_female_diff']].mean()
party_means, len(data_panel_diff[~data_panel_diff['p_female_diff'].isna()])

Conditional means based on party (no restriction to wether a crime has happened or whether the party stood twice)


(       party  crime_count_party  p_female_diff
 0        AfD           0.326047       4.722508
 1        CDU           0.082792       1.522936
 2  DIE LINKE           0.000000       2.868497
 3        FDP           0.031882       1.616425
 4      GRÜNE           0.065382       3.132141
 5        SPD           0.132382       1.831817, 4948)

In [9]:
# Drop rows with a missing outcome or a missing per-party crime count.
# Rebinding the name (instead of inplace=True) avoids mutating a frame that may
# be flagged as a copy of an earlier slice.
data_panel_diff = data_panel_diff.dropna(subset=['p_female_diff', 'crime_count_party'])
print("Conditional means based on party when they stood for office twice")
# Aggregate only the two numeric columns of interest; averaging every column
# fails on recent pandas versions because of the string columns in the frame.
party_means = data_panel_diff.groupby(['party'], as_index=False)[['crime_count_party', 'p_female_diff']].mean()
party_means, len(data_panel_diff[~data_panel_diff['p_female_diff'].isna()])

Conditional means based on party when they stood for office twice


(       party  crime_count_party  p_female_diff
 0        AfD           4.559322       4.722508
 1        CDU           0.104265       1.522936
 2  DIE LINKE           0.000000       2.868497
 3        FDP           0.083499       1.616425
 4      GRÜNE           0.151852       3.132141
 5        SPD           0.196193       1.831817, 4948)

In [26]:
# Log-transform the regressors. The crime count is shifted by one before taking
# logs so that a count of zero maps to 0 instead of -inf.
crime_plus_one = data_panel_diff['crime_count_party'] + 1
data_panel_diff['log_crime_count_party'] = np.log(crime_plus_one)
data_panel_diff['log_pop'] = np.log(data_panel_diff['pop'])

In [33]:
# Keep only observations for which log population is available.
has_log_pop = data_panel_diff['log_pop'].notna()
data_panel_diff = data_panel_diff[has_log_pop]

In [34]:
# Baseline bivariate OLS: p_female_diff on a constant and log(crime count + 1),
# with standard errors clustered on `party_lr`.
y = data_panel_diff['p_female_diff']
X = sm.add_constant(data_panel_diff['log_crime_count_party'])
lr_clusters = {'groups': data_panel_diff['party_lr']}
model1 = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds=lr_clusters)
model1.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,2.283
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.372
Time:,17:26:50,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4865,BIC:,41210.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.9584,0.306,6.404,0.000,1.359,2.558
log_crime_count_party,0.9999,0.662,1.511,0.131,-0.297,2.297

0,1,2,3
Omnibus:,829.678,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8470.262
Skew:,0.501,Prob(JB):,0.0
Kurtosis:,9.385,Cond. No.,3.46


In [35]:
# Same y and X as the previous fit; only the clustering variable changes (party).
party_clusters = {'groups': data_panel_diff['party']}
model2 = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds=party_clusters)
model2.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,4.13
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0978
Time:,17:26:56,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4865,BIC:,41210.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.9584,0.280,6.984,0.000,1.409,2.508
log_crime_count_party,0.9999,0.492,2.032,0.042,0.036,1.964

0,1,2,3
Omnibus:,829.678,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8470.262
Skew:,0.501,Prob(JB):,0.0
Kurtosis:,9.385,Cond. No.,3.46


In [36]:
# Same y and X again, this time clustering the standard errors by state.
state_clusters = {'groups': data_panel_diff['state']}
model3 = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds=state_clusters)
model3.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,7.349
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0535
Time:,17:27:01,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4865,BIC:,41210.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.9584,0.558,3.507,0.000,0.864,3.053
log_crime_count_party,0.9999,0.369,2.711,0.007,0.277,1.723

0,1,2,3
Omnibus:,829.678,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8470.262
Skew:,0.501,Prob(JB):,0.0
Kurtosis:,9.385,Cond. No.,3.46


In [37]:
# Side-by-side table of the three baseline fits. They use the same y and X, so
# the point estimates coincide; only the clustering variable for the standard
# errors differs between columns.
print(summary_col([model1,model2, model3]))
# Reading the slope (0.9999 in the printed table): the outcome was rescaled by
# 100 earlier in the notebook and the regressor is log(crime count + 1), so a 1%
# increase in (crime count + 1) is associated with a change of roughly
# 0.9999 / 100 ~= 0.01 in the outcome's units -- not with a ~1% higher share.
# NOTE(review): the outcome is presumably the change in the female candidate
# share in percentage points -- confirm against the data description.


                      p_female_diff I p_female_diff II p_female_diff III
------------------------------------------------------------------------
const                 1.9584          1.9584           1.9584           
                      (0.3058)        (0.2804)         (0.5584)         
log_crime_count_party 0.9999          0.9999           0.9999           
                      (0.6617)        (0.4920)         (0.3688)         
Standard errors in parentheses.


In [15]:
# Execute the local script estout_func.py inside the notebook namespace.
# NOTE(review): presumably this is where `estout_ols` (called further down) is
# defined; the file must be present in the working directory -- confirm.
%run estout_func.py

In [40]:
# Add log population as a control; standard errors clustered by party.
# `y` is the outcome series defined in an earlier cell.
pop_controls = ["log_crime_count_party", "log_pop"]
Xp = sm.add_constant(data_panel_diff[pop_controls])
model4 = sm.OLS(y, Xp).fit(
    cov_type='cluster',
    cov_kwds={'groups': data_panel_diff['party']},
)
model4.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,2.099
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.218
Time:,17:27:52,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4864,BIC:,41220.0
Df Model:,2,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.5232,2.145,1.176,0.239,-1.680,6.727
log_crime_count_party,1.0784,0.603,1.787,0.074,-0.104,2.261
log_pop,-0.0635,0.219,-0.290,0.772,-0.493,0.366

0,1,2,3
Omnibus:,826.352,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8451.365
Skew:,0.497,Prob(JB):,0.0
Kurtosis:,9.379,Cond. No.,76.5


In [41]:
# Log crime count plus log population, with standard errors clustered by state.
# `y` is the outcome series defined in an earlier cell.
Xp = sm.add_constant(data_panel_diff.loc[:, ["log_crime_count_party", "log_pop"]])
state_clusters = data_panel_diff['state']
model5 = sm.OLS(y, Xp).fit(cov_type='cluster', cov_kwds={'groups': state_clusters})
model5.summary()

0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,4.331
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0998
Time:,17:27:57,Log-Likelihood:,-20599.0
No. Observations:,4867,AIC:,41200.0
Df Residuals:,4864,BIC:,41220.0
Df Model:,2,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.5232,1.737,1.453,0.146,-0.881,5.927
log_crime_count_party,1.0784,0.372,2.902,0.004,0.350,1.807
log_pop,-0.0635,0.165,-0.384,0.701,-0.387,0.260

0,1,2,3
Omnibus:,826.352,Durbin-Watson:,2.027
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8451.365
Skew:,0.497,Prob(JB):,0.0
Kurtosis:,9.379,Cond. No.,76.5


In [19]:
# Log crime count plus party indicators. `party_DIE LINKE` is left out of the
# regressor list, so it serves as the reference category next to the constant.
# Standard errors are clustered on `party_lr`.
# NOTE: this rebinds `model4`, replacing any earlier model stored under that name.
party_fe_columns = ["log_crime_count_party", "party_GRÜNE", "party_SPD", "party_CDU", "party_FDP", "party_AfD"]
X2 = sm.add_constant(data_panel_diff[party_fe_columns])
model4 = sm.OLS(y, X2).fit(
    cov_type='cluster',
    cov_kwds={'groups': data_panel_diff['party_lr']},
)
model4.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,0.005984
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.951
Time:,17:16:32,Log-Likelihood:,-20935.0
No. Observations:,4948,AIC:,41880.0
Df Residuals:,4941,BIC:,41930.0
Df Model:,6,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,8.23e-15,3.49e+14,0.000,2.868,2.868
log_crime_count_party,0.8264,1.023,0.808,0.419,-1.179,2.832
party_GRÜNE,0.2053,0.072,2.844,0.004,0.064,0.347
party_SPD,-1.1004,0.079,-13.952,0.000,-1.255,-0.946
party_CDU,-1.3811,0.044,-31.385,0.000,-1.467,-1.295
party_FDP,-1.2844,0.040,-32.096,0.000,-1.363,-1.206
party_AfD,1.2811,0.709,1.806,0.071,-0.109,2.671

0,1,2,3
Omnibus:,795.592,Durbin-Watson:,2.023
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8663.407
Skew:,0.421,Prob(JB):,0.0
Kurtosis:,9.427,Cond. No.,11.6


In [20]:
# Same design matrix X2, clustering on party.
# NOTE: this rebinds `model5`, replacing any earlier model stored under that name.
party_clusters = {'groups': data_panel_diff['party']}
model5 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds=party_clusters)
model5.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,0.01548
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.906
Time:,17:16:50,Log-Likelihood:,-20935.0
No. Observations:,4948,AIC:,41880.0
Df Residuals:,4941,BIC:,41930.0
Df Model:,6,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,3.21e-15,8.93e+14,0.000,2.868,2.868
log_crime_count_party,0.8264,0.636,1.299,0.194,-0.420,2.073
party_GRÜNE,0.2053,0.045,4.576,0.000,0.117,0.293
party_SPD,-1.1004,0.049,-22.442,0.000,-1.196,-1.004
party_CDU,-1.3811,0.027,-50.484,0.000,-1.435,-1.327
party_FDP,-1.2844,0.025,-51.628,0.000,-1.333,-1.236
party_AfD,1.2811,0.441,2.906,0.004,0.417,2.145

0,1,2,3
Omnibus:,795.592,Durbin-Watson:,2.023
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8663.407
Skew:,0.421,Prob(JB):,0.0
Kurtosis:,9.427,Cond. No.,11.6


In [21]:
# Same design matrix X2, clustering on state.
state_clusters = {'groups': data_panel_diff['state']}
model6 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds=state_clusters)
model6.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,3.909
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.108
Time:,17:16:55,Log-Likelihood:,-20935.0
No. Observations:,4948,AIC:,41880.0
Df Residuals:,4941,BIC:,41930.0
Df Model:,6,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,0.326,8.802,0.000,2.230,3.507
log_crime_count_party,0.8264,0.589,1.403,0.161,-0.328,1.981
party_GRÜNE,0.2053,0.806,0.255,0.799,-1.374,1.785
party_SPD,-1.1004,0.462,-2.382,0.017,-2.006,-0.195
party_CDU,-1.3811,0.232,-5.953,0.000,-1.836,-0.926
party_FDP,-1.2844,0.446,-2.881,0.004,-2.158,-0.411
party_AfD,1.2811,3.059,0.419,0.675,-4.715,7.277

0,1,2,3
Omnibus:,795.592,Durbin-Watson:,2.023
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8663.407
Skew:,0.421,Prob(JB):,0.0
Kurtosis:,9.427,Cond. No.,11.6


In [29]:
# Hand the three party-indicator fits to estout_ols; the cell output below is a
# LaTeX table with one column per model.
fe_models = [model4, model5, model6]
fe_model_names = ["model4", "model5", "model6"]
estout_ols(
    modellist=fe_models,
    modellist_str=fe_model_names,
    y='Share Female Candidates',
    caption="Basic Regression",
    label="Basic Regression",
    list_regression_sets=[X2, X2, X2],
    p_values=True,
)

\begin{table}[htbp] \caption{Basic Regression \label{Basic Regression}}
\resizebox{0.9\textwidth}{!}{ \centering
\begin{tabular}{lccc} \hline
  & (I) & (II) & (III) \\ 
Dependent Variable & \multicolumn{3}{c}{Share Female Candidates} \\ \hline \vspace{4pt} 
& \begin{footnotesize}\end{footnotesize} & \begin{footnotesize}\end{footnotesize} & \begin{footnotesize}\end{footnotesize}  \\ 
const & 2.8685*** & 2.8685*** & 2.8685*** \\ 
 \vspace{4pt} & \begin{footnotesize}(0.0) \end{footnotesize} & \begin{footnotesize}(0.0) \end{footnotesize} & \begin{footnotesize}(0.3259) \end{footnotesize} \\ 
log_crime_count_party & 0.8264 & 0.8264 & 0.8264 \\ 
 \vspace{4pt} & \begin{footnotesize}(1.0231) \end{footnotesize} & \begin{footnotesize}(0.636) \end{footnotesize} & \begin{footnotesize}(0.5891) \end{footnotesize} \\ 
party_AfD & 1.2811* & 1.2811*** & 1.2811 \\ 
 \vspace{4pt} & \begin{footnotesize}(0.7092) \end{footnotesize} & \begin{footnotesize}(0.4409) \end{footnotesize} & \begin{footnotesize}(3.05

---

In [23]:
# Re-estimate without AfD rows: the crime count enters in levels, together with
# indicators for SPD, FDP, CDU and GRÜNE; standard errors clustered on `party_lr`.
not_afd = data_panel_diff['party_AfD'] != 1
data_panel_diff_balanced_noafd = data_panel_diff[not_afd]
noafd_columns = ["crime_count_party", "party_SPD", "party_FDP", "party_CDU", "party_GRÜNE"]
y3 = data_panel_diff_balanced_noafd["p_female_diff"]
X4 = sm.add_constant(data_panel_diff_balanced_noafd[noafd_columns])
model9 = sm.OLS(y3, X4).fit(
    cov_type='cluster',
    cov_kwds={'groups': data_panel_diff_balanced_noafd['party_lr']},
)
model9.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,69.97
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0757
Time:,17:17:21,Log-Likelihood:,-20622.0
No. Observations:,4889,AIC:,41260.0
Df Residuals:,4883,BIC:,41290.0
Df Model:,5,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,2.36e-14,1.22e+14,0.000,2.868,2.868
crime_count_party,0.2310,0.073,3.172,0.002,0.088,0.374
party_SPD,-1.0820,0.014,-75.728,0.000,-1.110,-1.054
party_FDP,-1.2714,0.006,-209.074,0.000,-1.283,-1.259
party_CDU,-1.3696,0.008,-180.377,0.000,-1.385,-1.355
party_GRÜNE,0.2286,0.011,20.668,0.000,0.207,0.250

0,1,2,3
Omnibus:,762.588,Durbin-Watson:,2.011
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8121.818
Skew:,0.4,Prob(JB):,0.0
Kurtosis:,9.263,Cond. No.,7.9


In [27]:
# Re-estimate without SPD rows: the crime count enters in levels, together with
# indicators for GRÜNE, FDP, CDU and AfD; standard errors clustered on `party_lr`.
# NOTE: `y3` is rebound here, replacing any earlier series stored under that name.
not_spd = data_panel_diff['party_SPD'] != 1
data_panel_diff_balanced_nospd = data_panel_diff[not_spd]
nospd_columns = ["crime_count_party", "party_GRÜNE", "party_FDP", "party_CDU", "party_AfD"]
y3 = data_panel_diff_balanced_nospd["p_female_diff"]
X5 = sm.add_constant(data_panel_diff_balanced_nospd[nospd_columns])
model10 = sm.OLS(y3, X5).fit(
    cov_type='cluster',
    cov_kwds={'groups': data_panel_diff_balanced_nospd['party_lr']},
)
model10.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,80.71
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0706
Time:,17:21:40,Log-Likelihood:,-15286.0
No. Observations:,3582,AIC:,30580.0
Df Residuals:,3576,BIC:,30620.0
Df Model:,5,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,1.99e-14,1.44e+14,0.000,2.868,2.868
crime_count_party,0.0094,0.041,0.230,0.818,-0.071,0.090
party_GRÜNE,0.2622,0.006,42.103,0.000,0.250,0.274
party_FDP,-1.2529,0.003,-365.849,0.000,-1.260,-1.246
party_CDU,-1.3465,0.004,-314.892,0.000,-1.355,-1.338
party_AfD,1.8110,0.187,9.685,0.000,1.445,2.178

0,1,2,3
Omnibus:,577.408,Durbin-Watson:,1.993
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6632.99
Skew:,0.395,Prob(JB):,0.0
Kurtosis:,9.62,Cond. No.,18.1


In [28]:
# Re-estimate without GRÜNE rows: the crime count enters in levels, together with
# indicators for SPD, FDP, CDU and AfD; standard errors clustered on `party_lr`.
# NOTE(review): the result is stored as `model10`, which overwrites any model
# previously bound to that name -- consider a distinct name if both are needed.
not_gruene = data_panel_diff['party_GRÜNE'] != 1
data_panel_diff_balanced_nogr = data_panel_diff[not_gruene]
nogr_columns = ["crime_count_party", "party_SPD", "party_FDP", "party_CDU", "party_AfD"]
y4 = data_panel_diff_balanced_nogr["p_female_diff"]
X6 = sm.add_constant(data_panel_diff_balanced_nogr[nogr_columns])
model10 = sm.OLS(y4, X6).fit(
    cov_type='cluster',
    cov_kwds={'groups': data_panel_diff_balanced_nogr['party_lr']},
)
model10.summary()



0,1,2,3
Dep. Variable:,p_female_diff,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,46.03
Date:,"Fri, 01 Apr 2022",Prob (F-statistic):,0.0932
Time:,17:21:52,Log-Likelihood:,-18467.0
No. Observations:,4408,AIC:,36950.0
Df Residuals:,4402,BIC:,36980.0
Df Model:,5,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,2.8685,2.28e-14,1.26e+14,0.000,2.868,2.868
crime_count_party,0.0181,0.051,0.353,0.724,-0.083,0.119
party_SPD,-1.0402,0.010,-103.259,0.000,-1.060,-1.020
party_FDP,-1.2536,0.004,-292.383,0.000,-1.262,-1.245
party_CDU,-1.3474,0.005,-251.682,0.000,-1.358,-1.337
party_AfD,1.7715,0.234,7.567,0.000,1.313,2.230

0,1,2,3
Omnibus:,702.03,Durbin-Watson:,1.996
Prob(Omnibus):,0.0,Jarque-Bera (JB):,7247.664
Skew:,0.431,Prob(JB):,0.0
Kurtosis:,9.222,Cond. No.,19.1


In [43]:
# City-level sample: one row per distinct
# (city, plz, state, p_female_city_diff, crime_count_city) combination, with
# rows lacking either city-level variable removed.
# drop_duplicates and dropna are chained instead of calling dropna(inplace=True)
# on the de-duplicated frame, which is what triggered pandas'
# "A value is trying to be set on a copy of a slice" warning.
data_panel_diff2 = (
    data_panel_diff
    .drop_duplicates(['city', 'plz', 'state', 'p_female_city_diff', 'crime_count_city'], ignore_index=True)
    .dropna(subset=['p_female_city_diff', 'crime_count_city'])
)

# Bivariate OLS of the city-level outcome on the city-level crime count, with
# standard errors clustered on `party_lr`.
# NOTE: `X2`, `y` and `model4` are rebound here and replace the party-level
# objects that earlier cells stored under the same names.
X2 = data_panel_diff2["crime_count_city"]
y = data_panel_diff2["p_female_city_diff"]
X2 = sm.add_constant(X2)
model4 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds={'groups': data_panel_diff2['party_lr']})
model4.summary()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


0,1,2,3
Dep. Variable:,p_female_city_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,3.577
Date:,"Wed, 05 Jan 2022",Prob (F-statistic):,0.31
Time:,00:35:46,Log-Likelihood:,1147.0
No. Observations:,1668,AIC:,-2290.0
Df Residuals:,1666,BIC:,-2279.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0033,0.006,0.598,0.550,-0.008,0.014
crime_count_city,0.0002,0.000,1.891,0.059,-7.9e-06,0.000

0,1,2,3
Omnibus:,289.411,Durbin-Watson:,1.928
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2736.752
Skew:,-0.515,Prob(JB):,0.0
Kurtosis:,9.19,Cond. No.,5.97


In [44]:
# City-level regression again, now clustering the standard errors by party.
# NOTE: rebinds `X2`, `y` and `model5`.
y = data_panel_diff2["p_female_city_diff"]
X2 = sm.add_constant(data_panel_diff2["crime_count_city"])
city_party_clusters = {'groups': data_panel_diff2['party']}
model5 = sm.OLS(y, X2).fit(cov_type='cluster', cov_kwds=city_party_clusters)
model5.summary()

0,1,2,3
Dep. Variable:,p_female_city_diff,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,1.442
Date:,"Wed, 05 Jan 2022",Prob (F-statistic):,0.284
Time:,00:35:55,Log-Likelihood:,1147.0
No. Observations:,1668,AIC:,-2290.0
Df Residuals:,1666,BIC:,-2279.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0033,0.004,0.813,0.416,-0.005,0.011
crime_count_city,0.0002,0.000,1.201,0.230,-0.000,0.001

0,1,2,3
Omnibus:,289.411,Durbin-Watson:,1.928
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2736.752
Skew:,-0.515,Prob(JB):,0.0
Kurtosis:,9.19,Cond. No.,5.97


In [45]:
# Sample for the `*_party_lr` variables: one row per distinct
# (city, party, plz, state, p_female_party_lr, crime_count_party_lr)
# combination, with rows lacking either `*_party_lr` variable removed.
# drop_duplicates and dropna are chained instead of calling dropna(inplace=True)
# on the de-duplicated frame, which can trigger pandas' SettingWithCopyWarning.
data_panel_lr = (
    data_panel_diff
    .drop_duplicates(['city', 'party', 'plz', 'state', 'p_female_party_lr', 'crime_count_party_lr'], ignore_index=True)
    .dropna(subset=['p_female_party_lr', 'crime_count_party_lr'])
)

# Bivariate OLS with heteroskedasticity-robust (HC3) standard errors.
# NOTE: `X2`, `y` and `model6` are rebound here and replace the objects that
# earlier cells stored under the same names.
X2 = data_panel_lr["crime_count_party_lr"]
y = data_panel_lr["p_female_party_lr"]
X2 = sm.add_constant(X2)
model6 = sm.OLS(y, X2).fit(cov_type='HC3')
model6.summary()

0,1,2,3
Dep. Variable:,p_female_party_lr,R-squared:,0.042
Model:,OLS,Adj. R-squared:,0.025
Method:,Least Squares,F-statistic:,9.118
Date:,"Wed, 05 Jan 2022",Prob (F-statistic):,0.00381
Time:,00:35:58,Log-Likelihood:,28.816
No. Observations:,58,AIC:,-53.63
Df Residuals:,56,BIC:,-49.51
Df Model:,1,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.2861,0.022,12.900,0.000,0.243,0.330
crime_count_party_lr,-0.0022,0.001,-3.020,0.003,-0.004,-0.001

0,1,2,3
Omnibus:,1.418,Durbin-Watson:,2.173
Prob(Omnibus):,0.492,Jarque-Bera (JB):,1.349
Skew:,-0.254,Prob(JB):,0.509
Kurtosis:,2.452,Cond. No.,16.7


---