# Diff in Diff Regression by Clarissa

In [19]:
import pandas as pd
import numpy as np

df = pd.read_csv("/Users/clarissaache/Documents/IDS 701/uds-2022-ids-701-team-3/20_analysis/big_merge.csv")
df_clean = df.dropna(axis=0).copy()
df.sample(3)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,sex,subprovince,region,sample_population,enrolled_total,rate_enrollment,year
3170,226,226,male,Umer Kot,rural,349,155.0,0.444126,2012
2993,49,49,male,Gujrat,urban,129,115.0,0.891473,2012
1363,239,239,male,D.G. Khan,urban,708,274.0,0.387006,2007


In [20]:
# Packages
import numpy as np
from scipy import stats
import statsmodels.formula.api as smf
import statsmodels.api as sm

## Aggregated

In [21]:
# Treatment and pre-post variables

df_clean.loc[(df_clean["year"] >= 2010), "post_2009"] = 1
df_clean.loc[(df_clean["year"] < 2007), "post_2009"] = 0

# Divide into urban/rural and rural+female

df_urban = df_clean.loc[df_clean["region"] == "urban"].copy()
df_rural = df_clean.loc[df_clean["region"] == "rural"].copy()
df_female_rural = df_clean.loc[(df_clean["sex"] == "female") & (df_clean["region"] == "rural")].copy()

## Cities controlled by terrorist in 2009 vs not
(only measuring differences in women)

In [22]:
taliabn_dominance = [
    "South Waziristan",
    "North Waziristan",
    "Orakzai",
    "Kurram",
    "Khyber",
    "Mohmand",
    "Bajur",
    "Darra Adamkhel",
    "Swat",
    "Upper Dir",
    "Lower Dir",
    "Bannu",
    "Lakki Marwat",
    "Tank",
    "Peshawar",
    "Dera Ismail Khan",
    "Mardan",
    "Charsadda",
    "Kohat",
]

In [23]:
# Define Treatment variables

df_female_rural["Treated"] = 0
df_female_rural.loc[(df_female_rural["subprovince"].isin(taliabn_dominance)), "Treated"] = 1
df_female_rural.sample(5)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,sex,subprovince,region,sample_population,enrolled_total,rate_enrollment,year,post_2009,Treated
702,314,314,female,Toba Tek Singh,rural,75,51.0,0.68,2005,0.0,0
1130,6,6,female,Bahawal Nagar,rural,1040,287.0,0.275962,2007,,0
622,234,234,female,Kohat,rural,53,30.0,0.566038,2005,0.0,1
632,244,244,female,Layya,rural,57,27.0,0.473684,2005,0.0,0
5440,112,112,female,Lasbela,rural,47,20.0,0.425532,2019,1.0,0


In [29]:
from linearmodels import PanelOLS
df_for_panelols = df_female_rural.set_index(["subprovince", "year"])

mod = PanelOLS.from_formula(
    "rate_enrollment ~ Treated * post_2009 + EntityEffects + TimeEffects",
    data=df_for_panelols,
    drop_absorbed=True,
).fit()
mod.summary

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Treated, post_2009



0,1,2,3
Dep. Variable:,rate_enrollment,R-squared:,0.0032
Estimator:,PanelOLS,R-squared (Between):,0.0125
No. Observations:,1089,R-squared (Within):,0.0098
Date:,"Mon, Apr 25 2022",R-squared (Overall):,0.0104
Time:,14:10:00,Log-likelihood,1010.8
Cov. Estimator:,Unadjusted,,
,,F-statistic:,3.0586
Entities:,130,P-value,0.0806
Avg Obs:,8.3769,Distribution:,"F(1,948)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Treated:post_2009,0.0409,0.0234,1.7489,0.0806,-0.0050,0.0868


In [30]:
print(mod.summary.as_latex())

\begin{center}
\begin{tabular}{lclc}
\toprule
\textbf{Dep. Variable:}     &  rate\_enrollment  & \textbf{  R-squared:         }   &      0.0032      \\
\textbf{Estimator:}         &      PanelOLS      & \textbf{  R-squared (Between):}  &      0.0125      \\
\textbf{No. Observations:}  &        1089        & \textbf{  R-squared (Within):}   &      0.0098      \\
\textbf{Date:}              &  Mon, Apr 25 2022  & \textbf{  R-squared (Overall):}  &      0.0104      \\
\textbf{Time:}              &      14:10:00      & \textbf{  Log-likelihood     }   &      1010.8      \\
\textbf{Cov. Estimator:}    &     Unadjusted     & \textbf{                     }   &                  \\
\textbf{}                   &                    & \textbf{  F-statistic:       }   &      3.0586      \\
\textbf{Entities:}          &        130         & \textbf{  P-value            }   &      0.0806      \\
\textbf{Avg Obs:}           &       8.3769       & \textbf{  Distribution:      }   &     F(1,948)     \\


In [31]:
# Taliban proximity

mod_proximity = smf.ols('rate_enrollment ~ C(post_2009) * C(Treated)', df_female_rural).fit()
mod_proximity.get_robustcov_results(cov_type="HC3").summary()


0,1,2,3
Dep. Variable:,rate_enrollment,R-squared:,0.035
Model:,OLS,Adj. R-squared:,0.032
Method:,Least Squares,F-statistic:,20.79
Date:,"Mon, 25 Apr 2022",Prob (F-statistic):,4.24e-13
Time:,14:10:11,Log-Likelihood:,27.582
No. Observations:,1089,AIC:,-47.16
Df Residuals:,1085,BIC:,-27.19
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.4772,0.015,32.825,0.000,0.449,0.506
C(post_2009)[T.1.0],0.0963,0.017,5.589,0.000,0.062,0.130
C(Treated)[T.1],-0.0587,0.026,-2.260,0.024,-0.110,-0.008
C(post_2009)[T.1.0]:C(Treated)[T.1],0.0272,0.034,0.793,0.428,-0.040,0.095

0,1,2,3
Omnibus:,199.005,Durbin-Watson:,1.67
Prob(Omnibus):,0.0,Jarque-Bera (JB):,44.081
Skew:,-0.096,Prob(JB):,2.68e-10
Kurtosis:,2.033,Cond. No.,12.2


In [32]:
print(mod_proximity.summary().as_latex())

\begin{center}
\begin{tabular}{lclc}
\toprule
\textbf{Dep. Variable:}                       & rate\_enrollment & \textbf{  R-squared:         } &     0.035   \\
\textbf{Model:}                               &       OLS        & \textbf{  Adj. R-squared:    } &     0.032   \\
\textbf{Method:}                              &  Least Squares   & \textbf{  F-statistic:       } &     13.16   \\
\textbf{Date:}                                & Mon, 25 Apr 2022 & \textbf{  Prob (F-statistic):} &  1.92e-08   \\
\textbf{Time:}                                &     14:10:13     & \textbf{  Log-Likelihood:    } &    27.582   \\
\textbf{No. Observations:}                    &        1089      & \textbf{  AIC:               } &    -47.16   \\
\textbf{Df Residuals:}                        &        1085      & \textbf{  BIC:               } &    -27.19   \\
\textbf{Df Model:}                            &           3      & \textbf{                     } &             \\
\textbf{Covariance Type:}         