In [44]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
# Read the raw Stata file (expected next to the notebook) into a DataFrame.
data = pd.read_stata(filepath_or_buffer='data_1.dta')

#### 1.1

In [3]:
# Drop rows in which *both* treatment indicators are missing; a row is kept
# as long as at least one of the two columns has a value.
treatment_flags = ['treatp', 'treatnp']
data = data.dropna(subset=treatment_flags, how='all')

In [4]:
# Two sub-samples, selected by which treatment indicator is populated:
#   poor    -> rows with a non-missing `treatp`
#   nonpoor -> rows with a non-missing `treatnp`
poor = data[data['treatp'].notna()]
nonpoor = data[data['treatnp'].notna()]

In [14]:
# Mean consumption per (time, treatment status) cell, computed separately
# for the non-poor and the poor sub-samples.
nonpoor_cells = nonpoor.groupby(['time', 'treatnp'])['consump']
poor_cells = poor.groupby(['time', 'treatp'])['consump']

npr = nonpoor_cells.mean()
pr = poor_cells.mean()

In [79]:
# ITE / ATE
# NOTE(review): presumably ITE = indirect treatment effect (non-poor sample)
# and ATE = average treatment effect (poor sample) -- confirm with the author.
#
# Within each time period, diff() subtracts the previous row's cell mean from
# the current one. groupby sorts its keys by default, so the lower treatment
# value comes first and the surviving row is "higher minus lower" treatment
# status; the first row of every period is NaN and is removed by dropna().
nonpoor_gap = npr.groupby(level='time').diff().dropna()
poor_gap = pr.groupby(level='time').diff().dropna()

print(nonpoor_gap)
print('\n')
print(poor_gap)

time  treatnp
8     1.0        -6.237778
9     1.0        19.368240
10    1.0        17.363739
Name: consump, dtype: float32


time  treatp
8     1.0       15.840271
9     1.0       25.739944
10    1.0       30.609909
Name: consump, dtype: float32


In [85]:
# OLS with sklearn:
# Regress consumption on the treatment indicator separately for each time
# period and print the slope. The same estimator object is simply re-fitted
# on every iteration.
from sklearn.linear_model import LinearRegression

regr = LinearRegression()

# Non-poor sample: slope on `treatnp`, one line per period.
for period, group in nonpoor.groupby('time'):
    # sklearn expects a 2-D feature matrix, hence the single-column reshape.
    X = group['treatnp'].values.reshape(-1, 1)
    y = group['consump'].values
    reg = regr.fit(X, y)
    print(reg.coef_[0])

print('\n')

# Poor sample: slope on `treatp`, one line per period.
for period, group in poor.groupby('time'):
    X = group['treatp'].values.reshape(-1, 1)
    y = group['consump'].values
    reg = regr.fit(X, y)
    print(reg.coef_[0])

-6.23777357848297
19.368245356089105
17.363729818841982


15.840269814660573
25.739942246725608
30.609912387630725


In [98]:
# statsmodels

# OLS for ITE
# Per-period regression of consumption on `treatnp` (non-poor sample) with an
# intercept, using cluster-robust standard errors grouped by `local`.
for period, group in nonpoor.groupby('time'):
    y = group['consump'].values
    # add_constant prepends the intercept column, so index 1 below refers to
    # the treatment indicator.
    X = sm.add_constant(group['treatnp'].values.ravel())
    villages = group['local'].values

    results = sm.OLS(y, X).fit(
        cov_type='cluster',
        cov_kwds={'groups': villages},
    )

    print(f'time: {period}')
    print(f'coefficient: {results.params[1]}')
    print(f'standard error: {results.bse[1]}')
    print('\n')

time: 8
coefficient: -6.237773578483137
standard error: 6.872664630482705


time: 9
coefficient: 19.368245356089144
standard error: 9.665845049851379


time: 10
coefficient: 17.36372981884209
standard error: 9.878562367785348




In [99]:
# OLS for ATE
# Per-period regression of consumption on `treatp` (poor sample) with an
# intercept, using cluster-robust standard errors grouped by `local`.
for period, group in poor.groupby('time'):
    y = group['consump'].values
    # add_constant prepends the intercept column, so index 1 below refers to
    # the treatment indicator.
    X = sm.add_constant(group['treatp'].values.ravel())
    villages = group['local'].values

    results = sm.OLS(y, X).fit(
        cov_type='cluster',
        cov_kwds={'groups': villages},
    )

    print(f'time: {period}')
    print(f'coefficient: {results.params[1]}')
    print(f'standard error: {results.bse[1]}')
    print('\n')

time: 8
coefficient: 15.840269814661028
standard error: 4.28674979861519


time: 9
coefficient: 25.739942246726507
standard error: 5.795239754124136


time: 10
coefficient: 30.609912387630846
standard error: 5.163827804279433


