In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [3]:
df = pd.read_stata("aerdat4.dta")
tab5 = pd.read_stata("tab5v1.dta")

In [4]:
tab5.columns

Index(['id', 'vouch0', 'sex', 'svy', 'age', 'hsvisit', 'strata1', 'strata2',
       'strata3', 'strata4', 'strata5', 'strata6', 'mom_sch', 'dad_sch',
       'dad_miss', 'mom_miss', 'math', 'reading', 'writing', 't_site',
       'totalpts', 'tsite1', 'tsite2', 'tsite3', 'sex_name', 'bog95smp',
       'bog95asd', 'bog97smp', 'bog97asd', 'jam93smp', 'jam93asd', 'test_tak',
       'dbogota', 'djamundi', 'd1995', 'd1993', 'd1997', 'phone', 'scyfnsh',
       'darea1', 'darea2', 'darea3', 'darea4', 'darea5', 'darea6', 'darea7',
       'darea8', 'darea9', 'darea10', 'darea11', 'darea12', 'darea13',
       'darea14', 'darea15', 'darea16', 'darea17', 'darea18', 'darea19',
       'dmonth1', 'dmonth2', 'dmonth3', 'dmonth4', 'dmonth5', 'dmonth6',
       'dmonth7', 'dmonth8', 'dmonth9', 'dmonth10', 'dmonth11', 'dmonth12',
       'bog95', 'bog97', 'mom_age', 'mom_mw', 'dad_age', 'dad_mw', 'sex2',
       'stratams', 'age2'],
      dtype='object')

In [5]:
# Panel A Column 1

treatment = 'vouch0'
controls = ['tsite1','tsite2','tsite3']
ols_vars_panelA = ['totalpts','math','reading','writing']

def tab5_panelA_col1(vars, treatment, controls):

    for var in vars:
        vars_needed = [treatment, var] + controls
        data = tab5[vars_needed].dropna()

        X = sm.add_constant(data[[treatment] + controls])
        y = data[var]

        model = sm.OLS(y, X).fit()

        coef = model.params[treatment]
        std_err = model.bse[treatment]
        n = data.shape[0]

        print(f"{var}: coef={round(coef, 4)}, std_err={round(std_err, 4)}, n={n}")

In [6]:
tab5_panelA_col1(ols_vars_panelA, treatment, controls)

totalpts: coef=0.2168, std_err=0.1194, n=282
math: coef=0.1776, std_err=0.119, n=282
reading: coef=0.2036, std_err=0.1201, n=283
writing: coef=0.1259, std_err=0.1209, n=283


In [7]:
# Panel B Column 1

ols_vars_panelBandC = ['totalpts','math','reading']

def tab5_panelB_col1(vars, treatment, controls):

    for var in vars:
        vars_needed = [treatment, var] + controls
        data = tab5[tab5['sex2'] == 0][vars_needed].dropna()

        X = sm.add_constant(data[[treatment] + controls])
        y = data[var]

        model = sm.OLS(y, X).fit()

        coef = model.params[treatment]
        std_err = model.bse[treatment]
        n = data.shape[0]

        print(f"{var}: coef={round(coef, 4)}, std_err={round(std_err, 4)}, n={n}")

In [8]:
tab5_panelB_col1(ols_vars_panelBandC, treatment, controls)

totalpts: coef=0.2412, std_err=0.1534, n=147
math: coef=0.3281, std_err=0.1447, n=147
reading: coef=0.1449, std_err=0.1567, n=148


In [9]:
# Panel C Column 1

def tab5_panelC_col1(vars, treatment, controls):

    for var in vars:
        vars_needed = [treatment, var] + controls
        data = tab5[tab5['sex2'] == 1][vars_needed].dropna()

        X = sm.add_constant(data[[treatment] + controls])
        y = data[var]

        model = sm.OLS(y, X).fit()

        coef = model.params[treatment]
        std_err = model.bse[treatment]
        n = data.shape[0]

        print(f"{var}: coef={round(coef, 4)}, std_err={round(std_err, 4)}, n={n}")

In [10]:
tab5_panelC_col1(ols_vars_panelBandC, treatment, controls)

totalpts: coef=0.179, std_err=0.1834, n=135
math: coef=-0.0083, std_err=0.1884, n=135
reading: coef=0.2549, std_err=0.1841, n=135


In [11]:
# Panel A Column 2

treatment = 'vouch0'
controls_w_covariates = ['tsite1','tsite2','tsite3','svy','hsvisit','age','sex','mom_sch','strata1','strata2','strata3','strata4','strata5','strata6','dad_sch','mom_miss','dad_miss']

def tab5_panelA_col2(vars, treatment, controls):

    for var in vars:
        vars_needed = [treatment, var] + controls
        data = tab5[vars_needed].dropna()

        X = sm.add_constant(data[[treatment] + controls])
        y = data[var]

        model = sm.OLS(y, X).fit()

        coef = model.params[treatment]
        std_err = model.bse[treatment]
        n = data.shape[0]

        print(f"{var}: coef={round(coef, 4)}, std_err={round(std_err, 4)}, n={n}")

In [12]:
tab5_panelA_col2(ols_vars_panelA, treatment, controls_w_covariates)

totalpts: coef=0.2237, std_err=0.1091, n=282
math: coef=0.1763, std_err=0.1139, n=282
reading: coef=0.2113, std_err=0.1147, n=283
writing: coef=0.1391, std_err=0.1132, n=283


In [13]:
# Panel B Column 2

def tab5_panelB_col2(vars, treatment, controls):

    for var in vars:
        vars_needed = [treatment, var] + controls
        data = tab5[tab5['sex2'] == 0][vars_needed].dropna()

        X = sm.add_constant(data[[treatment] + controls])
        y = data[var]

        model = sm.OLS(y, X).fit()

        coef = model.params[treatment]
        std_err = model.bse[treatment]
        n = data.shape[0]

        print(f"{var}: coef={round(coef, 4)}, std_err={round(std_err, 4)}, n={n}")

In [14]:
tab5_panelB_col2(ols_vars_panelBandC, treatment, controls_w_covariates)

totalpts: coef=0.3046, std_err=0.1324, n=147
math: coef=0.3686, std_err=0.1361, n=147
reading: coef=0.1775, std_err=0.1494, n=148


In [15]:
# Panel B Column 2

def tab5_panelC_col2(vars, treatment, controls):

    for var in vars:
        vars_needed = [treatment, var] + controls
        data = tab5[tab5['sex2'] == 1][vars_needed].dropna()

        X = sm.add_constant(data[[treatment] + controls])
        y = data[var]

        model = sm.OLS(y, X).fit()

        coef = model.params[treatment]
        std_err = model.bse[treatment]
        n = data.shape[0]

        print(f"{var}: coef={round(coef, 4)}, std_err={round(std_err, 4)}, n={n}")

In [16]:
tab5_panelC_col2(ols_vars_panelBandC, treatment, controls_w_covariates)

totalpts: coef=0.1497, std_err=0.1858, n=135
math: coef=-0.0134, std_err=0.1926, n=135
reading: coef=0.2186, std_err=0.1864, n=135


In [17]:
# Panel A Column 1

treatment = 'vouch0'
controls = ['tsite1','tsite2','tsite3']
ols_vars_panelA = ['totalpts','math','reading','writing']


def tab5_panelA_col3_comb(vars, treatment, controls):

    for var in vars:
        vars_needed = [treatment, var] + controls
        data = tab5[vars_needed].dropna()

        X = sm.add_constant(data[[treatment] + controls])
        y = data[var]

        model = sm.OLS(y, X).fit()

        coef = model.params[treatment]
        std_err = model.bse[treatment]
        n = data.shape[0]

        print(f"{var}: coef={round(coef, 4)}, std_err={round(std_err, 4)}, n={n}")

In [18]:
tab5_panelA_col3_comb(ols_vars_panelA, treatment, controls)

totalpts: coef=0.2168, std_err=0.1194, n=282
math: coef=0.1776, std_err=0.119, n=282
reading: coef=0.2036, std_err=0.1201, n=283
writing: coef=0.1259, std_err=0.1209, n=283


In [22]:
temp3 = tab5[['id', 'math', 'vouch0', 'tsite1', 'tsite2', 'tsite3']].dropna().copy()
temp3['testscor'] = temp3['math']
temp3['subject'] = 'math'

temp4 = tab5[['id', 'reading', 'vouch0', 'tsite1', 'tsite2', 'tsite3']].dropna().copy()
temp4['testscor'] = temp4['reading']
temp4['subject'] = 'reading'

temp5 = tab5[['id', 'writing', 'vouch0', 'tsite1', 'tsite2', 'tsite3']].dropna().copy()
temp5['testscor'] = temp5['writing']
temp5['subject'] = 'writing'

merge1 = pd.concat([temp3, temp4, temp5], axis=0)


In [23]:
print("Panel A Column 3 (RE):")
print(f"coef = {round(panelA_col3_coef, 3)}")
print(f"(std err = {round(panelA_col3_se, 3)})")

print("Panel A Column 4 (RE + covariates):")
print(f"coef = {round(panelA_col4_coef, 3)}")
print(f"(std err = {round(panelA_col4_se, 3)})")


Panel A Column 3 (RE):


NameError: name 'panelA_col3_coef' is not defined

In [24]:
# Filter out writing scores for math+reading version
merge_mr = merge1[merge1['subject'].isin(['math', 'reading'])]

# === Panel A Column 3: RE without covariates ===
model3 = smf.mixedlm("testscor ~ vouch0 + tsite1 + tsite2 + tsite3",
                     data=merge_mr,
                     groups=merge_mr["id"])
result3 = model3.fit()

panelA_col3_coef = result3.params['vouch0']
panelA_col3_se = result3.bse['vouch0']

# === Panel A Column 4: RE with covariates ===
covariates = ['svy','hsvisit','age','sex','mom_sch','strata1','strata2','strata3',
              'strata4','strata5','strata6','dad_sch','mom_miss','dad_miss']
controls = ['tsite1','tsite2','tsite3'] + covariates

required_cols = ['id', 'testscor', 'vouch0'] + controls
merge_mr_wcov = merge_mr.dropna(subset=required_cols)

model4 = smf.mixedlm("testscor ~ vouch0 + " + " + ".join(controls),
                     data=merge_mr_wcov,
                     groups=merge_mr_wcov["id"])
result4 = model4.fit()

panelA_col4_coef = result4.params['vouch0']
panelA_col4_se = result4.bse['vouch0']


NameError: name 'smf' is not defined