In [1]:
import pandas as pd
from statsmodels.regression.linear_model import OLS
from linearmodels import IV2SLS, IVGMM
import numpy as np
import scipy.stats as st

In [2]:
# Load the problem-set data: a tab-delimited file whose first column is used as the row index.
df = pd.read_csv(
    "ps5_data.csv",
    delimiter="\t",
    index_col=0,
)

In [3]:
# Build the derived regressors, then restrict to the estimation sample.

# exp76 = age76 - ed76 - 6 (presumably potential experience: age minus schooling minus 6).
df["exp76"] = df["age76"] - df["ed76"] - 6
# Squared exp76, scaled down by 100.
df["expsq76"] = df["exp76"] ** 2 / 100
# Explicit intercept column; the models below are given it as a regular regressor.
df["const"] = 1

# Keep only rows where lwage76 is observed. The explicit .copy() makes `df` an
# independent frame rather than a slice of the loaded data, so the columns
# assigned to it later (the nearc4a interaction terms) do not trigger pandas'
# SettingWithCopyWarning.
df = df[df["lwage76"].notna()].copy()

### 12.25a

In [4]:
# 12.25a regression of ed76: the regressors are the constant, the controls, and
# nearc4a / nearc4b (the two columns used as instruments in the IV cells below).
X = df.loc[
    :,
    [
        "const",
        "exp76",
        "expsq76",
        "black",
        "reg76r",
        "smsa76r",
        "nearc4a",
        "nearc4b",
    ],
]
Y = df["ed76"]

In [5]:
# Least-squares fit of Y on X with HC0 covariance.
# hasconst=True declares that X already carries the intercept column ("const").
model1 = OLS(endog=Y, exog=X, hasconst=True)
results = model1.fit(cov_type="HC0")
results.summary()

0,1,2,3
Dep. Variable:,ed76,R-squared:,0.476
Model:,OLS,Adj. R-squared:,0.475
Method:,Least Squares,F-statistic:,527.2
Date:,"Mon, 29 Mar 2021",Prob (F-statistic):,0.0
Time:,22:05:53,Log-Likelihood:,-6261.0
No. Observations:,3010,AIC:,12540.0
Df Residuals:,3002,BIC:,12590.0
Df Model:,7,,
Covariance Type:,HC0,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,16.6573,0.147,113.650,0.000,16.370,16.945
exp76,-0.4133,0.032,-12.921,0.000,-0.476,-0.351
expsq76,0.0928,0.171,0.544,0.586,-0.241,0.427
black,-1.0063,0.088,-11.453,0.000,-1.179,-0.834
reg76r,-0.2671,0.079,-3.400,0.001,-0.421,-0.113
smsa76r,0.3998,0.085,4.723,0.000,0.234,0.566
nearc4a,0.4304,0.086,5.001,0.000,0.262,0.599
nearc4b,0.1226,0.101,1.212,0.225,-0.076,0.321

0,1,2,3
Omnibus:,12.456,Durbin-Watson:,1.767
Prob(Omnibus):,0.002,Jarque-Bera (JB):,12.583
Skew:,0.158,Prob(JB):,0.00185
Kurtosis:,2.969,Cond. No.,64.2


In [6]:
# Inputs for the lwage76 IV regression of 12.25a.
Y = df["lwage76"]                  # dependent variable
X_endo = df["ed76"]                # regressor passed to the estimator as endogenous
Z = df[["nearc4a", "nearc4b"]]     # instruments
# Regressors passed as exogenous (includes the constant).
X_exo = df.loc[
    :,
    ["const", "exp76", "expsq76", "black", "reg76r", "smsa76r"],
]

In [7]:
# 2SLS of lwage76 on the exogenous regressors and ed76, instrumented by Z.
# cov_type="robust" is the estimator's default, spelled out here for the reader.
model2 = IV2SLS(dependent=Y, exog=X_exo, endog=X_endo, instruments=Z)
results = model2.fit(cov_type="robust")
results

0,1,2,3
Dep. Variable:,lwage76,R-squared:,0.1447
Estimator:,IV-2SLS,Adj. R-squared:,0.1430
No. Observations:,3010,F-statistic:,717.93
Date:,"Mon, Mar 29 2021",P-value (F-stat),0.0000
Time:,22:05:53,Distribution:,chi2(6)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,3.2680,0.6821,4.7910,0.0000,1.9311,4.6049
exp76,0.1193,0.0182,6.5681,0.0000,0.0837,0.1549
expsq76,-0.2305,0.0368,-6.2729,0.0000,-0.3026,-0.1585
black,-0.1017,0.0440,-2.3134,0.0207,-0.1879,-0.0155
reg76r,-0.0950,0.0217,-4.3717,0.0000,-0.1376,-0.0524
smsa76r,0.1164,0.0263,4.4327,0.0000,0.0650,0.1679
ed76,0.1611,0.0405,3.9804,0.0001,0.0818,0.2404


Note: these coefficient estimates match those reported in Tables 12.1 and 12.2.

### 12.25b

In [8]:
# 12.25b regression of ed76: same regressors as 12.25a, with nearc2 added.
X = df.loc[
    :,
    [
        "const",
        "exp76",
        "expsq76",
        "black",
        "reg76r",
        "smsa76r",
        "nearc4a",
        "nearc4b",
        "nearc2",
    ],
]
Y = df["ed76"]

In [9]:
# Least-squares fit of the nearc2-augmented regression with HC0 covariance.
# hasconst=True declares that X already carries the intercept column ("const").
model3 = OLS(endog=Y, exog=X, hasconst=True)
results = model3.fit(cov_type="HC0")
results.summary()

0,1,2,3
Dep. Variable:,ed76,R-squared:,0.476
Model:,OLS,Adj. R-squared:,0.475
Method:,Least Squares,F-statistic:,461.6
Date:,"Mon, 29 Mar 2021",Prob (F-statistic):,0.0
Time:,22:05:53,Log-Likelihood:,-6260.6
No. Observations:,3010,AIC:,12540.0
Df Residuals:,3001,BIC:,12590.0
Df Model:,8,,
Covariance Type:,HC0,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,16.6343,0.149,111.382,0.000,16.342,16.927
exp76,-0.4128,0.032,-12.916,0.000,-0.475,-0.350
expsq76,0.0895,0.170,0.525,0.600,-0.245,0.423
black,-1.0106,0.088,-11.507,0.000,-1.183,-0.838
reg76r,-0.2603,0.079,-3.304,0.001,-0.415,-0.106
smsa76r,0.3904,0.085,4.570,0.000,0.223,0.558
nearc4a,0.4216,0.086,4.881,0.000,0.252,0.591
nearc4b,0.1301,0.102,1.281,0.200,-0.069,0.329
nearc2,0.0677,0.074,0.912,0.362,-0.078,0.213

0,1,2,3
Omnibus:,12.199,Durbin-Watson:,1.767
Prob(Omnibus):,0.002,Jarque-Bera (JB):,12.319
Skew:,0.156,Prob(JB):,0.00211
Kurtosis:,2.972,Cond. No.,64.6


### 12.25c

In [10]:
# Interaction columns for 12.25c:
#   newvar1 = nearc4a * age76
#   newvar2 = nearc4a * age76 * age76 / 100
near4a_by_age = df["nearc4a"] * df["age76"]
df["newvar1"] = near4a_by_age
df["newvar2"] = near4a_by_age * df["age76"] / 100

In [11]:
# 12.25c regression of ed76: the 12.25b regressors plus the two nearc4a-by-age
# interaction columns (newvar1, newvar2), fitted with HC0 covariance.
X = df.loc[
    :,
    [
        "const",
        "exp76",
        "expsq76",
        "black",
        "reg76r",
        "smsa76r",
        "nearc4a",
        "nearc4b",
        "nearc2",
        "newvar1",
        "newvar2",
    ],
]
Y = df["ed76"]
# hasconst=True declares that X already carries the intercept column ("const").
model4 = OLS(endog=Y, exog=X, hasconst=True)
results = model4.fit(cov_type="HC0")
results.summary()

0,1,2,3
Dep. Variable:,ed76,R-squared:,0.651
Model:,OLS,Adj. R-squared:,0.649
Method:,Least Squares,F-statistic:,494.4
Date:,"Mon, 29 Mar 2021",Prob (F-statistic):,0.0
Time:,22:05:53,Log-Likelihood:,-5652.1
No. Observations:,3010,AIC:,11330.0
Df Residuals:,2999,BIC:,11390.0
Df Model:,10,,
Covariance Type:,HC0,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,18.4922,0.146,126.265,0.000,18.205,18.779
exp76,-0.6715,0.032,-21.271,0.000,-0.733,-0.610
expsq76,0.5524,0.169,3.278,0.001,0.222,0.883
black,-0.6900,0.075,-9.206,0.000,-0.837,-0.543
reg76r,-0.1980,0.068,-2.924,0.003,-0.331,-0.065
smsa76r,0.2605,0.076,3.427,0.001,0.111,0.409
nearc4a,-28.9819,2.825,-10.259,0.000,-34.519,-23.445
nearc4b,0.1402,0.106,1.326,0.185,-0.067,0.347
nearc2,0.0352,0.061,0.582,0.561,-0.083,0.154

0,1,2,3
Omnibus:,62.541,Durbin-Watson:,1.796
Prob(Omnibus):,0.0,Jarque-Bera (JB):,82.386
Skew:,0.258,Prob(JB):,1.29e-18
Kurtosis:,3.625,Cond. No.,3050.0


### 12.25d

In [12]:
# 12.25d: 2SLS for lwage76 using the expanded instrument set
# (nearc4a, nearc4b and the two interaction columns).
Y = df["lwage76"]
X_endo = df["ed76"]
Z = df.loc[:, ["nearc4a", "nearc4b", "newvar1", "newvar2"]]
X_exo = df.loc[
    :,
    ["const", "exp76", "expsq76", "black", "reg76r", "smsa76r"],
]

# cov_type="robust" is the estimator's default, spelled out here for the reader.
model5 = IV2SLS(dependent=Y, exog=X_exo, endog=X_endo, instruments=Z)
results = model5.fit(cov_type="robust")
results

0,1,2,3
Dep. Variable:,lwage76,R-squared:,0.2891
Estimator:,IV-2SLS,Adj. R-squared:,0.2877
No. Observations:,3010,F-statistic:,1018.9
Date:,"Mon, Mar 29 2021",P-value (F-stat),0.0000
Time:,22:05:53,Distribution:,chi2(6)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,4.5901,0.1106,41.489,0.0000,4.3733,4.8069
exp76,0.0871,0.0070,12.354,0.0000,0.0733,0.1009
expsq76,-0.2247,0.0320,-7.0284,0.0000,-0.2874,-0.1621
black,-0.1810,0.0180,-10.042,0.0000,-0.2164,-0.1457
reg76r,-0.1219,0.0154,-7.9126,0.0000,-0.1521,-0.0917
smsa76r,0.1570,0.0153,10.277,0.0000,0.1271,0.1870
ed76,0.0825,0.0062,13.275,0.0000,0.0704,0.0947


### 13.28a

In [13]:
# 13.28a: GMM for lwage76 with the base instrument set (nearc4a, nearc4b),
# i.e. the same inputs as the 12.25a 2SLS model.
Y = df["lwage76"]
X_endo = df["ed76"]
Z = df.loc[:, ["nearc4a", "nearc4b"]]
X_exo = df.loc[
    :,
    ["const", "exp76", "expsq76", "black", "reg76r", "smsa76r"],
]

# cov_type="robust" is the estimator's default, spelled out here for the reader.
model6 = IVGMM(dependent=Y, exog=X_exo, endog=X_endo, instruments=Z)
results = model6.fit(cov_type="robust")
results

0,1,2,3
Dep. Variable:,lwage76,R-squared:,0.1433
Estimator:,IV-GMM,Adj. R-squared:,0.1416
No. Observations:,3010,F-statistic:,715.88
Date:,"Mon, Mar 29 2021",P-value (F-stat),0.0000
Time:,22:05:53,Distribution:,chi2(6)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,3.2619,0.6827,4.7779,0.0000,1.9238,4.6000
exp76,0.1196,0.0182,6.5755,0.0000,0.0839,0.1552
expsq76,-0.2315,0.0368,-6.2890,0.0000,-0.3037,-0.1594
black,-0.1012,0.0440,-2.2998,0.0215,-0.1874,-0.0150
reg76r,-0.0954,0.0218,-4.3832,0.0000,-0.1380,-0.0527
smsa76r,0.1150,0.0263,4.3813,0.0000,0.0636,0.1665
ed76,0.1615,0.0405,3.9875,0.0001,0.0821,0.2409


### 13.28b

In [14]:
# 13.28b: GMM for lwage76 with the expanded instrument set
# (nearc4a, nearc4b and the two interaction columns), as in the 12.25d 2SLS model.
Y = df["lwage76"]
X_endo = df["ed76"]
Z = df.loc[:, ["nearc4a", "nearc4b", "newvar1", "newvar2"]]
X_exo = df.loc[
    :,
    ["const", "exp76", "expsq76", "black", "reg76r", "smsa76r"],
]

# cov_type="robust" is the estimator's default, spelled out here for the reader.
model7 = IVGMM(dependent=Y, exog=X_exo, endog=X_endo, instruments=Z)
results = model7.fit(cov_type="robust")
results

0,1,2,3
Dep. Variable:,lwage76,R-squared:,0.2886
Estimator:,IV-GMM,Adj. R-squared:,0.2872
No. Observations:,3010,F-statistic:,1020.2
Date:,"Mon, Mar 29 2021",P-value (F-stat),0.0000
Time:,22:05:53,Distribution:,chi2(6)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,4.5699,0.1105,41.345,0.0000,4.3533,4.7866
exp76,0.0876,0.0071,12.425,0.0000,0.0738,0.1015
expsq76,-0.2249,0.0320,-7.0279,0.0000,-0.2877,-0.1622
black,-0.1775,0.0180,-9.8683,0.0000,-0.2127,-0.1422
reg76r,-0.1245,0.0154,-8.0883,0.0000,-0.1547,-0.0943
smsa76r,0.1529,0.0152,10.055,0.0000,0.1231,0.1828
ed76,0.0839,0.0062,13.510,0.0000,0.0717,0.0960


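In both cases, the GMM estimates do not differ appreciably from the corresponding 2SLS estimates: the coefficient on ed76 is 0.1615 (GMM) versus 0.1611 (2SLS) with the base instruments, and 0.0839 versus 0.0825 with the expanded instrument set.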