# Chapter 13

In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Exercise 1
# C1.i: regress `kids` on the columns listed below, then jointly test whether
# the coefficients on farm, othrural, town and smcity are all zero.
fertil1 = pd.read_stata("./stata/FERTIL1.DTA")
base_regressors = [
    "educ", "age", "agesq", "black",
    "east", "northcen", "west",
    "farm", "othrural", "town", "smcity",
    "y74", "y76", "y78", "y80", "y82", "y84",
]
X = sm.add_constant(fertil1[base_regressors])
model = sm.OLS(fertil1["kids"], X, missing="drop").fit()
model.f_test("farm = othrural = town = smcity = 0")

  x = pd.concat(x[::order], 1)


<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=array([[1.15876537]]), p=0.3274579660022206, df_denom=1.11e+03, df_num=4>

In [3]:
# C1.ii: joint F-test that the east, northcen and west coefficients are all zero
# in the regression fitted in the previous cell.
model.f_test("east = northcen = west = 0")

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=array([[3.01165586]]), p=0.02925801768068351, df_denom=1.11e+03, df_num=3>

In [4]:
# C1.iii: regress the squared residuals of the Exercise 1 regression on the
# year dummies; the overall F-statistic of this auxiliary regression is the
# heteroskedasticity test of interest.
#
# The auxiliary fit gets its own names instead of overwriting `X` / `model`:
# this cell reads `model.resid`, so rebinding `model` here would make the cell
# give a different answer when re-run and would break re-running the F-tests
# above, which expect `model` to be the fertility regression.
year_dummies = sm.add_constant(fertil1[["y74", "y76", "y78", "y80", "y82", "y84"]])
resid_sq = model.resid ** 2
het_model = sm.OLS(resid_sq, year_dummies, missing = "drop").fit()
het_model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.015
Model:,OLS,Adj. R-squared:,0.01
Method:,Least Squares,F-statistic:,2.905
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,0.00816
Time:,03:43:57,Log-Likelihood:,-2940.1
No. Observations:,1129,AIC:,5894.0
Df Residuals:,1122,BIC:,5929.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.2873,0.263,12.512,0.000,2.772,3.803
y74,-1.0722,0.362,-2.959,0.003,-1.783,-0.361
y76,-0.7698,0.374,-2.058,0.040,-1.504,-0.036
y78,-0.9559,0.380,-2.516,0.012,-1.701,-0.210
y80,-1.2110,0.381,-3.182,0.002,-1.958,-0.464
y82,-0.9221,0.356,-2.588,0.010,-1.621,-0.223
y84,-1.3719,0.360,-3.807,0.000,-2.079,-0.665

0,1,2,3
Omnibus:,536.62,Durbin-Watson:,2.014
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2684.707
Skew:,2.233,Prob(JB):,0.0
Kurtosis:,9.093,Cond. No.,8.0


In [5]:
# C1.iv: extend the Exercise 1 regression with the year-by-education columns
# and jointly test whether all six of those coefficients are zero.
interaction_regressors = [
    "educ", "age", "agesq", "black",
    "east", "northcen", "west",
    "farm", "othrural", "town", "smcity",
    "y74", "y76", "y78", "y80", "y82", "y84",
    "y74educ", "y76educ", "y78educ", "y80educ", "y82educ", "y84educ",
]
X = sm.add_constant(fertil1.loc[:, interaction_regressors])
model = sm.OLS(fertil1["kids"], X, missing="drop").fit()
model.f_test("y74educ = y76educ = y78educ = y80educ = y82educ = y84educ = 0")

  x = pd.concat(x[::order], 1)


<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=array([[1.4835693]]), p=0.1803366345999419, df_denom=1.10e+03, df_num=6>

In [6]:
# Full coefficient table for the interaction model fitted in the previous cell,
# used to inspect the individual year-by-education terms.
model.summary()

0,1,2,3
Dep. Variable:,kids,R-squared:,0.136
Model:,OLS,Adj. R-squared:,0.118
Method:,Least Squares,F-statistic:,7.593
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,3.4099999999999995e-23
Time:,03:43:57,Log-Likelihood:,-2086.7
No. Observations:,1129,AIC:,4221.0
Df Residuals:,1105,BIC:,4342.0
Df Model:,23,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-8.4773,3.126,-2.712,0.007,-14.612,-2.343
educ,-0.0225,0.054,-0.420,0.675,-0.128,0.083
age,0.5075,0.139,3.653,0.000,0.235,0.780
agesq,-0.0055,0.002,-3.519,0.000,-0.009,-0.002
black,1.0741,0.174,6.183,0.000,0.733,1.415
east,0.2061,0.133,1.548,0.122,-0.055,0.467
northcen,0.3483,0.121,2.876,0.004,0.111,0.586
west,0.1771,0.167,1.058,0.290,-0.151,0.506
farm,-0.0722,0.148,-0.489,0.625,-0.362,0.217

0,1,2,3
Omnibus:,10.028,Durbin-Watson:,2.013
Prob(Omnibus):,0.007,Jarque-Bera (JB):,10.214
Skew:,0.225,Prob(JB):,0.00606
Kurtosis:,2.883,Cond. No.,136000.0


C1.i Results printed above. The F-statistic is about 1.159 and the p-value is about 0.327, so the living-environment variables are not jointly significant.

C1.ii Results printed above. The result has a p-value of 0.029 which is significant at the 5% level.

C1.iii The OLS summary provides us the F-test we are interested in with an F-statistic of 2.905 and p-value of 0.00816. We reject the null hypothesis at the 1% level, which is evidence of heteroskedasticity.

C1.iv Results printed above. The resulting p-value is about 0.180 and so the interaction terms are not jointly significant. These interactions allow the effect of education to vary by year, and so the high p-value suggests it may not vary over time. However, some of the interactions are individually significant (78, 82, 84) with coefficients (generally) growing in absolute value over time. A growing link between education and a decrease in fertility seems intuitive, and we could explain the lack of significance in the joint test through the fact that half of the interaction terms (generally those for the earlier years) are not individually significant.

In [7]:
# Exercise 2
# C2.i: regress lwage on the columns listed below, including the y85
# interactions with educ and female.
cps78_85 = pd.read_stata("./stata/CPS78_85.DTA")
wage_regressors = [
    "y85", "educ", "y85educ",
    "exper", "expersq", "union",
    "female", "y85fem",
]
X = sm.add_constant(cps78_85[wage_regressors])
model = sm.OLS(cps78_85["lwage"], X, missing="drop").fit()
model.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,lwage,R-squared:,0.426
Model:,OLS,Adj. R-squared:,0.422
Method:,Least Squares,F-statistic:,99.8
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,4.46e-124
Time:,03:43:57,Log-Likelihood:,-574.24
No. Observations:,1084,AIC:,1166.0
Df Residuals:,1075,BIC:,1211.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.4589,0.093,4.911,0.000,0.276,0.642
y85,0.1178,0.124,0.952,0.341,-0.125,0.361
educ,0.0747,0.007,11.192,0.000,0.062,0.088
y85educ,0.0185,0.009,1.974,0.049,0.000,0.037
exper,0.0296,0.004,8.293,0.000,0.023,0.037
expersq,-0.0004,7.75e-05,-5.151,0.000,-0.001,-0.000
union,0.2021,0.030,6.672,0.000,0.143,0.262
female,-0.3167,0.037,-8.648,0.000,-0.389,-0.245
y85fem,0.0851,0.051,1.658,0.098,-0.016,0.186

0,1,2,3
Omnibus:,83.747,Durbin-Watson:,1.918
Prob(Omnibus):,0.0,Jarque-Bera (JB):,317.985
Skew:,-0.271,Prob(JB):,8.920000000000001e-70
Kurtosis:,5.597,Cond. No.,8770.0


In [8]:
# C2.ii: rebuild the y85-by-education interaction with educ measured relative
# to 12, so the y85 coefficient refers to educ == 12 instead of educ == 0.
educ_minus_12 = cps78_85["educ"] - 12
cps78_85["y85educ_adj"] = educ_minus_12 * cps78_85["y85"]
recentered_regressors = [
    "y85", "educ", "y85educ_adj",
    "exper", "expersq", "union",
    "female", "y85fem",
]
X = sm.add_constant(cps78_85[recentered_regressors])
recentered_fit = sm.OLS(cps78_85["lwage"], X, missing="drop").fit()
recentered_fit.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,lwage,R-squared:,0.426
Model:,OLS,Adj. R-squared:,0.422
Method:,Least Squares,F-statistic:,99.8
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,4.46e-124
Time:,03:43:57,Log-Likelihood:,-574.24
No. Observations:,1084,AIC:,1166.0
Df Residuals:,1075,BIC:,1211.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.4589,0.093,4.911,0.000,0.276,0.642
y85,0.3393,0.034,9.977,0.000,0.273,0.406
educ,0.0747,0.007,11.192,0.000,0.062,0.088
y85educ_adj,0.0185,0.009,1.974,0.049,0.000,0.037
exper,0.0296,0.004,8.293,0.000,0.023,0.037
expersq,-0.0004,7.75e-05,-5.151,0.000,-0.001,-0.000
union,0.2021,0.030,6.672,0.000,0.143,0.262
female,-0.3167,0.037,-8.648,0.000,-0.389,-0.245
y85fem,0.0851,0.051,1.658,0.098,-0.016,0.186

0,1,2,3
Omnibus:,83.747,Durbin-Watson:,1.918
Prob(Omnibus):,0.0,Jarque-Bera (JB):,317.985
Skew:,-0.271,Prob(JB):,8.920000000000001e-70
Kurtosis:,5.597,Cond. No.,5910.0


In [9]:
# C2.iii: rerun the wage regression with the 1985 wages divided by 1.65.
# NOTE(review): 1.65 is presumably the 1978-to-1985 price deflator — confirm
# against the exercise text.
Y85_WAGE_DIVISOR = 1.65

wage_level = np.exp(cps78_85["lwage"])
# Keep the wage as-is unless y85 == 1, in which case use the divided wage.
cps78_85["rwage"] = wage_level.where(cps78_85["y85"] != 1, wage_level / Y85_WAGE_DIVISOR)
cps78_85["lrwage"] = np.log(cps78_85["rwage"])

real_wage_regressors = [
    "y85", "educ", "y85educ",
    "exper", "expersq", "union",
    "female", "y85fem",
]
X = sm.add_constant(cps78_85[real_wage_regressors])
model = sm.OLS(cps78_85["lrwage"], X, missing="drop").fit()
model.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,lrwage,R-squared:,0.356
Model:,OLS,Adj. R-squared:,0.351
Method:,Least Squares,F-statistic:,74.35
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,1.88e-97
Time:,03:43:57,Log-Likelihood:,-574.24
No. Observations:,1084,AIC:,1166.0
Df Residuals:,1075,BIC:,1211.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.4589,0.093,4.911,0.000,0.276,0.642
y85,-0.3830,0.124,-3.094,0.002,-0.626,-0.140
educ,0.0747,0.007,11.192,0.000,0.062,0.088
y85educ,0.0185,0.009,1.974,0.049,0.000,0.037
exper,0.0296,0.004,8.293,0.000,0.023,0.037
expersq,-0.0004,7.75e-05,-5.151,0.000,-0.001,-0.000
union,0.2021,0.030,6.672,0.000,0.143,0.262
female,-0.3167,0.037,-8.648,0.000,-0.389,-0.245
y85fem,0.0851,0.051,1.658,0.098,-0.016,0.186

0,1,2,3
Omnibus:,83.747,Durbin-Watson:,1.918
Prob(Omnibus):,0.0,Jarque-Bera (JB):,317.985
Skew:,-0.271,Prob(JB):,8.920000000000001e-70
Kurtosis:,5.597,Cond. No.,8770.0


In [10]:
# C2.v: share of rows with union == 1 within each value of y85
# (sum of the union column divided by the number of rows in that group).
for y85_flag, label in (
    (0, "Share of union membership in 1978"),
    (1, "Share of union membership in 1985"),
):
    in_period = cps78_85.y85 == y85_flag
    print(label, cps78_85.loc[in_period, "union"].sum() / cps78_85[in_period].shape[0])

Share of union membership in 1978 0.3054545454545455
Share of union membership in 1985 0.1797752808988764


In [11]:
# C2.vi: add the y85union interaction to the nominal-wage regression.
union_interaction_regressors = [
    "y85", "educ", "y85educ",
    "exper", "expersq", "union",
    "female", "y85fem", "y85union",
]
X = sm.add_constant(cps78_85[union_interaction_regressors])
model = sm.OLS(cps78_85["lwage"], X, missing="drop").fit()
model.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,lwage,R-squared:,0.426
Model:,OLS,Adj. R-squared:,0.421
Method:,Least Squares,F-statistic:,88.63
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,4.6100000000000004e-123
Time:,03:43:57,Log-Likelihood:,-574.24
No. Observations:,1084,AIC:,1168.0
Df Residuals:,1074,BIC:,1218.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.4588,0.095,4.851,0.000,0.273,0.644
y85,0.1180,0.126,0.934,0.350,-0.130,0.366
educ,0.0747,0.007,11.158,0.000,0.062,0.088
y85educ,0.0185,0.009,1.968,0.049,5.58e-05,0.037
exper,0.0296,0.004,8.289,0.000,0.023,0.037
expersq,-0.0004,7.76e-05,-5.148,0.000,-0.001,-0.000
union,0.2023,0.039,5.176,0.000,0.126,0.279
female,-0.3167,0.037,-8.620,0.000,-0.389,-0.245
y85fem,0.0850,0.052,1.640,0.101,-0.017,0.187

0,1,2,3
Omnibus:,83.76,Durbin-Watson:,1.918
Prob(Omnibus):,0.0,Jarque-Bera (JB):,318.053
Skew:,-0.271,Prob(JB):,8.62e-70
Kurtosis:,5.598,Cond. No.,9010.0


C2.i y85 would be the change in wages for the base group in the end period (1985). The base group is males with no education which doesn't seem to be a demographic of particular interest.

C2.ii From the hint we replace $educ$ in the interaction with $educ - 12$. The confidence interval we want is in $y85$ which is (0.273, 0.406). The estimated percent increase in nominal wages is 34%.

C2.iii The $y85$ coefficient is the only one that has changed between the two equations.

C2.iv The hint tells us the residuals are the same. It must follow, then, that the total sums of squares of the two regressions are different, which is why the R-squared changes (0.426 versus 0.356).

C2.v Union membership was almost cut in half between the two periods, with 30.5% of observations belonging to a union in 1978 and 18% of observations belonging to a union in 1985.

C2.vi Adding an interaction for union membership with the $y85$ indicator produces a very small coefficient and wide confidence interval. There is no reason to think that the union premium has changed over time.

C2.vii There's no reason to think that a reduction in union membership should decrease the union premium.

In [12]:
# Exercise 3
# C3.ii: regress lprice on y81, ldist and their interaction.
kielmc = pd.read_stata("./stata/KIELMC.DTA")
distance_regressors = ["y81", "ldist", "y81ldist"]
X = sm.add_constant(kielmc.loc[:, distance_regressors])
model = sm.OLS(kielmc["lprice"], X).fit()
model.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,lprice,R-squared:,0.396
Model:,OLS,Adj. R-squared:,0.39
Method:,Least Squares,F-statistic:,69.22
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,1.87e-34
Time:,03:43:58,Log-Likelihood:,-109.24
No. Observations:,321,AIC:,226.5
Df Residuals:,317,BIC:,241.6
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,8.0585,0.508,15.850,0.000,7.058,9.059
y81,-0.0113,0.805,-0.014,0.989,-1.595,1.573
ldist,0.3167,0.052,6.145,0.000,0.215,0.418
y81ldist,0.0482,0.082,0.589,0.556,-0.113,0.209

0,1,2,3
Omnibus:,10.892,Durbin-Watson:,1.395
Prob(Omnibus):,0.004,Jarque-Bera (JB):,16.703
Skew:,0.224,Prob(JB):,0.000236
Kurtosis:,4.024,Cond. No.,512.0


In [13]:
# C3.iii: same regression with the additional house-characteristic columns.
distance_and_house_regressors = [
    "y81", "ldist", "y81ldist",
    "age", "agesq", "rooms", "baths",
    "lintst", "lland", "larea",
]
X = sm.add_constant(kielmc.loc[:, distance_and_house_regressors])
model = sm.OLS(kielmc["lprice"], X).fit()
model.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,lprice,R-squared:,0.787
Model:,OLS,Adj. R-squared:,0.78
Method:,Least Squares,F-statistic:,114.6
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,7.73e-98
Time:,03:43:58,Log-Likelihood:,58.114
No. Observations:,321,AIC:,-94.23
Df Residuals:,310,BIC:,-52.74
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.6739,0.502,15.300,0.000,6.687,8.661
y81,-0.2254,0.495,-0.456,0.649,-1.199,0.748
ldist,0.0009,0.045,0.021,0.984,-0.087,0.089
y81ldist,0.0625,0.050,1.242,0.215,-0.036,0.161
age,-0.0080,0.001,-5.650,0.000,-0.011,-0.005
agesq,3.57e-05,8.71e-06,4.099,0.000,1.86e-05,5.28e-05
rooms,0.0461,0.017,2.660,0.008,0.012,0.080
baths,0.1010,0.028,3.632,0.000,0.046,0.156
lintst,-0.0600,0.032,-1.891,0.060,-0.122,0.002

0,1,2,3
Omnibus:,66.092,Durbin-Watson:,1.691
Prob(Omnibus):,0.0,Jarque-Bera (JB):,350.051
Skew:,-0.716,Prob(JB):,9.72e-77
Kurtosis:,7.912,Cond. No.,261000.0


C3.i $\delta_1$ should be positive since they are further away from the incinerator (which reduces values). $\beta_1 > 0$ would mean that houses further from the incinerator are worth more even before it was placed.

C3.ii Results listed above. In this case $\delta_1 > 0$, which is in line with our expectations. However, the t-statistic is small (0.589) and so there is no evidence the incinerator had any effect on housing prices.

C3.iii Results listed above. After controlling for housing characteristics $\delta_1$ has not changed much in that it is not statistically significant (although the coefficient is higher).

C3.iv The coefficient for $log(dist)$ is likely significant in the simpler regression due to houses further from the site of the incinerator having more of the characteristics associated with high property values. Once the characteristics of the houses are taken into account, the distance from the eventual location of the incinerator does not have any effect (nor should we expect it to).

In [14]:
# Exercise 4
# C4.i: restrict to rows with ky == 1 and regress ldurat on the policy
# variables (afchnge, highearn, afhigh) plus the listed controls.
injury = pd.read_stata("./stata/INJURY.DTA")
injury_ky = injury.loc[injury["ky"] == 1]
ky_regressors = [
    "afchnge", "highearn", "afhigh",
    "male", "married",
    "head", "neck", "upextr", "trunk", "lowback", "lowextr", "occdis",
    "manuf", "construc",
]
X = sm.add_constant(injury_ky[ky_regressors])
model = sm.OLS(injury_ky["ldurat"], X, missing="drop").fit()
model.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,ldurat,R-squared:,0.041
Model:,OLS,Adj. R-squared:,0.039
Method:,Least Squares,F-statistic:,16.37
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,4.81e-40
Time:,03:43:58,Log-Likelihood:,-8778.2
No. Observations:,5349,AIC:,17590.0
Df Residuals:,5334,BIC:,17690.0
Df Model:,14,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.2459,0.106,11.735,0.000,1.038,1.454
afchnge,0.0106,0.045,0.237,0.813,-0.077,0.099
highearn,0.1758,0.052,3.397,0.001,0.074,0.277
afhigh,0.2309,0.070,3.321,0.001,0.095,0.367
male,-0.0979,0.045,-2.198,0.028,-0.185,-0.011
married,0.1221,0.039,3.121,0.002,0.045,0.199
head,-0.5139,0.129,-3.975,0.000,-0.767,-0.260
neck,0.2699,0.161,1.671,0.095,-0.047,0.586
upextr,-0.1785,0.101,-1.765,0.078,-0.377,0.020

0,1,2,3
Omnibus:,23.922,Durbin-Watson:,1.935
Prob(Omnibus):,0.0,Jarque-Bera (JB):,32.504
Skew:,-0.023,Prob(JB):,8.74e-08
Kurtosis:,3.379,Cond. No.,29.5


In [15]:
# C4.iii: same policy regression on the rows with mi == 1, without controls.
injury_mi = injury.loc[injury["mi"] == 1]
mi_regressors = ["afchnge", "highearn", "afhigh"]
X = sm.add_constant(injury_mi[mi_regressors])
model = sm.OLS(injury_mi["ldurat"], X, missing="drop").fit()
model.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,ldurat,R-squared:,0.012
Model:,OLS,Adj. R-squared:,0.01
Method:,Least Squares,F-statistic:,6.049
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,0.00043
Time:,03:43:58,Log-Likelihood:,-2647.4
No. Observations:,1524,AIC:,5303.0
Df Residuals:,1520,BIC:,5324.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.4127,0.057,24.908,0.000,1.301,1.524
afchnge,0.0974,0.085,1.149,0.251,-0.069,0.264
highearn,0.1691,0.106,1.602,0.109,-0.038,0.376
afhigh,0.1920,0.154,1.245,0.213,-0.110,0.494

0,1,2,3
Omnibus:,21.618,Durbin-Watson:,1.954
Prob(Omnibus):,0.0,Jarque-Bera (JB):,33.331
Skew:,0.118,Prob(JB):,5.79e-08
Kurtosis:,3.685,Cond. No.,6.16


C4.i The interaction term is still statistically significant (at the 1% level) and the coefficient is higher.

C4.ii The small R-squared indicates that only a small amount of the variation has been explained by the equation. This does not mean the equation is useless since we have no reason to believe that the omitted variables are correlated with any of the variables we have included.

C4.iii The coefficient is not statistically significant. The coefficient is reasonably close. The difference in significance may be because Kentucky has three times as many observations as Michigan, which allowed us to more precisely estimate the interaction.

In [16]:
# Exercise 5
# C5.i: pooled OLS of lrent on y90, lpop, lavginc and pctstu.
rental = pd.read_stata("./stata/RENTAL.DTA")
rent_regressors = ["y90", "lpop", "lavginc", "pctstu"]
X = sm.add_constant(rental.loc[:, rent_regressors])
model = sm.OLS(rental["lrent"], X, missing="drop").fit()
model.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,lrent,R-squared:,0.861
Model:,OLS,Adj. R-squared:,0.857
Method:,Least Squares,F-statistic:,190.9
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,9.41e-52
Time:,03:43:58,Log-Likelihood:,86.161
No. Observations:,128,AIC:,-162.3
Df Residuals:,123,BIC:,-148.1
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.5688,0.535,-1.063,0.290,-1.628,0.490
y90,0.2622,0.035,7.543,0.000,0.193,0.331
lpop,0.0407,0.023,1.807,0.073,-0.004,0.085
lavginc,0.5714,0.053,10.762,0.000,0.466,0.677
pctstu,0.0050,0.001,4.949,0.000,0.003,0.007

0,1,2,3
Omnibus:,34.539,Durbin-Watson:,1.236
Prob(Omnibus):,0.0,Jarque-Bera (JB):,58.256
Skew:,1.255,Prob(JB):,2.24e-13
Kurtosis:,5.15,Cond. No.,1620.0


In [42]:
# C5.iii: first-difference the data by hand. Index both periods by city and
# subtract the y90 == 0 rows from the y90 == 1 rows; the differenced y90
# column then plays the role of the regression's constant, so no extra
# constant is added.
rows_y90 = rental.loc[rental["y90"] == 1].set_index("city")
rows_base = rental.loc[rental["y90"] == 0].set_index("city")
rental_panel = rows_y90 - rows_base

X = rental_panel.loc[:, ["y90", "lpop", "lavginc", "pctstu"]]
model = sm.OLS(rental_panel["lrent"], X, missing="drop").fit()
model.summary()

0,1,2,3
Dep. Variable:,lrent,R-squared:,0.322
Model:,OLS,Adj. R-squared:,0.288
Method:,Least Squares,F-statistic:,9.51
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,3.14e-05
Time:,03:56:16,Log-Likelihood:,65.272
No. Observations:,64,AIC:,-122.5
Df Residuals:,60,BIC:,-113.9
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
y90,0.3855,0.037,10.469,0.000,0.312,0.459
lpop,0.0722,0.088,0.818,0.417,-0.104,0.249
lavginc,0.3100,0.066,4.663,0.000,0.177,0.443
pctstu,0.0112,0.004,2.711,0.009,0.003,0.019

0,1,2,3
Omnibus:,2.653,Durbin-Watson:,1.655
Prob(Omnibus):,0.265,Jarque-Bera (JB):,2.335
Skew:,0.467,Prob(JB):,0.311
Kurtosis:,2.934,Cond. No.,23.0


In [46]:
# Cross-check: does linearmodels' first-difference estimator reproduce the
# manual first-difference regression above?
# Open question: why is the R^2 reported here so different?
# NOTE(review): likely because the two libraries define R^2 differently when
# there is no explicit constant (centered vs. uncentered total sum of squares)
# — TODO confirm against the linearmodels documentation.

from linearmodels import FirstDifferenceOLS

other_rental_panel = rental.set_index(["city", "year"])
X = other_rental_panel.loc[:, ["y90", "lpop", "lavginc", "pctstu"]]
FirstDifferenceOLS(other_rental_panel["lrent"], X).fit()

  df.index = df.index.set_levels(final_levels, [0, 1])


0,1,2,3
Dep. Variable:,lrent,R-squared:,0.9765
Estimator:,FirstDifferenceOLS,R-squared (Between):,0.9391
No. Observations:,64,R-squared (Within):,0.9765
Date:,"Tue, Jul 27 2021",R-squared (Overall):,0.9392
Time:,03:58:22,Log-likelihood,65.272
Cov. Estimator:,Unadjusted,,
,,F-statistic:,624.15
Entities:,64,P-value,0.0000
Avg Obs:,2.0000,Distribution:,"F(4,60)"
Min Obs:,2.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
y90,0.3855,0.0368,10.469,0.0000,0.3119,0.4592
lpop,0.0722,0.0883,0.8178,0.4167,-0.1045,0.2490
lavginc,0.3100,0.0665,4.6627,0.0000,0.1770,0.4429
pctstu,0.0112,0.0041,2.7114,0.0087,0.0029,0.0195


In [49]:
# C5.iv: re-report the manual first-difference OLS fit (`model`) with the HC3
# covariance estimator instead of the default non-robust one.
model.get_robustcov_results("HC3").summary()

0,1,2,3
Dep. Variable:,lrent,R-squared:,0.322
Model:,OLS,Adj. R-squared:,0.288
Method:,Least Squares,F-statistic:,9.598
Date:,"Tue, 27 Jul 2021",Prob (F-statistic):,2.88e-05
Time:,04:16:10,Log-Likelihood:,65.272
No. Observations:,64,AIC:,-122.5
Df Residuals:,60,BIC:,-113.9
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
y90,0.3855,0.056,6.935,0.000,0.274,0.497
lpop,0.0722,0.074,0.980,0.331,-0.075,0.220
lavginc,0.3100,0.102,3.041,0.003,0.106,0.514
pctstu,0.0112,0.003,3.602,0.001,0.005,0.017

0,1,2,3
Omnibus:,2.653,Durbin-Watson:,1.655
Prob(Omnibus):,0.265,Jarque-Bera (JB):,2.335
Skew:,0.467,Prob(JB):,0.311
Kurtosis:,2.934,Cond. No.,23.0


In [52]:
# Same linearmodels first-difference fit as before, now with
# cov_type = "robust", for comparison with the HC3 results.
FirstDifferenceOLS(other_rental_panel.lrent, X).fit(cov_type = "robust")

0,1,2,3
Dep. Variable:,lrent,R-squared:,0.9765
Estimator:,FirstDifferenceOLS,R-squared (Between):,0.9391
No. Observations:,64,R-squared (Within):,0.9765
Date:,"Tue, Jul 27 2021",R-squared (Overall):,0.9392
Time:,04:19:12,Log-likelihood,65.272
Cov. Estimator:,Robust,,
,,F-statistic:,624.15
Entities:,64,P-value,0.0000
Avg Obs:,2.0000,Distribution:,"F(4,60)"
Min Obs:,2.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
y90,0.3855,0.0487,7.9132,0.0000,0.2881,0.4830
lpop,0.0722,0.0697,1.0368,0.3040,-0.0671,0.2116
lavginc,0.3100,0.0893,3.4706,0.0010,0.1313,0.4886
pctstu,0.0112,0.0029,3.8159,0.0003,0.0053,0.0171


C5.i Results reported above. The coefficient on the $y90$ dummy is 0.262 and is significant at the 1% level. This would mean that, holding the other variables fixed, rents increased by roughly 26% (0.262 log points) over the 10 years.

C5.ii Given that we started with an unobserved effects model, we should not trust the pooled OLS standard errors. Pooled OLS leaves the unobserved city effect in the error term in both years, so the errors are serially correlated within a city; and if that effect is also correlated with the regressors, the coefficient estimates themselves suffer from endogeneity.

C5.iii The coefficient for $pctstu$ increases from 0.005 to 0.0112 and is still significant (though with a smaller t-statistic than the previous one). We may conclude that rents increase with the relative size of the student population, provided that there are no time-varying factors affecting $pctstu$ that we have not observed.

C5.iv The standard error on $pctstu$ becomes even smaller after adjusting for heteroskedasticity (about 0.003 versus 0.004). This is generally encouraging for our results, but would not address the concern about time-varying unobserved factors.

In [None]:
# Exercise 6
# NOTE(review): this cell has no execution count, and the names "lolyolo420!"
# and `allthecrimes` look like placeholders rather than real CRIME3 columns.
# TODO: replace them with the actual regressors and dependent variable before
# running; as written the cell will presumably fail under Restart & Run All.
crime3 = pd.read_stata("./stata/CRIME3.DTA")
X = sm.add_constant(crime3[["lolyolo420!"]])
model = sm.OLS(crime3.allthecrimes, X, missing = "drop").fit()
model.summary()