# Chapter 12

In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

In [2]:
# Exercise 1: test the cgfr regression for AR(1) serial correlation
fertil3 = pd.read_stata("./stata/FERTIL3.DTA")

# Regress cgfr on cpe, cpe_1 and cpe_2 with an intercept prepended
X = sm.add_constant(fertil3.loc[:, ["cpe", "cpe_1", "cpe_2"]])
model = sm.OLS(fertil3["cgfr"], X, missing="drop").fit()

# Regress the residuals on their own first lag (no intercept);
# the slope is the estimate of ρ and its t-statistic is the AR(1) test
resid = model.resid
lagged_resid = resid.shift(1)
sm.OLS(resid, lagged_resid, missing="drop").fit().summary()

0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.085
Model:,OLS,Adj. R-squared (uncentered):,0.071
Method:,Least Squares,F-statistic:,6.209
Date:,"Sun, 16 May 2021",Prob (F-statistic):,0.0152
Time:,21:04:08,Log-Likelihood:,-183.76
No. Observations:,68,AIC:,369.5
Df Residuals:,67,BIC:,371.7
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,0.2918,0.117,2.492,0.015,0.058,0.525

0,1,2,3
Omnibus:,4.062,Durbin-Watson:,1.99
Prob(Omnibus):,0.131,Jarque-Bera (JB):,3.962
Skew:,0.233,Prob(JB):,0.138
Kurtosis:,4.087,Cond. No.,1.0


C1. There is evidence of AR(1) serial correlation: regressing the OLS residuals on their first lag gives a positive and statistically significant coefficient ($\hat{\rho} \approx 0.292$, $t \approx 2.49$, p-value $\approx 0.015$).

In [3]:
# Exercise 2
wageprc = pd.read_stata("./stata/WAGEPRC.DTA")

# gwage followed by gwage_1 ... gwage_12, with an intercept prepended
gwage_cols = ["gwage"] + [f"gwage_{lag}" for lag in range(1, 13)]
X = sm.add_constant(wageprc[gwage_cols])
model = sm.OLS(wageprc["gprice"], X, missing="drop").fit()

# Part (i): regress the residuals on their first lag to test for AR(1) errors
resid = model.resid
model = sm.OLS(resid, resid.shift(1), missing="drop").fit()
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.254
Model:,OLS,Adj. R-squared (uncentered):,0.252
Method:,Least Squares,F-statistic:,92.42
Date:,"Sun, 16 May 2021",Prob (F-statistic):,5.14e-19
Time:,21:04:08,Log-Likelihood:,1254.9
No. Observations:,272,AIC:,-2508.0
Df Residuals:,271,BIC:,-2504.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,0.5027,0.052,9.613,0.000,0.400,0.606

0,1,2,3
Omnibus:,57.698,Durbin-Watson:,2.209
Prob(Omnibus):,0.0,Jarque-Bera (JB):,400.715
Skew:,0.612,Prob(JB):,9.68e-88
Kurtosis:,8.819,Cond. No.,1.0


In [4]:
# Define a couple of helper functions to answer parts (ii) and (iii)
# FGLS_given_rho applies FGLS for a given estimate of ρ (helper for the next function)
# FGLS_AR1 is the main function: it re-estimates ρ and re-applies FGLS until ρ converges

def FGLS_given_rho(model, ρ, pw = False):
    """
    Estimate AR(1) model with given ρ by Feasible GLS

    model -- OLS model with AR(1) structure; its model.endog and model.exog
             arrays are the data that get quasi-differenced
    ρ     -- Estimate for rho
    pw    -- Prais-Winsten method (i.e. Don't drop first observation)

    Returns statsmodels model fitted on the transformed data
    """
    y = model.model.endog
    X = model.model.exog

    # Quasi-difference: subtract the lag weighted by ρ (this drops the first row)
    y_tilde = y[1:] - (ρ * y[:-1])
    X_tilde = X[1:] - (ρ * X[:-1])

    # Improve efficiency with Prais-Winsten if specified: put the first
    # observation back as the first row, rescaled by sqrt(1 - ρ²)
    if pw:
        scale = np.sqrt(1 - ρ**2)
        y_tilde = np.append(scale * y[0], y_tilde)
        X_tilde = np.append([scale * X[0]], X_tilde, axis = 0)

    # Estimate FGLS
    return sm.OLS(y_tilde, X_tilde).fit()

def _estimate_rho(resid):
    """Return the slope from regressing residuals on their first lag (no intercept)."""
    # Work on a plain array: pairing u[1:] with u[:-1] is the same regression as
    # using a shifted Series with the first row dropped, but it also accepts
    # array residuals and always yields an array of params, so [0] is positional.
    u = np.asarray(resid, dtype = float)
    return sm.OLS(u[1:], u[:-1], missing = "drop").fit().params[0]

def FGLS_AR1(model, pw = False, tol = 1.0e-5, max_iter = 100):
    """
    Iteratively estimate AR(1) model with unknown ρ by Feasible GLS

    model    -- OLS model with AR(1); model.resid may be a pandas Series or an array
    pw       -- Prais-Winsten method (i.e. Don't drop first observation)
    tol      -- Tolerance level: stop once successive estimates of ρ differ by at most tol
    max_iter -- Maximum number of iterations

    Returns the model produced by FGLS_given_rho at the final estimate of ρ.
    Prints the estimate of ρ after every iteration.
    """
    y = model.model.endog
    X = model.model.exog

    # Initial estimate of ρ from the OLS residuals
    ρ = _estimate_rho(model.resid)

    # Iterate until within tolerance or at specified end
    diff = 100
    while ((diff > tol) and (max_iter > 0)):
        new_model = FGLS_given_rho(model, ρ, pw)
        # Residuals are computed on the original (untransformed) data
        ρ_new = _estimate_rho(y - new_model.predict(X))
        diff = np.abs(ρ - ρ_new)
        ρ = ρ_new
        print("ρ = ", ρ)
        max_iter -= 1

    # Found ρ return model
    return FGLS_given_rho(model, ρ, pw)

In [5]:
# Estimate Cochrane-Orcutt LRP
model = sm.OLS(wageprc["gprice"], X, missing="drop").fit()

# The LRP is the sum of every coefficient after the first one
# (the first column of X is the constant added by sm.add_constant)
co_params = FGLS_AR1(model).params
co_params[1:].sum()

ρ =  0.5085439956656537
ρ =  0.5086441241680572
ρ =  0.5086458513625196


1.1097786906548022

In [6]:
# Subtract gwage_t from all lagged columns so that the coefficient on gwage_t
# itself becomes the LRP.
# The design matrix is rebuilt from wageprc rather than from X: overwriting X
# in terms of itself turned it from a DataFrame into an array, so running this
# cell a second time failed on `.iloc`.
lag_cols = [f"gwage_{lag}" for lag in range(1, 13)]
base = sm.add_constant(wageprc[["gwage"] + lag_cols])
lag_diffs = base[lag_cols].sub(base["gwage"], axis = 0)
# Columns: constant, gwage_t, then (gwage_{t-k} - gwage_t) for k = 1..12
X = np.concatenate((base.iloc[:,:2].to_numpy(), lag_diffs.to_numpy()), axis = 1)
model = sm.OLS(wageprc.gprice, X, missing = "drop").fit()
model = FGLS_AR1(model)

# Coefficient for x2 should be equal to LRP
model.summary()

ρ =  0.5085439956656534
ρ =  0.5086441241680568
ρ =  0.5086458513625197


0,1,2,3
Dep. Variable:,y,R-squared:,0.151
Model:,OLS,Adj. R-squared:,0.108
Method:,Least Squares,F-statistic:,3.516
Date:,"Sun, 16 May 2021",Prob (F-statistic):,4.55e-05
Time:,21:04:09,Log-Likelihood:,1255.4
No. Observations:,272,AIC:,-2483.0
Df Residuals:,258,BIC:,-2432.0
Df Model:,13,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,-0.0006,0.001,-0.612,0.541,-0.003,0.001
x2,1.1098,0.191,5.820,0.000,0.734,1.485
x3,0.0726,0.040,1.815,0.071,-0.006,0.151
x4,0.0239,0.041,0.582,0.561,-0.057,0.105
x5,0.0334,0.041,0.814,0.416,-0.047,0.114
x6,0.0801,0.041,1.952,0.052,-0.001,0.161
x7,0.1132,0.041,2.761,0.006,0.032,0.194
x8,0.0972,0.041,2.357,0.019,0.016,0.178
x9,0.1011,0.041,2.436,0.016,0.019,0.183

0,1,2,3
Omnibus:,57.643,Durbin-Watson:,2.211
Prob(Omnibus):,0.0,Jarque-Bera (JB):,377.017
Skew:,0.634,Prob(JB):,1.35e-82
Kurtosis:,8.626,Cond. No.,653.0


In [7]:
# Test to see if LRP is statistically different than 1
# (index 1 is the coefficient reported as x2 in the summary above)

lrp_hat, lrp_se = model.params[1], model.bse[1]
print("t-statistic is:", (lrp_hat - 1) / lrp_se)

t-statistic is: 0.5757213104123983


C2.i Regressing the OLS residuals on their first lag gives $\hat{\rho} \approx 0.503$ with $t \approx 9.61$, which is strong evidence of AR(1) serial correlation at any conventional significance level.

C2.ii The new estimate for the LRP using the Cochrane-Orcutt estimation is 1.110

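C2.iii Using the trick learned earlier, we subtract $gwage_t$ from each of the lagged values so that the coefficient on $gwage_t$ is the LRP. Since the test is to determine if the LRP is statistically different from 1, we can't use the t-statistic from the summary (which would show whether it is statistically different from 0). The t-statistic for the test we are interested in is $(1.110 - 1)/0.191 \approx 0.58$, so we cannot reject the null hypothesis that the LRP equals 1 at any conventional significance level.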