## Problem 2

### (a)

In [1]:
# === Packages ===
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
from linearmodels.iv import IV2SLS

# ---- Load the spreadsheet and normalise its column labels (trimmed, lower-case) ----
path = "PS4data.xls"
raw = pd.read_excel(path).rename(columns=lambda label: label.strip().lower())

Cn  = raw['real consumption of nondurables']
Cs  = raw['real consumption of services']
pop = raw['population']
Y   = raw['real disposable income']

# Per-capita consumption (nondurables + services) and per-capita disposable income.
C_pc = (Cn + Cs).div(pop)
Y_pc = Y.div(pop)

# ---- Regress C_t on a constant and its first four lags ----
lag_columns = [f'C{k}' for k in range(1, 5)]
df_a = pd.DataFrame(
    {'C': C_pc, **{f'C{k}': C_pc.shift(k) for k in range(1, 5)}}
).dropna()  # the first four rows have no complete lag history

y_a   = df_a['C']
X_a   = sm.add_constant(df_a[lag_columns])
ols_a = sm.OLS(y_a, X_a).fit()
# A White-robust variant of this fit would use .fit(cov_type='HC1') instead.

# ---- Joint test that the coefficients on C2, C3 and C4 are all zero ----
# Each row of R selects exactly one of the restricted coefficients.
restricted_lags = ['C2', 'C3', 'C4']
R = np.zeros((len(restricted_lags), X_a.shape[1]))
R[np.arange(len(restricted_lags)),
  [X_a.columns.get_loc(label) for label in restricted_lags]] = 1
f_test_conventional = ols_a.f_test((R, np.zeros(len(restricted_lags))))

print("\n==== (a) OLS with lags of C (levels, per capita) ====")
print("\nF-test H0:b2=b3=b4=0:", f_test_conventional)


==== (a) OLS with lags of C (levels, per capita) ====

F-test H0:b2=b3=b4=0: <F test: F=8.409996895527895, p=2.636414573619734e-05, df_denom=211, df_num=3>


### (c)

In [17]:
# ====== (c) 2SLS in logs: Δlog C_t on Δlog Y_t, instrumented by lags of Δlog C ======
# Growth rates are first differences of the logged per-capita series.
lC, lY = np.log(C_pc), np.log(Y_pc)
dlC, dlY = lC.diff(), lY.diff()

# Instruments z1..z4 are Δlog C lagged 2 through 5 periods:
#   z_k = log(C_{t-k-1} / C_{t-k-2}),  k = 1, ..., 4
Z = pd.DataFrame({f'z{k}': dlC.shift(k + 1) for k in range(1, 5)})

# Align everything on the shared index and keep only fully observed rows.
pieces_c = [dlC.rename('dlC'), dlY.rename('dlY'), Z]
data_c = pd.concat(pieces_c, axis=1).dropna()

# dlY is the endogenous regressor; z1..z4 are the excluded instruments.
model_c = IV2SLS.from_formula(
    formula="dlC ~ 1 + [dlY ~ z1 + z2 + z3 + z4]",
    data=data_c,
)
iv_res = model_c.fit(cov_type="robust")  # White-robust covariance

print("\n==== (c) 2SLS (logs, White-robust) ====")
print(iv_res.summary)


==== (c) 2SLS (logs, White-robust) ====
                          IV-2SLS Estimation Summary                          
Dep. Variable:                    dlC   R-squared:                      0.0685
Estimator:                    IV-2SLS   Adj. R-squared:                 0.0641
No. Observations:                 214   F-statistic:                    4.6529
Date:                Thu, Nov 20 2025   P-value (F-stat)                0.0310
Time:                        14:18:41   Distribution:                  chi2(1)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.0026     0.0012     2.2579     0.0240      0.0003      0.0049
dlY        

### (d)

In [9]:
def _get_test(res, nm):
    obj = None
    if hasattr(res, nm):
        obj = getattr(res, nm)
        obj = obj() if callable(obj) else obj 
    if obj is None:
        return None
    stat = getattr(obj, 'stat', None)
    if stat is None: stat = getattr(obj, 'statistic', None)
    pval = getattr(obj, 'pval', None)
    if pval is None: pval = getattr(obj, 'pvalue', None)
    df   = getattr(obj, 'df',   None)
    return float(stat), float(pval), int(df)

# Endogeneity (Durbin, Wu–Hausman) and over-identification (Sargan) diagnostics
# taken from the 2SLS fit, each reduced to a (stat, pval, df) tuple.
# NOTE(review): iv_res was fitted with cov_type="robust", while these three
# tests are presumably the classical (homoskedastic) versions — confirm
# whether heteroskedasticity-robust counterparts should be reported instead.
durbin, wu, overid = (
    _get_test(iv_res, attr) for attr in ('durbin', 'wu_hausman', 'sargan')
)

print("Durbin test:", durbin)
print("Wu–Hausman test:", wu)
print("Over-ID test (Sargan):", overid)

Durbin test: (3.378081230885269, 0.06606893034867656, 1)
Wu–Hausman test: (3.3841451254564863, 0.0672312418410439, 1)
Over-ID test (Sargan): (7.738279229290716, 0.05174198783912787, 3)


In [16]:
from linearmodels.iv import IVGMM

# Re-estimate the same specification by GMM to obtain the J statistic.
gmm_model = IVGMM.from_formula('dlC ~ 1 + [dlY ~ z1 + z2 + z3 + z4]', data=data_c)
res_gmm = gmm_model.fit()

# Hansen J test of the over-identifying restrictions: statistic, then p-value.
j_test = res_gmm.j_stat
print(j_test.stat, j_test.pval)

5.087016998755816 0.16553510320539822


## Problem 3

### (d)

In [16]:
import pandas as pd
import numpy as np

# Read the index levels; entries that cannot be parsed as numbers become NaN
# and are dropped before converting to a float array.
df = pd.read_excel("SP500Index.xlsx")
sp = pd.to_numeric(df['Level of the S&P 500 Index'], errors='coerce')
sp = sp.dropna().to_numpy(dtype=float)

# x_t = log(SP_t / SP_0); the log-returns Δx_t are its first differences.
x  = np.log(sp) - np.log(sp[0])
dx = x[1:] - x[:-1]

# Maximum-likelihood estimates: sample mean for the drift, and the root of the
# mean squared deviation (divisor T, not T - 1) for the volatility.
T = len(dx)
delta_hat = np.mean(dx)
sigma_hat = np.sqrt(np.mean((dx - delta_hat) ** 2))

print(f"T (monthly returns) = {T}")
print(f"Monthly drift  (delta_hat) = {delta_hat:.6f}")
print(f"Monthly vol    (sigma_hat) = {sigma_hat:.6f}")

T (monthly returns) = 701
Monthly drift  (delta_hat) = 0.005548
Monthly vol    (sigma_hat) = 0.042180
