In [28]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import statsmodels.api as sm
import os
import sympy as sp

In [2]:
# Read the whitespace-delimited wage file. header=None means no row is
# treated as a header, so pandas labels the columns 0, 1, 2, ...
df = pd.read_csv(
    'card_wage_2008.txt',
    sep=r'\s+',
    header=None,
)
# Preview the first rows as the cell output
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,3.1724,2.8375,3.7785,3.5897,3.4087,3.1824,3.6491,2.6932,3.0921,3.0286,3.3037
1,4.1398,7.9196,6.7298,5.6753,4.223,2.3601,3.4464,4.6142,5.5848,4.3622,9.2881
2,5.295,4.9544,4.8649,5.3206,5.7243,4.4856,4.4806,5.1001,5.0569,7.0589,7.0985
3,3.8113,3.8997,4.3631,4.515,4.8445,4.6112,5.1696,4.7046,4.8564,4.7662,5.3897
4,2.2322,1.9871,2.4561,2.2988,3.7832,3.9397,4.1684,3.8104,3.9173,3.2493,2.8749


In [3]:
print("################ part a ################")
# Line every column up with the column immediately to its left
# (the first column is compared against NaN and is therefore never flagged).
shifted_df = df.shift(periods=1, axis=1)

# Flag a cell when it is more than 10x, or less than 1/10 of, the value
# in the preceding column.
jumped_up = df.gt(shifted_df * 10)
jumped_down = df.lt(shifted_df / 10)
factor_change = jumped_up | jumped_down

# A row is dropped as soon as any of its cells is flagged.
rows_to_drop = factor_change.any(axis=1)
df = df.loc[~rows_to_drop].reset_index(drop=True)
df.shape

################ part a ################


(1302, 11)

In [37]:
print("################ part d ################")
theta = 0.3
num_individuals = df.shape[0]

# Wage data for the first 4 years (first four columns by position), pulled
# out once as a float array so the moments are computed for all individuals
# at once instead of row by row.
wages = df.iloc[:, :4].to_numpy(dtype=float)
y1, y2, y3, y4 = wages.T  # y1..y4 are columns 0..3

# psi_1 = psi_13 + psi_14:
#   first-year wage times the theta-adjusted change in wage differences
psi_13 = y1 * (y3 - y2 - theta * (y2 - y1))
psi_14 = y1 * (y4 - y3 - theta * (y3 - y2))
psi_1 = psi_13 + psi_14

# psi_2 = psi_23 + psi_24 + psi_34:
#   lagged wage difference times (wage - theta * previous wage)
psi_23 = (y2 - y1) * (y3 - theta * y2)
psi_24 = (y2 - y1) * (y4 - theta * y3)
psi_34 = (y3 - y2) * (y4 - theta * y3)
psi_2 = psi_23 + psi_24 + psi_34

total_moments_psi1 = psi_1.sum()
total_moments_psi2 = psi_2.sum()

# Calculate average moments
avg_moments_psi1 = total_moments_psi1 / num_individuals
avg_moments_psi2 = total_moments_psi2 / num_individuals

# Report the theta actually used rather than a hard-coded label
print(f"Average psi1 at θ = {theta}:", avg_moments_psi1)
print(f"Average psi2 at θ = {theta}:", avg_moments_psi2)

################ part d ################
Average psi1 at θ = 0.3: 1.0761692899877084
Average psi2 at θ = 0.3: 1.569793182499232


In [36]:
print("################ part e ################")
# Stack the two sample-average moments from part d into one vector g_bar(θ)
moments = np.array([avg_moments_psi1, avg_moments_psi2])

# Identity weighting: one row/column per moment condition (2x2 here)
W = np.identity(moments.shape[0])

# Quadratic form Q(θ) = g_bar(θ)' W g_bar(θ); moments is 1-D, so no explicit
# transpose is needed for the left factor
Q_theta = moments @ W @ moments

print("objective function value at θ = 0.3:", Q_theta)

################ part e ################
objective function value at θ = 0.3: 3.6223909765337154


In [41]:
print("################ part f ################")
# Only the first three columns enter the derivatives; extract them once as a
# float array so every individual is handled in a single vectorized pass.
wages_f = df.iloc[:, :3].to_numpy(dtype=float)
y1, y2, y3 = wages_f.T  # y1..y3 are columns 0..2

# psi_1 derivatives with respect to theta
d_psi13_dtheta = -y1 * (y2 - y1)
d_psi14_dtheta = -y1 * (y3 - y2)
total_d_psi1_dtheta = (d_psi13_dtheta + d_psi14_dtheta).sum()

# psi_2 derivatives with respect to theta
d_psi23_dtheta = -(y2 - y1) * y2
d_psi24_dtheta = -(y2 - y1) * y3
d_psi34_dtheta = -(y3 - y2) * y3
total_d_psi2_dtheta = (d_psi23_dtheta + d_psi24_dtheta + d_psi34_dtheta).sum()

# average derivatives (row count taken locally instead of relying on a
# variable left over from another cell)
avg_d_psi1_dtheta = total_d_psi1_dtheta / wages_f.shape[0]
avg_d_psi2_dtheta = total_d_psi2_dtheta / wages_f.shape[0]
avg_d_psi_dtheta = np.array([avg_d_psi1_dtheta, avg_d_psi2_dtheta]) # average derivative vector

# derivative of the objective function: 2 * G' W g, using W and the moment
# vector `moments` computed in part e
dQ_dtheta = 2 * avg_d_psi_dtheta @ W @ moments
# Report the theta the moments were evaluated at rather than a hard-coded label
print(f"Derivative of the objective function at θ = {theta}: {dQ_dtheta}")

################ part f ################
Derivative of the objective function at θ = 0.3: -10.682992263540516


In [49]:
print("################ part g ################")
# define GMM objective function using code from above
def gmm_objective(theta, data, W=np.eye(2)):
    """Evaluate the GMM objective Q(θ) = g_bar(θ)' W g_bar(θ).

    The two moment functions are built from the first four columns of
    ``data`` (by position), written y1..y4 below:

        psi_1 = y1*(y3 - y2 - θ(y2 - y1)) + y1*(y4 - y3 - θ(y3 - y2))
        psi_2 = (y2 - y1)*(y3 - θ y2) + (y2 - y1)*(y4 - θ y3)
                + (y3 - y2)*(y4 - θ y3)

    ``g_bar`` is the vector of their sample averages over the rows of ``data``.

    Parameters
    ----------
    theta : float or array-like with exactly one element
        Parameter value. ``scipy.optimize.minimize`` passes a 1-element
        array, which is reduced to a scalar here so the result is always a
        plain float rather than a (1, 1) array.
    data : pandas.DataFrame
        One row per individual; must have at least four columns and one row.
    W : array-like of shape (2, 2), optional
        Weight matrix. Defaults to the 2x2 identity.

    Returns
    -------
    float
        Value of the objective at ``theta``.

    Raises
    ------
    ValueError
        If ``theta`` has more than one element or ``data`` has no rows.
    """
    # .item() accepts 0-d and size-1 arrays and rejects anything larger
    theta = np.asarray(theta, dtype=float).item()

    wages = data.iloc[:, :4].to_numpy(dtype=float)
    if wages.shape[0] == 0:
        raise ValueError("data must contain at least one row")
    y1, y2, y3, y4 = wages.T

    # psi_1
    psi_1 = y1 * (y3 - y2 - theta * (y2 - y1)) + y1 * (y4 - y3 - theta * (y3 - y2))

    # psi_2
    psi_2 = (
        (y2 - y1) * (y3 - theta * y2)
        + (y2 - y1) * (y4 - theta * y3)
        + (y3 - y2) * (y4 - theta * y3)
    )

    moments = np.array([psi_1.mean(), psi_2.mean()])
    Q_theta = moments @ np.asarray(W, dtype=float) @ moments

    return float(Q_theta)

# minimize GMM objective function starting from θ = 0.3, restricting θ to [0, 1]
result = minimize(gmm_objective, x0=0.3, args=(df,), bounds=[(0, 1)], method='L-BFGS-B')

# Surface optimizer failures instead of silently reporting a non-converged estimate
if not result.success:
    print(f"Warning: first-step optimization did not converge: {result.message}")

# get theta hat (result.x is a 1-element array)
theta_hat = result.x[0]
print(f"Estimated value of θ: {theta_hat}")

################ part g ################
Estimated value of θ: 0.7271108240545127


In [50]:
print("################ part h ################")
# function to compute the moment conditions for all individuals
def calculate_moments(theta, data):
    """Return the per-individual moment vector for every row of ``data``.

    Uses the first four columns of ``data`` (by position), written y1..y4:

        column 0: y1*(y3 - y2 - θ(y2 - y1)) + y1*(y4 - y3 - θ(y3 - y2))
        column 1: (y2 - y1)*(y3 - θ y2) + (y2 - y1)*(y4 - θ y3)
                  + (y3 - y2)*(y4 - θ y3)

    Parameters
    ----------
    theta : float or array-like with exactly one element
        Parameter value at which the moments are evaluated.
    data : pandas.DataFrame
        One row per individual; must have at least four columns.

    Returns
    -------
    numpy.ndarray of shape (n_rows, 2)
        One row of moment values per individual; shape (0, 2) when ``data``
        has no rows.

    Raises
    ------
    ValueError
        If ``theta`` has more than one element.
    """
    # Reduce a 1-element array to a scalar so the output is always (n, 2)
    theta = np.asarray(theta, dtype=float).item()

    wages = data.iloc[:, :4].to_numpy(dtype=float)
    y1, y2, y3, y4 = wages.T

    # Calculate individual moment functions for all rows at once
    psi_13 = y1 * (y3 - y2 - theta * (y2 - y1))
    psi_14 = y1 * (y4 - y3 - theta * (y3 - y2))
    psi_23 = (y2 - y1) * (y3 - theta * y2)
    psi_24 = (y2 - y1) * (y4 - theta * y3)
    psi_34 = (y3 - y2) * (y4 - theta * y3)

    # one column per moment function
    return np.column_stack([psi_13 + psi_14, psi_23 + psi_24 + psi_34])

# Per-individual moment vectors evaluated at the first-step estimate
moments_at_theta_hat = calculate_moments(theta=theta_hat, data=df)

# Sample variance-covariance matrix of the moments; rowvar=False treats each
# column as a variable and each row as an observation
cov_matrix = np.cov(moments_at_theta_hat, rowvar=False)

# The weight matrix is the inverse of that covariance matrix
optimal_weight_matrix = np.linalg.inv(cov_matrix)
print("Optimal Weight Matrix:", optimal_weight_matrix, sep="\n")

################ part h ################
Optimal Weight Matrix:
[[0.00533503 0.00158904]
 [0.00158904 0.00669447]]


In [53]:
print("################ part i ################")
# second-step GMM: use the optimal weight matrix from part h
result_second_step = minimize(
    gmm_objective,
    x0=theta_hat,  # start from first-step estimate
    args=(df, optimal_weight_matrix),  # pass the optimal weight matrix
    bounds=[(0, 1)],  # keep θ inside [0, 1], as in the first step
    method='L-BFGS-B'
)

# Surface optimizer failures instead of silently reporting a non-converged estimate
if not result_second_step.success:
    print(f"Warning: second-step optimization did not converge: {result_second_step.message}")

# result.x is a 1-element array; take its single entry
theta_hat_second_step = result_second_step.x[0]
print(f"Second-step GMM estimate of θ: {theta_hat_second_step}")

################ part i ################
Second-step GMM estimate of θ: 0.8100605170108355
