In [1]:
using Pkg
# Pkg.activate(joinpath(pwd(),".."))
# Pkg.add("Optim")
# Pkg.add("ForwardDiff")
# Pkg.add("PrettyTables")
# Pkg.add("Parameters")
# Pkg.add("Distributions")

In [2]:
using Parameters, Optim, ForwardDiff, LinearAlgebra, Distributions, Random, PrettyTables

In [3]:
# Seed the default random number generator with a fixed value so that the
# random draws made further down are reproducible from run to run.
Random.seed!(123)

MersenneTwister(123)

In [4]:
# Model parameters and simulation design, read as globals by the functions below.
const beta  = 1         # coefficient on X in  Y = X * beta + U
const pi    = [1, 1]    # coefficients on Z in X = Z * pi + V (this name shadows Base.pi)
const gamma = [1, 1]    # coefficients inside the error scale exp.(Z * gamma)
const rho   = 0.95      # off-diagonal entry of the (epsilon, V) covariance matrix
const n     = 500       # observations per simulated sample
const R     = 10_000    # number of simulated samples

10000

(a) Define the sample generating function.

In [5]:
"""
    generate_data_Q1(n)

Simulate one sample of `n` observations from the model

    X = Z * pi + V
    Y = X * beta + U,    U = exp.(Z * gamma) .* epsilon

where the two columns of `Z` are drawn from a bivariate normal with identity
covariance and `(epsilon, V)` from a bivariate normal with unit variances and
covariance `rho`. The parameters `beta`, `pi`, `gamma` and `rho` are read from
the global constants of the same names.

Returns the named tuple `(Y, X, Z)`; `Z` has `n` rows and two columns.
"""
function generate_data_Q1(n)
    instrument_dist = MvNormal([0, 0], [1 0; 0 1])
    shock_dist = MvNormal([0, 0], [1 rho; rho 1])

    # Draw the instruments before the shocks: the order in which random numbers
    # are consumed determines the sample obtained under a fixed seed.
    Z = rand(instrument_dist, n)'
    shocks = rand(shock_dist, n)'

    epsilon, V = shocks[:, 1], shocks[:, 2]

    # The structural error is epsilon scaled row by row by exp(z' * gamma).
    U = exp.(Z * gamma) .* epsilon
    X = Z * pi + V
    Y = X * beta + U

    return (Y = Y, X = X, Z = Z)
end

generate_data_Q1 (generic function with 1 method)

(b) Define the 2SLS estimator

In [6]:
"""
    est2SLS(y, x, z)

Two-stage least squares estimate of the coefficient in `y = x * beta + u`,
using the columns of `z` as instruments and `inv(z'z / n)` as weighting matrix.

The standard error comes from the sandwich formula

    (Q'WQ)⁻¹ Q'W Ω W Q (Q'WQ)⁻¹ / n,    Ω = (1/n) Σᵢ uᵢ² zᵢ zᵢ',

with `Q = z'x / n` and `u` the 2SLS residuals.

Returns the named tuple `(beta2SLS, sd2SLS)`.
"""
function est2SLS(y, x, z)
    nobs = size(y, 1)

    # Sample cross moment of instruments and regressor, and the weighting matrix.
    Qzx = (z' * x) / nobs
    Wn = inv(z' * z / nobs)

    # (Q'WQ)⁻¹ appears in the point estimate and on both ends of the variance.
    bread = inv(Qzx' * Wn * Qzx)

    coef = bread * Qzx' * Wn * (z' * y) / nobs

    # Middle term of the sandwich, built from the residual-weighted instruments.
    resid = y - x * coef
    weighted = z .* resid
    Sn = (weighted' * weighted) / nobs

    avar = bread * Qzx' * Wn * Sn * Wn * Qzx * bread

    return (beta2SLS = coef, sd2SLS = sqrt(avar / nobs))
end

est2SLS (generic function with 1 method)

(c) Define the GMM estimator

In [7]:
"""
    estGMM(y, x, z)

Two-step efficient GMM estimate of the coefficient in `y = x * beta + u`,
using the columns of `z` as instruments.

1. A first-step estimate is computed with the identity weighting matrix.
2. Its residuals `u` give `Ω = (1/n) Σᵢ uᵢ² zᵢ zᵢ'`, and the second step
   re-estimates with weighting matrix `Ω⁻¹`.

The reported standard error is `sqrt((Q'Ω⁻¹Q)⁻¹ / n)` with `Q = z'x / n`.

`z` may have any number of columns. `x` is expected to be a vector holding a
single regressor: the standard-error line takes the square root of a scalar.

Returns the named tuple `(betaGMM, sdGMM)`.
"""
function estGMM(y, x, z)
    n = size(y, 1)

    # Sample moments shared by both steps.
    Q = (z' * x) / n
    m = (z' * y) / n

    # Step 1: identity weighting. The identity is left implicit rather than
    # written out as a fixed-size matrix, so the instrument count is not
    # restricted to two.
    betaAn = (Q' * Q) \ (Q' * m)

    # Step 2: weighting matrix from the first-step residuals.
    u = y - x * betaAn
    zu = z .* u
    Omega = (zu' * zu) / n

    # Solve against Omega once instead of inverting it repeatedly. Omega is a
    # cross-product matrix and therefore symmetric, so Q'Ω⁻¹m == (Ω⁻¹Q)'m.
    OmegaInvQ = Omega \ Q
    avar = inv(Q' * OmegaInvQ)
    betaGMM = avar * (OmegaInvQ' * m)

    sdGMM = sqrt(avar / n)

    return (betaGMM = betaGMM, sdGMM = sdGMM)
end

estGMM (generic function with 1 method)

(d) Compute the statistics and store all the results

In [8]:
"""
    data(r; nobs = n)

Run `r` Monte Carlo replications. Each replication draws a fresh sample of
`nobs` observations with `generate_data_Q1`, estimates it with `est2SLS` and
`estGMM`, and stores ten summary numbers. `nobs` defaults to the global sample
size `n`, so `data(r)` behaves as before.

Returns an `r × 10` `Float64` matrix with one row per replication:

| column | content                                          |
|:-------|:-------------------------------------------------|
| 1, 2   | `abs(estimate - beta)` for 2SLS, GMM             |
| 3, 4   | 2SLS estimate minus / plus its standard error    |
| 5, 6   | GMM estimate minus / plus its standard error     |
| 7, 8   | standard error of 2SLS, GMM                      |
| 9, 10  | 1 if `abs(estimate - beta) < 1.96 * se`, else 0  |
"""
function data(r; nobs = n)
    result = zeros(r, 10)
    for i in axes(result, 1)
        sample = generate_data_Q1(nobs)
        fit2SLS = est2SLS(sample.Y, sample.X, sample.Z)
        fitGMM = estGMM(sample.Y, sample.X, sample.Z)

        beta2SLS, sd2SLS = fit2SLS.beta2SLS, fit2SLS.sd2SLS
        betaGMM, sdGMM = fitGMM.betaGMM, fitGMM.sdGMM

        # NOTE(review): these are absolute deviations; averaging them gives a
        # mean absolute error rather than a signed bias - confirm this is the
        # intended definition.
        dev2SLS = abs(beta2SLS - beta)
        devGMM = abs(betaGMM - beta)
        result[i, 1] = dev2SLS
        result[i, 2] = devGMM

        # NOTE(review): one-standard-error bands. If these were meant to be
        # 95% interval bounds they would need the same 1.96 factor as the
        # coverage indicators below - confirm.
        result[i, 3] = beta2SLS - sd2SLS
        result[i, 4] = beta2SLS + sd2SLS
        result[i, 5] = betaGMM - sdGMM
        result[i, 6] = betaGMM + sdGMM

        result[i, 7] = sd2SLS
        result[i, 8] = sdGMM

        # Coverage indicators; the Bool is stored as 1.0 / 0.0.
        result[i, 9] = dev2SLS < 1.96 * sd2SLS
        result[i, 10] = devGMM < 1.96 * sdGMM
    end

    return result
end

data (generic function with 1 method)

(e) Use the result to generate the table

In [9]:
# Run the full simulation with R replications.
result = data(R)

# Each row of `stat_cols` gives the (2SLS, GMM) columns of `result` that are
# averaged for one line of the report.
table = let stat_cols = [1 2; 7 8; 9 10]
    row_labels = ["Average Bias", "Average S.D.", "Coverage Probability"]
    averages = map(col -> round(mean(result[:, col]), digits = 7), stat_cols)
    hcat(row_labels, averages)
end

header = [" ", "2SLS", "GMM"]
pretty_table(table; header = header)


┌──────────────────────┬──────────┬──────────┐
│[1m                      [0m│[1m     2SLS [0m│[1m      GMM [0m│
├──────────────────────┼──────────┼──────────┤
│         Average Bias │ 0.456862 │ 0.373941 │
│         Average S.D. │ 0.523279 │ 0.447052 │
│ Coverage Probability │    0.955 │   0.9479 │
└──────────────────────┴──────────┴──────────┘


Compared with 2SLS, the GMM estimator has a smaller average bias and a smaller standard error, which means it is more accurate and more stable. GMM is therefore preferred.

In [15]:
# Repeat the exercise with only 100 replications.
result = data(100)

# Each row of `stat_cols` gives the (2SLS, GMM) columns of `result` that are
# averaged for one line of the report.
table = let stat_cols = [1 2; 7 8; 9 10]
    row_labels = ["Average Bias", "Average S.D.", "Coverage Probability"]
    averages = map(col -> round(mean(result[:, col]), digits = 7), stat_cols)
    hcat(row_labels, averages)
end

header = [" ", "2SLS", "GMM"]
pretty_table(table; header = header)

┌──────────────────────┬──────────┬──────────┐
│[1m                      [0m│[1m     2SLS [0m│[1m      GMM [0m│
├──────────────────────┼──────────┼──────────┤
│         Average Bias │ 0.413595 │ 0.343832 │
│         Average S.D. │ 0.495333 │ 0.437977 │
│ Coverage Probability │     0.97 │     0.96 │
└──────────────────────┴──────────┴──────────┘


The results are broadly the same, but with fewer replications they can sometimes be far from what we expect. For example, we expect the coverage probability to be 0.95 by construction, but with only 100 replications (`data(100)`; the sample size is still n = 500 in both runs) we sometimes get a noticeably larger or smaller number. With R = 10000 replications, the coverage probabilities stay close to 0.95.