# Setting the Environment

In [65]:
# Activate the project environment found one directory above the current working directory.
using Pkg
Pkg.activate(joinpath(pwd(),".."))
# Random provides seeding, Distributions the Normal/MvNormal laws, PrettyTables the table printer.
using Random, Distributions, PrettyTables
# Fix the seed of the global RNG so that the simulation draws below are reproducible.
Random.seed!(1234)

[32m[1m  Activating[22m[39m environment at `~/Desktop/ECON 627/Project.toml`


MersenneTwister(1234)

In [18]:
# Off-diagonal entry of the covariance matrix of the (ϵ, V) shocks drawn in `gendata`.
const ρ = 0.9
# Coefficient on X in the outcome equation of `gendata`; also the value that the
# confidence intervals in `result` are checked against.
const β = 0.15

0.15

# Part a - c: Data Generating

In [173]:
"""
    gendata(n)

Simulate one sample of size `n` from the heteroskedastic IV design.

A uniform draw `W` picks the value of the instrument `Z`: `-0.5`, `-0.1`, `0.1` or `1`
when `W` lies in `[0, 0.2)`, `[0.2, 0.4)`, `[0.4, 0.6)` or `[0.6, 1)` respectively.
The shocks `(ϵ, V)` are bivariate normal with unit variances and covariance `ρ`.
The structural error is `U = (1 + Z) ϵ`, the regressor is `X = 4 Z² + V`, and the
outcome is `Y = β X + U`.

# Returns
A named tuple `(Y, X, Z, gZ)` of length-`n` vectors, where `gZ = 4 Z² / (1 + Z)²`.
"""
function gendata(n)
    # Instrument: a four-point discrete variable driven by a uniform draw.
    W = rand(n)
    Z = map(W) do w
        w < 0.2 ? -0.5 : w < 0.4 ? -0.1 : w < 0.6 ? 0.1 : 1.0
    end

    # Correlated shocks; each column of `shocks` is one (ϵ, V) pair.
    shocks = rand(MvNormal([0; 0], [1 ρ; ρ 1]), n)
    ϵ = shocks[1, :]
    V = shocks[2, :]

    # Structural error whose scale depends on the instrument.
    U = @. (1 + Z) * ϵ

    # Regressor and outcome.
    X = @. 4 * Z^2 + V
    Y = @. β * X + U

    # Instrument transformation 4Z² / (1 + Z)².
    gZ = @. 4 * Z^2 / (1 + Z)^2

    return (Y = Y, X = X, Z = Z, gZ = gZ)
end


gendata (generic function with 1 method)

# Part d: Estimations

In [134]:
"""
    est2SLS(y, x, z)

Estimate `y = x * β + u` by two-stage least squares, i.e. GMM with weight matrix
`inv(z'z / n)`, using `z` as instruments, and compute standard errors from the
sandwich formula with `Ω = (1/n) Σ zᵢ zᵢ' ûᵢ²`. No intercept is added.

# Arguments
- `y`: outcome vector of length `n`.
- `x`: regressors, either a length-`n` vector or an `n × k` matrix.
- `z`: instruments, either a length-`n` vector or an `n × l` matrix.

# Returns
A named tuple `(beta2SLS, sd2SLS)`. With a single regressor given as a vector both
entries are scalars. With an `n × k` regressor matrix `beta2SLS` is a length-`k`
vector and `sd2SLS` is the length-`k` vector of per-coefficient standard errors.
"""
function est2SLS(y, x, z)
    n = size(y, 1)
    Q = (z' * x) / n
    W = inv((z' * z) / n)

    # inv(Q'WQ) appears on both sides of the sandwich, so compute it once.
    bread = inv(Q' * W * Q)

    # Point estimate.
    beta2SLS = bread * Q' * W * (z' * y) / n

    # Residuals and the heteroskedasticity-robust moment variance.
    u = y - x * beta2SLS
    zu = z .* u
    Omega = (zu' * zu) / n

    # Asymptotic variance of sqrt(n) * (beta2SLS - β).
    avar = bread * Q' * W * Omega * W * Q * bread

    sd2SLS = _se2sls(avar, n)

    return (beta2SLS = beta2SLS, sd2SLS = sd2SLS)
end

# Standard error implied by a scalar asymptotic variance and sample size `n`.
_se2sls(avar::Number, n) = sqrt(avar / n)

# Per-coefficient standard errors: element-wise square root of the diagonal of
# `avar / n` (a matrix square root would not give standard errors).
_se2sls(avar::AbstractMatrix, n) = [sqrt(avar[i, i] / n) for i in axes(avar, 1)]

est2SLS (generic function with 1 method)

In [156]:
"""
    feasibleIV(Y, X, Z)

Estimate the instrument `E[X | Z] / E[U² | Z]` for a discrete instrument `Z`.

A preliminary 2SLS fit of `Y` on `X` with `Z` as the instrument gives residuals.
For every distinct value of `Z` in the sample, the cell mean of `X` is divided by
the cell mean of the squared residuals, and that ratio is assigned to all
observations in the cell.

# Arguments
- `Y`, `X`, `Z`: outcome, regressor and instrument vectors of equal length.
  `Z` is treated as discrete: observations are grouped by exact equality.

# Returns
A vector with one estimated instrument value per observation.
"""
function feasibleIV(Y, X, Z)
    # First-step estimate and squared residuals.
    beta_hat = est2SLS(Y, X, Z).beta2SLS
    U2_hat = (Y .- X .* beta_hat) .^ 2

    T = float(promote_type(eltype(X), eltype(U2_hat)))
    gZ2 = zeros(T, length(Z))

    # Group on the values of Z that actually occur, so that a support point
    # missing from the sample cannot produce a singular dummy-variable regression.
    for v in unique(Z)
        cell = Z .== v
        m = count(cell)
        EXZ = sum(X[cell]) / m        # cell estimate of E[X | Z = v]
        EUZ = sum(U2_hat[cell]) / m   # cell estimate of E[U² | Z = v]
        gZ2[cell] .= EXZ / EUZ
    end

    return gZ2
end

feasibleIV (generic function with 1 method)

# Part e-g: Iterations and Results

In [140]:
"""
    result(n, R)

Run `R` Monte Carlo replications with sample size `n` and print three tables.

Each replication draws a sample with `gendata`, builds the estimated instrument
with `feasibleIV`, and calls `est2SLS` with the instruments `Z`, `gZ` and `gZ2`
(columns "2SLS", "Infeasible" and " Feasible"). For α = 0.1, 0.05 and 0.01 the
tables report:

1. the share of replications in which `|estimate - β|` is below the normal critical
   value times the standard error;
2. twice the critical value times the average standard error;
3. the share of replications in which `|estimate|` exceeds the critical value times
   the standard error.
"""
function result(n, R)
    levels = (0.1, 0.05, 0.01)
    rowlabels = ("α = 0.1", "α = 0.05", "α = 0.01")
    crit = map(a -> quantile(Normal(0, 1), 1 - a / 2), levels)
    nlev = length(levels)
    nest = 3    # number of estimators compared

    se = zeros(R, nest)                 # standard errors: replication × estimator
    covers = zeros(R, nlev, nest)       # 1 when the interval contains β
    rejects = zeros(R, nlev, nest)      # 1 when zero lies outside the interval

    for r in 1:R
        Y, X, Z, gZ = gendata(n)
        gZ2 = feasibleIV(Y, X, Z)

        fits = (est2SLS(Y, X, Z), est2SLS(Y, X, gZ), est2SLS(Y, X, gZ2))
        for (e, (b, s)) in enumerate(fits)
            se[r, e] = s
            for (k, c) in enumerate(crit)
                covers[r, k, e] = abs(b - β) < c * s ? 1 : 0
                rejects[r, k, e] = abs(b - 0) > c * s ? 1 : 0
            end
        end
    end

    # Build a table with a label column followed by one column per estimator.
    function tabulate(cell)
        tbl = Matrix{Any}(undef, nlev, nest + 1)
        for k in 1:nlev
            tbl[k, 1] = rowlabels[k]
            for e in 1:nest
                tbl[k, e + 1] = cell(k, e)
            end
        end
        return tbl
    end

    table1 = tabulate((k, e) -> mean(covers[:, k, e]))
    table2 = tabulate((k, e) -> crit[k] * mean(se[:, e]) * 2)
    table3 = tabulate((k, e) -> mean(rejects[:, k, e]))

    header = [" ", "2SLS", "Infeasible", " Feasible"]

    println("Table 1 : Coverage probability")
    pretty_table(table1; header = header)

    println("Table 2 : Length of confidence intervals")
    pretty_table(table2; header = header)

    println("Table 3 : Probability of statistically signifficant")
    return pretty_table(table3; header = header)
end

result (generic function with 1 method)

In [174]:
# Run the simulation with sample size n = 100 and R = 10000 replications
result(100,10000)

Table 1 : Coverage probability
┌──────────┬────────┬────────────┬───────────┐
│          │   2SLS │ Infeasible │  Feasible │
├──────────┼────────┼────────────┼───────────┤
│  α = 0.1 │ 0.8893 │     0.8891 │    0.8673 │
│ α = 0.05 │ 0.9372 │     0.9422 │    0.9218 │
│ α = 0.01 │ 0.9852 │     0.9849 │    0.9741 │
└──────────┴────────┴────────────┴───────────┘
Table 2 : Length of confidence intervals
┌──────────┬──────────┬────────────┬───────────┐
│          │     2SLS │ Infeasible │  Feasible │
├──────────┼──────────┼────────────┼───────────┤
│  α = 0.1 │ 0.277435 │   0.212882 │  0.204619 │
│ α = 0.05 │ 0.330584 │   0.253664 │  0.243818 │
│ α = 0.01 │ 0.434461 │   0.333371 │  0.320431 │
└──────────┴──────────┴────────────┴───────────┘
Table 3 : Probability of statistically signifficant
┌──────────┬────────┬────────────┬───────────┐
│          │   2SLS │ Infeasible │  Feasible │
├──────────┼──

# Part h


All three methods seem to have coverage probabilities close to $1 - \alpha$. As we can see in the table of average confidence-interval lengths, the infeasible IV gives a much shorter confidence interval than 2SLS. Therefore, the result in question 1 doesn't hold in finite samples.

# Part i

The feasible IV gives the shortest confidence intervals and the highest probability of statistical significance; therefore, we can conclude that it is the most powerful method among the three.

Compared with the feasible IV, the infeasible IV has a longer confidence interval and a lower probability of statistical significance.

# Part j

In [175]:
# Run the simulation with sample size n = 400 and R = 1000 replications
# NOTE(review): the n = 100 run uses 10000 replications — confirm that 1000 is intended here
result(400,1000)

Table 1 : Coverage probability
┌──────────┬───────┬────────────┬───────────┐
│          │  2SLS │ Infeasible │  Feasible │
├──────────┼───────┼────────────┼───────────┤
│  α = 0.1 │ 0.912 │      0.905 │     0.898 │
│ α = 0.05 │  0.97 │      0.954 │      0.95 │
│ α = 0.01 │ 0.995 │      0.991 │     0.986 │
└──────────┴───────┴────────────┴───────────┘
Table 2 : Length of confidence intervals
┌──────────┬──────────┬────────────┬───────────┐
│          │     2SLS │ Infeasible │  Feasible │
├──────────┼──────────┼────────────┼───────────┤
│  α = 0.1 │ 0.138957 │   0.105818 │  0.104741 │
│ α = 0.05 │ 0.165578 │    0.12609 │  0.124806 │
│ α = 0.01 │ 0.217606 │   0.165711 │  0.164023 │
└──────────┴──────────┴────────────┴───────────┘
Table 3 : Probability of statistically signifficant
┌──────────┬───────┬────────────┬───────────┐
│          │  2SLS │ Infeasible │  Feasible │
├──────────┼───────┼───

Compared with the results for $n = 100$, we have much shorter confidence intervals and a much higher probability of statistical significance. The difference between the feasible IV and the infeasible IV is much smaller, which is in accord with the fact that they have the same asymptotic distribution.