# Econometrics Assignment 4
## Nicolas Wesseler

### Activate Environment

In [7]:
using Pkg
Pkg.activate(joinpath(pwd(),".."))
using Random, Distributions, PrettyTables, Printf
Random.seed!(1234)

[32m[1m  Activating[22m[39m environment at `~/Desktop/ECON 627/Project.toml`


MersenneTwister(1234)

### Install missing packages

In [8]:
#Pkg.add("Optim")
#Pkg.add("ForwardDiff")
#Pkg.add("PrettyTables")
#Pkg.add("Parameters")
#Pkg.add("Distributions")
#Pkg.add("Statistics")

### Load Packages

## Question 3: Monte Carlo simulations for the overidentifying restrictions test

I first simulate data for the following model:

$$Y_i = \beta X_i + U_i$$
$$X_i = Z_i' \pi + V_i$$

Define the parameters

In [51]:
const R = 10000;
const β = 1;
const ρ = 0.9;
const pi = [1, 1];

## a) Generate n = 100 observations

In [52]:
function generate_data(n, δ)

    # Errors
    mvnormal_Z = MvNormal([0;0],[1 0 ; 0 1] )
    mvnormal_error = MvNormal([0;0],[1 ρ ; ρ 1] )
    errors = rand(mvnormal_error,n)'
    e = errors[:,1]
    V = errors[:,2]
    Z = rand(mvnormal_Z,n)
    U = (Z[1,:] .+ Z[2,:]) .*e 

    # Vector X
    X = Z'* pi + V

    # Vector y
    y = β*X + U

    ϵ = U .+ Z'* δ
    y_alt =  β*X + ϵ

    # Return the data
    return(y=y, X=X, Z=Z, y_alt = y_alt)
end

generate_data (generic function with 1 method)

## b) Compute Efficient 2-step GMM and GMM efficient weight matrix

In [42]:
function two_step_gmm(Y,X,Z)
    n = size(X,1)

    # 2 SLS
    xz = Z*X/n
    zz = Z*Z'/n
    zy = Z*y/n
    β_2SLS = (xz'*inv(zz)xz)\(xz'*inv(zz)zy)
    
    # Efficient GMM
    res = y - X*β_2SLS
    zu = Z' .* res
    Ω = zu'*zu / n  

    β_GMM = (xz'*inv(Ω)xz)\(xz'*inv(Ω)*zy) 
    u_hat = y - X*β_GMM
    zu2 = Z' .* u_hat
    Ω_GMM = zu2'*zu2 /n

    return (coef_GMM = β_GMM, Ω_GMM = Ω_GMM)
end
coef_GMM, Ω_GMM = two_step_gmm(y,X,Z)  

(coef_GMM = 0.9268097142677967, Ω_GMM = [1.6589783811479473 0.46515282652479256; 0.46515282652479256 1.9373051266507728])

## c) Calculate the overidentifying restrictions test statistic

In [53]:
function J_test(y,Z,coef_GMM, Ω_GMM)
   n = size(y,1)
    u_hat = y - X*coef_GMM
    zu = Z * u_hat 

    J = 1/n *  zu' * inv(Ω_GMM) * zu

    r = J > quantile(Chisq(1), .95) ? 1 : 0
     

    return (J = J, r = r)
end
J, r = J_test(y,Z,coef_GMM, Ω_GMM)

(J = 0.00023600857082162714, r = 0)

We don't reject $H_0$ as there is no evidence for overidentification. 

## d) Repeat the test 10,000 times

In [54]:
rejection_rate = []
for i in 1:R
    (y,X,Z,y_alt) = generate_data(100, [0;0])
    (coef_GMM, Ω_GMM) = two_step_gmm(y,X,Z) 
    (J,r) = J_test(y,Z,coef_GMM, Ω_GMM)
    append!(rejection_rate, r)
end
mean_rr = mean(rejection_rate)

0.0442

In [29]:
(y,X,Z,y_alt) = generate_data(100, [0;0])
(coef_GMM, Ω_GMM) = two_step_gmm(y,X,Z) 

(coef_GMM = 0.976793026070875, Ω_GMM = [541.8868418230556 146.21779654010848; 146.21779654010848 376.7432620215407])

The average rejection rate is 0.0459 and therefore very close to the nominal 5\% level.

## e) Power of the Test 

$$Z_i \epsilon_i = Z_i U_i + Z_i Z_i' \delta$$
$$E[Z_i \epsilon_i] = E[Z_i U_i] + E[Z_i Z_i'] \delta$$
$$E[Z_i \epsilon_i] = 0 + I \delta$$
$$E[Z_i \epsilon_i] = \delta$$


$$Z_i X_i' = Z_i Z_i' \pi + Z_i V_i $$
$$E[Z_i X_i'] = E[Z_i Z_i'] \pi + E[Z_i V_i] $$
$$E[Z_i X_i'] = I \pi + 0 $$
$$E[Z_i X_i'] = \pi$$

## f) g) h) i) 

In [55]:
Δ = [0.2 1 20; -0.2 -1 20]
for j in 1:3
    δ = Δ[:,j]
    rejection_rate = []
    for i in 1:R
        (y,X,Z,y_alt) = generate_data(100,δ)
        (coef_GMM, Ω_GMM) = two_step_gmm(y_alt,X,Z) 
        (J,r) = J_test(y_alt,Z,coef_GMM, Ω_GMM)
        append!(rejection_rate, r)
    end
    mean_rr = mean(rejection_rate)
    println( δ,  "Rejection Rate is ", mean_rr)
end

[0.2, -0.2]Rejection Rate is 0.6021


[1.0, -1.0]Rejection Rate is 1.0


[20.0, 20.0]Rejection Rate is 1.0


In [16]:
y,X,Z,y_alt = generate_data(100,[20.0; 20.0])
coef_GMM, Ω_GMM = two_step_gmm(y_alt,X,Z) 
estgmm = J_test(y_alt,Z,coef_GMM, Ω_GMM)

(12827.847923825599, 1)

The J-test is based on the null hypothesis that $E[Z_i \epsilon_i] = 0$. When setting $E[Z_i \epsilon_i] \neq 0$ but $\delta_1 = - \delta_2$ the power of the test increases ad the more so the greater $|\delta|$. This makes sense since the $H_0$ extends farther away from the real value and so it is easier to reject it when false. 