# Template Problem Set Solution

You can optionally activate any environment. In this particular case, I will go to such location and activate it. If you don't use any particular environment in your computer, you can ignore the cell below.

In [2]:
using Pkg
Pkg.activate(joinpath(pwd(),"..")) ;

[32m[1m  Activating[22m[39m project at `c:\Users\steve\Documents\GitHub\ECON627_UBC.jl`




## State the Data Generating Process 

Suppose we want to simulate data that follows this GDP 

$$ y_i = \beta X_i  + U_i $$ 

$$ X_i = \pi' Z_i + V_i $$ 

$$ U_i = \exp(\gamma'Z_i \times \epsilon_i) $$

where 

$Z_i \sim N(0,I_2)$ independent of $\epsilon_i$ and $V_i$, and 

$\left( \begin{array}{c}  \epsilon_i \\ V_i \end{array} \right)  \sim N\left( \left( \begin{array}{c}  0 \\ 0 \end{array} \right), \left( \begin{array}{c}  1.0 & \rho\\ \rho & 1.0 \end{array} \right) \right)$

Furthermore, let  $\beta = 1$, $\pi = \gamma =  \left[ \begin{array}{c}  1 \\ 1 \end{array} \right]$ , and  $\rho =0.95$.


The goal is to compute the coverage of the confidence intervals of $\beta_1$. For this example I'll set $n = 1000$ observations and $R=1000$ repetitions.


## Load Main Packages

In [3]:
using Distributions, PrettyTables, Random, LinearAlgebra, Parameters

## Define parameters to be used 

In [7]:
n=1000
β=1.0
π=[1.0;1.0]
γ=[1.0;1.0]
ρ=0.95
Σ=[1.0 ρ; ρ 1;];



2×2 Matrix{Float64}:
 1.0   0.95
 0.95  1.0

## Set Random Seed (make your code reproducible!)

In [4]:
Random.seed!(1234);

#### Define function that generates data

In [5]:
function generate_data(n)
    #Define the Multivariate Normal Distribution instance
    mvnormal = MvNormal([0.0; 0.0], Σ)
    
    #DGP
    Errors=rand(mvnormal,n)'
    ϵ=Errors[:,1]
    V=Errors[:,2]
    Z=randn(n,2)
    
    X=Z*π+V
    U=exp.(Z*π) .* ϵ
    Y=β*X+U
    return (Y = Y , X = X , Z = Z)
end

generate_data (generic function with 1 method)

In [8]:
@unpack X, Y , Z = generate_data(n);

#### Function for estimation of $\Omega$

In [None]:
# E(Z_iZ_i' u_i^2)
function Ω(U,Z)
    n=length(U)
    zr = Z.*U
    omega = (zr' * zr)/n
    
    return omega
end

Ω (generic function with 1 method)

#### Function that computes 2SLS and the two-step efficient GMM and their standard errors

Recall that we can write the estimator (for any choice of weighting matrix) as 
$$ \hat{\beta}_n(W_n) = (X'Z W_n Z'X)^{-1} X'Z W_n Z'Y $$

and don't forget that 2SLS corresponds to the case where $W_n^{-1} = \frac{(Z'Z)}{n}$.

In [11]:
function ols(X,Y)
    n = length(Y)
    bhat = (X'*X)\(X'*Y)

    rhat = Y-X*bhat
    rX = X.*rhat
    #E X_iX_i' u_i^2
    avar = n*(X'*X)\(rX'*rX)/(X'*X)
    
    #It's a scalar, otherwise we would need to take the diagonal of the matrix
    se = sqrt.(avar)  
    return (b=bhat,se=se)
end

ols (generic function with 1 method)

In [12]:
function GMM(Y,X,Z,W)
    n = length(Y)
    bhat = (X'*Z*W*Z'*X)\(X'*Z*W*Z'*Y)

    Q = Z'*X/n
    omega = Ω(Y-bhat*X,Z)

    avar =  ( (Q'*W*Q)\(Q'*W*omega*W*Q)/(Q'*W*Q) )/n
    
    #It's a scalar, otherwise we would need to take the diagonal of the matrix
    se = sqrt.(avar)  

    return (b=bhat,se=se)
end

GMM (generic function with 1 method)

In [13]:
function GMM_TS(Y,X,Z)
    n = length(Y)

    #Step 1: Do 2SLS 
    b2sls = GMM(Y,X,Z,inv(Z'*Z)*n).b
    Ω1 = Ω(Y-b2sls*X,Z)

    #Step 2: Do GMM with W = inv(Ω1)
    @unpack b, se = GMM(Y,X,Z,inv(Ω1)) 

    return (b=b, se=se)
end

GMM_TS (generic function with 1 method)

## MonteCarlo 

## Using 100 observations 

In [None]:
n=10^2
R = 1000

bias_OLS = 0.0
bias_2SLS = 0.0
bias_TSGMM = 0.0
inCI_OLS = 0.0
inCI_2SLS=0.0
inCI_TSGMM=0.0

CritVal = quantile(Normal(0,1), .975);

1.9599639845400576

In [None]:
for r=1:R
    Y, X, Z = generate_data(n)

    bOLS , sOLS = ols(X,Y)

    b2sls , s2SLS = GMM(Y,X,Z,inv(Z'*Z)*n)

    bGMM, sGMM = GMM_TS(Y,X,Z)
    
    # Compute bias in each method 
    bias_OLS += abs(bOLS - β)
    bias_2SLS += abs(b2sls - β) 
    bias_TSGMM += abs(bGMM - β)
    
    # Check whether true beta is in the confidence interval for each method
    inCI_OLS += (β>bOLS - CritVal*sOLS)*(β<bOLS + CritVal*sOLS)
    inCI_2SLS += (β>b2sls - CritVal*s2SLS)*(β<b2sls + CritVal*s2SLS)
    inCI_TSGMM += (β>bGMM - CritVal*sGMM)*(β<bGMM + CritVal*sGMM)
end
    

In [None]:
table_data = ["Coverage Prob of CI" inCI_OLS/R inCI_2SLS/R inCI_TSGMM/R; 
"Bias" bias_OLS/R bias_2SLS/R bias_TSGMM/R;               
]
header=["Statistic" ,"OLS", "2SLS" ,"Two-step efficient GMM"]
pretty_table(table_data;header)

┌─────────────────────┬──────────┬─────────┬────────────────────────┐
│[1m           Statistic [0m│[1m      OLS [0m│[1m    2SLS [0m│[1m Two-step efficient GMM [0m│
├─────────────────────┼──────────┼─────────┼────────────────────────┤
│ Coverage Prob of CI │    0.048 │   0.945 │                   0.92 │
│                Bias │ 0.915267 │ 0.80173 │               0.607331 │
└─────────────────────┴──────────┴─────────┴────────────────────────┘


## Using 10000 observations 

In [None]:
n=10^4
R = 1000
bias_OLS = 0.0
bias_2SLS = 0.0
bias_TSGMM = 0.0
inCI_OLS = 0.0
inCI_2SLS=0.0
inCI_TSGMM=0.0
CritVal = quantile(Normal(0,1), .975)

1.9599639845400576

In [None]:
for r=1:R
    Y, X, Z = generate_data(n)

    bOLS , sOLS = ols(X,Y)

    b2sls , s2SLS = GMM(Y,X,Z,inv(Z'*Z)*n)

    bGMM, sGMM = GMM_TS(Y,X,Z)
    
    # Compute bias in each method 
    bias_OLS += abs(bOLS - β)
    bias_2SLS += abs(b2sls - β) 
    bias_TSGMM += abs(bGMM - β)
    
    # Check whether true beta is in the confidence interval for each method
    inCI_OLS += (β>bOLS - CritVal*sOLS)*(β<bOLS + CritVal*sOLS)
    inCI_2SLS += (β>b2sls - CritVal*s2SLS)*(β<b2sls + CritVal*s2SLS)
    inCI_TSGMM += (β>bGMM - CritVal*sGMM)*(β<bGMM + CritVal*sGMM)
end
    

In [None]:
table_data = ["Coverage Prob of CI" inCI_OLS/R inCI_2SLS/R inCI_TSGMM/R; 
"Bias" bias_OLS/R bias_2SLS/R bias_TSGMM/R;               
]
header=["Statistic" ,"OLS", "2SLS" ,"Two-step efficient GMM"]
pretty_table(table_data;header)

┌─────────────────────┬──────────┬──────────┬────────────────────────┐
│[1m           Statistic [0m│[1m      OLS [0m│[1m     2SLS [0m│[1m Two-step efficient GMM [0m│
├─────────────────────┼──────────┼──────────┼────────────────────────┤
│ Coverage Prob of CI │      0.0 │    0.952 │                  0.953 │
│                Bias │ 0.854472 │ 0.120181 │                0.11139 │
└─────────────────────┴──────────┴──────────┴────────────────────────┘


## Speeding things up: Use multithreads in Julia!

In [None]:
function simulate(n,R)

    # Initialize the values for all R reps 
    bias_OLS = zeros(R)
    bias_2SLS =  zeros(R)
    bias_TSGMM =  zeros(R)
    inCI_OLS =  zeros(R)
    inCI_2SLS = zeros(R)
    inCI_TSGMM = zeros(R)

    CritVal = quantile(Normal(0,1), .975)

    Threads.@threads for r=1:R
        
        Y, X, Z = generate_data(n)

        bOLS , sOLS = ols(X,Y)

        b2sls , s2SLS = GMM(Y,X,Z,inv(Z'*Z)*n)

        bGMM, sGMM = GMM_TS(Y,X,Z)
        
        # Compute bias in each method 
        bias_OLS[r] = abs(bOLS - β)
        bias_2SLS[r] = abs(b2sls - β) 
        bias_TSGMM[r] = abs(bGMM - β)
        
        # Check whether true beta is in the confidence interval for each method
        inCI_OLS[r] = (β>bOLS - CritVal*sOLS)*(β<bOLS + CritVal*sOLS)
        inCI_2SLS[r] = (β>b2sls - CritVal*s2SLS)*(β<b2sls + CritVal*s2SLS)
        inCI_TSGMM[r] = (β>bGMM - CritVal*sGMM)*(β<bGMM + CritVal*sGMM)
    end
    
    return  (cov_OLS=mean(inCI_OLS), cov_2SLS=mean(inCI_2SLS), cov_TSGMM=mean(inCI_TSGMM),
            bias_OLS=mean(bias_OLS), bias_2SLS=mean(bias_2SLS), bias_TSGMM=mean(bias_TSGMM))
end     

simulate (generic function with 1 method)

In [None]:
@unpack cov_OLS, cov_2SLS, cov_TSGMM, bias_OLS, bias_2SLS, bias_TSGMM = simulate(10^4,1000)
table_data = ["Coverage Prob of CI" cov_OLS cov_2SLS cov_TSGMM; 
"Bias" bias_OLS bias_2SLS bias_TSGMM;               
]
header=["Statistic" ,"OLS", "2SLS" ,"Two-step efficient GMM"]
pretty_table(table_data;header)

┌─────────────────────┬──────────┬──────────┬────────────────────────┐
│[1m           Statistic [0m│[1m      OLS [0m│[1m     2SLS [0m│[1m Two-step efficient GMM [0m│
├─────────────────────┼──────────┼──────────┼────────────────────────┤
│ Coverage Prob of CI │      0.0 │    0.962 │                  0.956 │
│                Bias │ 0.860489 │ 0.113845 │               0.103939 │
└─────────────────────┴──────────┴──────────┴────────────────────────┘
