In [110]:
# Activate the project environment found one directory above the current working
# directory, so the packages loaded below resolve against that project.
using Pkg
Pkg.activate(joinpath(pwd(),".."))
using Random, Distributions, PrettyTables, Printf, Plots, Optim
# Seed the default random number generator so the simulated samples are reproducible.
Random.seed!(1234)

[32m[1m  Activating[22m[39m environment at `~/Desktop/ECON 627/Project.toml`


MersenneTwister(1234)

# Part (a): Data Generating

In [121]:
"""
    data(n)

Simulate `n` observations from the binary-response model
`Y = 1{-0.5 + x + U >= 0}` with independent standard normal `x` and `U`.

Returns a named tuple `(Y = ..., X = ...)` where `Y` is the vector of binary outcomes
and `X` is the `n × 2` design matrix whose first column is a constant and whose
second column is the regressor `x`.
"""
function data(n)
    # The error term is drawn before the regressor; keeping this order keeps the
    # random stream (and hence every simulated sample) unchanged.
    shock = randn(n)
    regressor = randn(n)
    latent = (-0.5 .+ regressor) .+ shock
    outcome = latent .>= 0
    design = hcat(ones(n), regressor)
    return (Y = outcome, X = design)
end

data (generic function with 1 method)

# Part(b): Criterion Function

In [51]:
"""
    Qn(beta, Y, X)

Probit criterion function: the average negative log-likelihood

    -(1/n) * Σ [ Yᵢ log Φ(Xᵢ'β) + (1 - Yᵢ) log(1 - Φ(Xᵢ'β)) ]

where `Φ` is the standard normal CDF.

# Arguments
- `beta`: coefficient vector with one entry per column of `X`.
- `Y`: vector of binary outcomes (0/1 or `Bool`).
- `X`: design matrix with one row per observation.

# Returns
The scalar value of the criterion at `beta`.
"""
function Qn(beta, Y, X)
    n = length(Y)
    index = X * beta
    std_normal = Normal(0, 1)
    # logcdf / logccdf evaluate log Φ(t) and log(1 - Φ(t)) directly. Taking
    # log.(cdf) instead yields -Inf once Φ rounds to 0 or 1, and 0 * -Inf = NaN
    # would then poison the objective handed to the optimizer.
    logΦ = logcdf.(std_normal, index)
    log1mΦ = logccdf.(std_normal, index)
    contributions = @. Y * logΦ + (1 - Y) * log1mΦ
    return -sum(contributions) / n
end


Qn (generic function with 1 method)

# Part(c): Minimization

In [131]:
# Draw one sample of 100 observations and minimize the probit criterion over the
# two coefficients, starting the search from the zero vector.
Y, X = data(100)
result = optimize(b -> Qn(b, Y, X), zeros(2))
b_n = Optim.minimizer(result)

2-element Vector{Float64}:
 -0.354983642597063
  1.419856815883338

# Part(d): Logit Estimation

In [132]:
"""
    logit(beta, Y, X)

Logit criterion function: the average negative log-likelihood

    -(1/n) * Σ [ Yᵢ log Λ(Xᵢ'β) + (1 - Yᵢ) log(1 - Λ(Xᵢ'β)) ]

where `Λ(t) = exp(t) / (1 + exp(t))` is the logistic CDF.

# Arguments
- `beta`: coefficient vector with one entry per column of `X`.
- `Y`: vector of binary outcomes (0/1 or `Bool`).
- `X`: design matrix with one row per observation.

# Returns
The scalar value of the criterion at `beta`. The value stays finite for any finite
index, including ones where `exp(t)` itself would overflow.
"""
function logit(beta, Y, X)
    n = length(Y)
    index = X * beta
    # log(1 + exp(-|t|)) appears in both log-probabilities and cannot overflow,
    # unlike exp(t) / (1 + exp(t)), which becomes Inf / Inf = NaN for large t.
    tail = @. log1p(exp(-abs(index)))
    logΛ = @. min(index, 0) - tail       # log Λ(t)
    log1mΛ = @. min(-index, 0) - tail    # log(1 - Λ(t)) = log Λ(-t)
    contributions = @. Y * logΛ + (1 - Y) * log1mΛ
    return -sum(contributions) / n
end


logit (generic function with 1 method)

In [135]:
# Draw a fresh sample of 100 observations and minimize the logit criterion over the
# two coefficients, starting the search from the zero vector.
Y, X = data(100)
result = optimize(b -> logit(b, Y, X), zeros(2))
b_n = Optim.minimizer(result)

2-element Vector{Float64}:
 -1.1701227217263606
  1.866946196535078

# Part(e): Linear Probability

In [133]:
"""
    linear(Y, X)

Ordinary least squares (linear probability model) coefficients from regressing `Y`
on the columns of `X`.

# Arguments
- `Y`: outcome vector (numeric or `Bool`).
- `X`: design matrix with one row per observation.

# Returns
The coefficient vector solving the normal equations `(X'X) β = X'Y`.
"""
function linear(Y,X)
    # Solve the normal equations instead of forming inv(X'X): this avoids the explicit
    # inverse and the k × n intermediate inv(X'X) * X', and still raises an error
    # when X'X is singular.
    return (X' * X) \ (X' * Y)
end

linear (generic function with 1 method)

# Part(f): Iteration

In [147]:
# Monte Carlo comparison of the slope estimates from the probit MLE, the logit MLE and
# the linear probability model, using R replications of samples of size 100.
R = 1000
# One column of slope estimation errors (estimate minus the true slope 1) per estimator:
# 1 = probit MLE, 2 = logit, 3 = linear probability. The array is deliberately not
# called `error`, which would shadow Base.error.
slope_errors = zeros(R, 3)
for i in 1:R
    (Y, X) = data(100)
    b1 = Optim.minimizer(optimize(b -> Qn(b, Y, X), [0.0, 0.0]))
    b2 = Optim.minimizer(optimize(b -> logit(b, Y, X), [0.0, 0.0]))
    b3 = linear(Y, X)
    slope_errors[i, 1] = b1[2] - 1
    slope_errors[i, 2] = b2[2] - 1
    slope_errors[i, 3] = b3[2] - 1
end

# Mean bias is the average error; RMSE is the square root of the average squared error.
table = ["Mean Bias" mean(slope_errors[:, 1]) mean(slope_errors[:, 2]) mean(slope_errors[:, 3]);
         "RMSE" sqrt(mean(abs2, slope_errors[:, 1])) sqrt(mean(abs2, slope_errors[:, 2])) sqrt(mean(abs2, slope_errors[:, 3]))]
header = ["",  "MLE", "Logit",  "Linear"]
pretty_table(table; header = header)


┌───────────┬───────────┬──────────┬───────────┐
│[1m           [0m│[1m       MLE [0m│[1m    Logit [0m│[1m    Linear [0m│
├───────────┼───────────┼──────────┼───────────┤
│ Mean Bias │ 0.0359918 │ 0.775107 │ -0.735457 │
│      RMSE │  0.233287 │  0.87827 │  0.736234 │
└───────────┴───────────┴──────────┴───────────┘


# Part(g)

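Based on our results, ML estimation with the standard normal CDF (probit) gives the lowest mean bias and the lowest RMSE. This is expected: the errors in the data-generating process are standard normal, so the probit model is the only correctly specified one of the three.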

# Part(h): More Sample

In [148]:
# Same Monte Carlo comparison as in Part (f), but with samples of size 1000 in each of
# the R replications.
R = 1000
# One column of slope estimation errors (estimate minus the true slope 1) per estimator:
# 1 = probit MLE, 2 = logit, 3 = linear probability. The array is deliberately not
# called `error`, which would shadow Base.error.
slope_errors = zeros(R, 3)
for i in 1:R
    (Y, X) = data(1000)
    b1 = Optim.minimizer(optimize(b -> Qn(b, Y, X), [0.0, 0.0]))
    b2 = Optim.minimizer(optimize(b -> logit(b, Y, X), [0.0, 0.0]))
    b3 = linear(Y, X)
    slope_errors[i, 1] = b1[2] - 1
    slope_errors[i, 2] = b2[2] - 1
    slope_errors[i, 3] = b3[2] - 1
end

# Mean bias is the average error; RMSE is the square root of the average squared error.
table = ["Mean Bias" mean(slope_errors[:, 1]) mean(slope_errors[:, 2]) mean(slope_errors[:, 3]);
         "RMSE" sqrt(mean(abs2, slope_errors[:, 1])) sqrt(mean(abs2, slope_errors[:, 2])) sqrt(mean(abs2, slope_errors[:, 3]))]
header = ["",  "MLE", "Logit",  "Linear"]
pretty_table(table; header = header)

┌───────────┬────────────┬──────────┬───────────┐
│[1m           [0m│[1m        MLE [0m│[1m    Logit [0m│[1m    Linear [0m│
├───────────┼────────────┼──────────┼───────────┤
│ Mean Bias │ 0.00487937 │ 0.720133 │ -0.734766 │
│      RMSE │  0.0653197 │ 0.729704 │  0.734836 │
└───────────┴────────────┴──────────┴───────────┘


As we can see, with a larger sample in each replication, both the bias and the RMSE of the probit MLE decrease, while the biases of the other two methods are almost the same as in the previous simulation. The ML estimator appears to be consistent, since its bias shrinks as the sample size grows.