# PSET 11
### Juan M Jimenez R.

In [1]:
#using Pkg
#Pkg.add(["Convex", "SCS", "ECOS"])
using Distributions, Optim, LinearAlgebra, Random, PrettyTables, Convex, SCS, ECOS
MOI=Convex.MOI #use the convex pkg for optimizing

MathOptInterface

## Question 2
### (a)

In [2]:
"""
    DGP(; α, β₁, β₂, σ, ρ, k, n)

Simulate one sample of size `n` from the linear model `Y = α + β₁X₁ + β₂X₂ + σε`.

The `k` regressors are drawn jointly from `MvNormal(Σ)`, where `Σ` is built with `ρ` in
every entry plus `1 - ρ` added on the diagonal. Only the first two columns of `X` enter
`Y`; the noise `ε` comes from `randn`.

# Returns
- `Y`: `n×1` matrix of outcomes.
- `X`: `n×k` regressor matrix (the adjoint of the `k×n` draw matrix).
"""
function DGP(; α, β₁, β₂, σ, ρ, k, n)
    # Equicorrelated covariance: ρ everywhere, with (1 - ρ) added on the diagonal.
    covariance = ρ * ones(k, k) + (1 - ρ) * I
    # `rand` returns one draw per column; the adjoint puts observations in rows.
    X = adjoint(rand(MvNormal(covariance), n))
    shocks = randn(n, 1)
    Y = β₁ * X[:, 1] + β₂ * X[:, 2] + σ * shocks .+ α
    return Y, X
end

DGP (generic function with 1 method)

### (b)

In [3]:
# Solver factory handed to `solve!` inside `Lasso`: SCS with the "eps_abs" and "eps_rel"
# attributes set to 1e-8 and `MOI.Silent()` enabled.
opt = MOI.OptimizerWithAttributes(SCS.Optimizer, "eps_abs"=>1.0e-08, "eps_rel"=>1.0e-08, MOI.Silent() => true);

"""
    Lasso(Y, X, λ)

Fit an L1-penalised least-squares regression of `Y` on `X` plus an intercept.

A column of ones is appended to `X`, so the returned vector has `size(X, 2) + 1` entries
with the intercept in the last position. The penalty `λ` multiplies the 1-norm of the
slope coefficients only; the intercept is not penalised. The problem is solved through
Convex.jl with the global optimizer `opt`.

# Returns
The coefficient vector, rounded to 5 digits.
"""
function Lasso(Y, X, λ)
    nobs, nreg = size(X)

    # Design matrix with the intercept column placed last.
    design = [X ones(nobs, 1)]

    gram = design' * design / nobs
    moment = design' * Y / nobs

    coef = Variable(nreg + 1)  # decision variable: slopes followed by the intercept

    # b'Qb/2 - c'b + λ * sum(|b[1:K]|): absolute values, not squares, in the penalty.
    objective = quadform(coef, gram) / 2 - dot(moment, coef) + λ * norm(coef[1:nreg], 1)
    problem = minimize(objective)

    solve!(problem, opt, verbose=false)

    return round.(vec(evaluate(coef)), digits=5)
end

Lasso (generic function with 1 method)

### (c)-(f)

In [4]:
"""
    MC(; ρ, σ, β₁, β₂, R, n, k)

Estimate Lasso selection frequencies by Monte Carlo.

For each of `R` replications, a sample is drawn with `DGP` (intercept fixed at `α = 1.0`)
and fitted with `Lasso` using the penalty `λ = 1.1σ(2log(nk)/n)^0.5`. A coefficient counts
as selected when its (rounded) estimate is non-zero.

# Keywords
- `ρ`, `σ`, `β₁`, `β₂`, `n`, `k`: forwarded to `DGP`; `σ`, `n` and `k` also set `λ`.
- `R`: number of replications.

# Returns
A `1×3` `Matrix{Float64}` with the share of replications in which, respectively, the
first regressor, the second regressor, and at least one of regressors `3:k` was selected.
When `k == 2` there are no irrelevant regressors and the third entry is `0.0`.
"""
function MC(;ρ,σ,β₁,β₂,R,n,k)

    β₁_Included=zeros(R) # per-replication indicator: β₁ selected
    β₂_Included=zeros(R) # per-replication indicator: β₂ selected
    β_wrong_Included=zeros(R) # per-replication indicator: any irrelevant β selected

    λ=1.1*σ*(2*log(n*k)/n)^0.5

    # Each iteration writes only to its own index r, so no synchronisation is needed.
    Threads.@threads for r in eachindex(β₁_Included)

        Y,X=DGP(;α=1.0,β₁=β₁,β₂=β₂,σ=σ,ρ=ρ,k=k,n=n)
        β̂ = Lasso(Y,X,λ)

        β₁_Included[r] = (β̂[1] !=0) # indicator if β₁ is included
        β₂_Included[r] = (β̂[2] !=0) # indicator if β₂ is included
        # Indicator if any irrelevant regressor (3:k) is included. `any` returns false
        # on an empty slice, whereas `maximum` would throw when k == 2.
        β_wrong_Included[r] = any(!iszero, @view β̂[3:k])
    end

    # simulated probabilities
    OUT::Matrix{Float64}=[sum(β₁_Included)/R sum(β₂_Included)/R sum(β_wrong_Included)/R]
    return OUT
end

MC (generic function with 1 method)

In [5]:
# Small timed run (R=2, n=100, k=4). Presumably a warm-up so that compilation time is
# not charged to the experiments below — the recorded timing is ~99.8% compilation.
@time OUT=MC(;ρ=0.2,σ=2.0,β₁=2.0,β₂=2.0,R=2,n=100,k=4);

 21.335689 seconds (54.01 M allocations: 2.890 GiB, 3.59% gc time, 99.77% compilation time)


In [6]:
# Run the four Monte Carlo designs (c)-(f) and stack one result row per design in `data`.
# The seed is set once, so the results depend on the order in which the designs are run.
Random.seed!(627)

const n=300 # sample size
const k=50 # number of regressors
const R=100 # number of Monte Carlo replications (author's alternative value: 1000)

# Part (c): ρ=0.2, σ=2.0, both coefficients equal to σ
ρ=0.2; σ=2.0; @time OUT_c = MC(;ρ=ρ,σ=σ,β₁=σ,β₂=σ, R=R, n=n, k=k);
data=["(c)" ρ σ σ σ OUT_c;]; # row layout: label, ρ, σ, β₁, β₂, then the three frequencies; used with PrettyTables

# Part (d): same as (c) but with σ=0.1 (coefficients again equal to σ)
ρ=0.2; σ=0.1; @time OUT_d = MC(;ρ=ρ,σ=σ,β₁=σ,β₂=σ, R=R, n=n, k=k);
data=[data; "(d)" ρ σ σ σ OUT_d;];

# Part (e): same as (c) but with β₂ reduced to 0.1σ
ρ=0.2; σ=2.0; @time OUT_e = MC(;ρ=ρ,σ=σ,β₁=σ,β₂=σ*0.1, R=R, n=n, k=k);
data=[data; "(e)" ρ σ σ σ*0.1 OUT_e;];

# Part (f): same as (c) but with ρ=0.9
ρ=0.9; σ=2.0; @time OUT_f = MC(;ρ=ρ,σ=σ,β₁=σ,β₂=σ, R=R, n=n, k=k);
data=[data; "(f)" ρ σ σ σ OUT_f;];

  1.147154 seconds (532.76 k allocations: 121.784 MiB, 2.37% gc time, 0.24% compilation time)
  1.158558 seconds (532.82 k allocations: 121.791 MiB, 2.40% gc time)
  1.126899 seconds (532.73 k allocations: 121.782 MiB, 2.26% gc time, 0.21% compilation time)
  1.092746 seconds (532.72 k allocations: 121.782 MiB, 2.09% gc time)


In [7]:
# Column labels for `data`: the design label, the four design parameters, and the three
# selection frequencies returned by `MC`.
Header= ([ "Part", "ρ", "σ", "β₁", "β₂" , "Pr(X1 is selected)", "P(X2 is selected)", "P(wrong X is selected)"])
# Print the stacked results as a formatted table.
pretty_table(data; header=Header)

┌──────┬─────┬─────┬─────┬─────┬────────────────────┬───────────────────┬────────────────────────┐
│ Part │   ρ │   σ │  β₁ │  β₂ │ Pr(X1 is selected) │ P(X2 is selected) │ P(wrong X is selected) │
├──────┼─────┼─────┼─────┼─────┼────────────────────┼───────────────────┼────────────────────────┤
│  (c) │ 0.2 │ 2.0 │ 2.0 │ 2.0 │                1.0 │               1.0 │                   0.04 │
│  (d) │ 0.2 │ 0.1 │ 0.1 │ 0.1 │                1.0 │               1.0 │                   0.05 │
│  (e) │ 0.2 │ 2.0 │ 2.0 │ 0.2 │                1.0 │              0.05 │                   0.02 │
│  (f) │ 0.9 │ 2.0 │ 2.0 │ 2.0 │                1.0 │               1.0 │                   0.95 │
└──────┴─────┴─────┴─────┴─────┴────────────────────┴───────────────────┴────────────────────────┘


### Answers 
In (c), the probability of selecting the correct regressors is 100%, while the probability of selecting a wrong regressor is approximately 4%.

In (d), the results do not change significantly under the new values of σ and β.

In (e), the coefficient on the second regressor is smaller than in the previous exercises, so with high probability it is not selected (1 − 5% = 95%).

In (f), the correlation between the irrelevant regressors and the relevant ones is too high, so the irrepresentable condition fails. As a result, the probability of including a wrong regressor in the model is 95%.