# A Dual Approach to Holistic Regression
## March 6th, 2021

In [1]:
using Random, Distributions
using LinearAlgebra
using Gurobi, JuMP
using DataFrames
using CSV
using StatsBase
using Plots
using ProgressBars
using Optim

In [2]:
gurobi_env = Gurobi.Env()

"""
    create_gurobi_model(; TimeLimit=-1, LogFile=nothing)

Build a JuMP `Model` backed by a Gurobi optimizer that reuses the shared `gurobi_env`.

# Keywords
- `TimeLimit`: forwarded to Gurobi's `TimeLimit` attribute when non-negative;
  a negative value (the default) leaves the attribute untouched.
- `LogFile`: when not `nothing`, forwarded to Gurobi's `LogFile` attribute;
  when `nothing`, `OutputFlag` is set to 0 instead.

`NumericFocus` is always set to 3.
"""
function create_gurobi_model(; TimeLimit=-1, LogFile=nothing)
    model = Model(optimizer_with_attributes(() -> Gurobi.Optimizer(gurobi_env)))
    if TimeLimit >= 0
        println("Set Gurobi TimeLimit.")
        set_optimizer_attribute(model, "TimeLimit", TimeLimit)
    end
    # `!==` is the idiomatic identity test against `nothing`.
    if LogFile !== nothing
        println("LogFile: $(LogFile).")
        set_optimizer_attribute(model, "LogFile", LogFile)
    else
        # No log file requested: switch the solver output off.
        set_optimizer_attribute(model, "OutputFlag", 0)
    end
    set_optimizer_attribute(model, "NumericFocus", 3)
    return model
end;


--------------------------------------------
--------------------------------------------

Academic license - for non-commercial use only


____
## 0. Utils

In [3]:
# Format one CSV entry: numbers are rounded to three digits, anything else is kept as is.
_format_entry(x::Number) = round(x, digits=3)
_format_entry(x) = x

"""
    write_list(file_path, l)

Append the elements of `l` to `file_path` as one comma-separated line.

Numeric elements are rounded to three digits; other elements (e.g. strings) are
written unchanged. Nothing is written when `l` is empty. Returns `nothing`.
"""
function write_list(file_path, l)
    isempty(l) && return nothing

    # Dispatch decides which entries get rounded, instead of catching the
    # exception thrown by `round` on non-numeric values.
    line = join((_format_entry(e) for e in l), ",")

    open(file_path, "a+") do io
        write(io, line * "\n")
    end

    return nothing
end

"""
    write_to_file(file_path, str)

Append `str` verbatim to the file at `file_path` (opened in `"a+"` mode) and
return the value of the underlying `write` call.
"""
write_to_file(file_path, str) = open(io -> write(io, str), file_path, "a+")

write_to_file (generic function with 1 method)

In [4]:
"""
    get_support(s)

Split the indices of `s` into the support (entries that are not `< 0.5`) and its
complement (entries `< 0.5`). Returns `(support, complement)`, both in
increasing index order.
"""
function get_support(s)
    active = Int[]
    inactive = Int[]

    for idx in eachindex(s)
        if s[idx] < 0.5
            push!(inactive, idx)
        else
            push!(active, idx)
        end
    end

    return active, inactive
end

# Sanity check: entries that are not < 0.5 (indices 3 and 4) form the support,
# the remaining indices form its complement.
get_support([0, 0, 1, 1, 0])

([3, 4], [1, 2, 5])

____
## 1. Generate Synthetic Data  

In [5]:
"""
    generate_synthetic_data(n, p, k, NR; seed=-1)

Draw a synthetic sparse regression data set and split it 50% / 25% / 25% into
train / validation / test parts.

# Arguments
- `n`: number of samples
- `p`: number of features
- `k`: number of non-zero coefficients
- `NR`: noise ratio ~ σ_noise = NR * σ_y_true
- `seed`: when non-negative, the global RNG is seeded with it first

# Returns
`(X_train, y_train), (X_val, y_val), (X_test, y_test), β`, where every design
matrix is centered and scaled with the training-column means and norms.
"""
function generate_synthetic_data(n, p, k, NR; seed=-1)
    seed >= 0 && Random.seed!(seed)

    # Random symmetric covariance built from a Gaussian Gram matrix
    G = randn(p, p)
    gram = G'*G
    Σ = (gram' + gram)/2

    # Samples are drawn as columns, so transpose to one sample per row
    d = MvNormal(Σ)
    X = rand(d, n)' * I

    # Train / validation / test row ranges
    cut_train = floor(Int, 0.5*n)
    cut_val = floor(Int, 0.75*n)
    index_train = 1:cut_train
    index_val = cut_train+1:cut_val
    index_test = cut_val+1:n

    X_train, X_val, X_test = X[index_train, :], X[index_val, :], X[index_test, :]

    # Center every split with the training-column means
    for j in 1:p
        μ_j = mean(X_train[:, j])
        X_train[:, j] = X_train[:, j] .- μ_j
        X_val[:, j] = X_val[:, j] .- μ_j
        X_test[:, j] = X_test[:, j] .- μ_j
    end

    # Scale every split with the norms of the centered training columns
    for j in 1:p
        σ_j = norm(X_train[:, j])
        X_train[:, j] = X_train[:, j]/σ_j
        X_val[:, j] = X_val[:, j] ./ σ_j
        X_test[:, j] = X_test[:, j] ./ σ_j
    end

    # k evenly spaced coefficients with random sign ±1
    β = zeros(p)
    for j in 1:k
        β[floor(Int, j*p/k)] = 1.0*rand([-1, 1])
    end

    # NOTE(review): the noise scale is computed from the un-normalized X while the
    # targets below use the standardized splits — confirm this matches the intended
    # meaning of NR.
    ϵ = rand(Normal(0, std(X*β)*NR), n)

    # Targets
    y_train = X_train*β + ϵ[index_train]
    y_val = X_val*β + ϵ[index_val]
    y_test = X_test*β + ϵ[index_test]

    return (X_train, y_train), (X_val, y_val), (X_test, y_test), β
end

# Quantile of level 1 - α/2 of the Student t distribution with n - p degrees of freedom.
function get_t_α(n, p, α)
    level = 1 - α/2
    return quantile(TDist(n - p), level)
end

"""
    get_σ_X(X, y, γ)

Return the vector `σ_tilde * sqrt.(diag(M_inv))` with `M_inv = inv(I/γ + X'X)` and
`σ_tilde = sqrt(y'(I - X*M_inv*X')y / (n - p))`, where `n, p = size(X)`.

The n×n matrix `X*M_inv*X'` is never formed: the residual `(I - X*M_inv*X')y` is
evaluated as `y - X*(M_inv*(X'y))`, which needs O(n·p) memory instead of O(n²).
"""
function get_σ_X(X, y, γ)
    n, p = size(X)

    M_inv = inv(I/γ + X'X)

    # (I - X*M_inv*X')*y, evaluated right-to-left to stay in O(n·p)
    residual = y - X*(M_inv*(X'y))

    # Estimator σ
    σ_tilde = sqrt(dot(y, residual)/(n-p))

    return σ_tilde * sqrt.(diag(M_inv))
end

"""
    get_R2(y_pred, y_true, y_train)

R² of the predictions `y_pred` against `y_true`, using the mean of `y_train` as
the baseline predictor.
"""
function get_R2(y_pred, y_true, y_train)
    baseline = mean(y_train)
    ratio = norm(y_true .- y_pred) / norm(y_true .- baseline)
    return 1 - ratio^2
end

;

## 2. Compute inner problems and gradients

### a. Compute g_s

In [6]:
"""
    g_s(D_s, b_s, σ_X_s; GD=true)

Solve the inner problem restricted to a support:

    max_{λ_s ≥ 0}  λ_s'σ_X_s - 0.5 * (λ_s + b_s)' * D_s * (λ_s + b_s)

by minimizing its negation with Optim's `Fminbox`, using `GradientDescent()` when
`GD` is true and `LBFGS()` otherwise.

Returns `(λ_s, value)`: the minimizer found and the corresponding value of the
maximization objective. For an empty support it returns `(zeros(0), 0.0)`.
"""
function g_s(D_s, b_s, σ_X_s; GD=true)

    # Get length of support of s
    l = length(b_s)

    # Case s == 0
    if l == 0
        return zeros(0), 0.0
    end

    # Initial solution, strictly inside the box λ_s ≥ 0
    λ_s0 = ones(l)

    # Compute objective and gradient at the same time
    function fg!(F, G, λ_s)

        μ_s = λ_s .+ b_s
        β_s = D_s*μ_s

        if G !== nothing
            G .= β_s .- σ_X_s
        end

        if F !== nothing
            return -λ_s'σ_X_s + 0.5*μ_s'β_s
        end

        return nothing
    end

    # Lagrangian multipliers constraint
    lower = zeros(l)
    upper = fill(Inf, l)

    res = Optim.optimize(Optim.only_fg!(fg!), lower, upper, λ_s0,
        Fminbox(GD ? GradientDescent() : LBFGS()))

    # The minimized function is the negated objective, hence the sign flip
    return Optim.minimizer(res), -Optim.minimum(res)

end

g_s (generic function with 1 method)

In [7]:
"""
    g_s_gurobi(D_s, b_s, σ_X_s, model)

Solve the support-restricted inner problem with the JuMP `model`, using the
first `length(b_s)` entries of its registered variable `λ`. The objective of
`model` is overwritten. Returns `(λ_s values, objective value)`, or
`(zeros(0), 0.0)` for an empty support.
"""
function g_s_gurobi(D_s, b_s, σ_X_s, model)

    # Empty support: nothing to optimize
    n_active = length(b_s)
    n_active == 0 && return zeros(0), 0.0

    λ_s = model[:λ][1:n_active]
    μ_s = λ_s .+ b_s
    β_s = D_s*μ_s

    @objective(model, Max, λ_s'σ_X_s - 0.5*μ_s'β_s)
    optimize!(model)

    return value.(λ_s), objective_value(model)
end;

In [8]:
"""
    g_gurobi(supp, Z, D, b, σ_X, model)

Solve the inner problem in the full dimension with the JuMP `model`, using its
registered variable `λ`. The objective of `model` is overwritten. Returns the
optimal `λ` restricted to `supp` together with the objective value.
"""
function g_gurobi(supp, Z, D, b, σ_X, model)

    λ = model[:λ]
    μ = b + λ

    # The product D*Z is formed once and reused inside the objective
    DZ = D*Z

    @objective(model, Max, λ'*Z*σ_X - 0.5μ'*DZ*μ)
    optimize!(model)

    λ_opt = value.(λ)
    return λ_opt[supp], objective_value(model)
end;

### b. Compute ∇g_s

In [9]:
"""
    ∇g_s(supp, supp_c, b, M, λ_s, D_s, σ_X_s, γ)

Assemble a vector of length `length(b)` from the support-restricted quantities.
With `β_s = D_s*(b[supp] .+ λ_s)`:

- entries in `supp` are `λ_s .* σ_X_s - β_s.^2 / (2γ)`;
- entries in `supp_c` are `-0.5γ * (b[supp_c] - M[supp_c, supp]*β_s).^2`.
"""
function ∇g_s(supp, supp_c, b, M, λ_s, D_s, σ_X_s, γ)
    β_s = D_s*(b[supp] .+ λ_s)

    # Off-support residual b - M*β restricted to the complement rows
    off_residual = b[supp_c] - M[supp_c, supp]*β_s

    out = zeros(length(b))
    out[supp] = λ_s .* σ_X_s .- (β_s .^ 2) ./ (2γ)
    out[supp_c] = -(0.5*γ) .* off_residual .^ 2

    return out
end

∇g_s (generic function with 1 method)

In [10]:
"""
    ∇g(supp, D, b, λ_s, σ_X, γ)

Full-dimension counterpart of the support-restricted gradient: `λ_s` is embedded
into a zero vector at positions `supp`, and the result is
`λ .* σ_X - (D'*(b + λ)).^2 / (2γ)`.
"""
function ∇g(supp, D, b, λ_s, σ_X, γ)
    # Zero-pad the multipliers outside the support
    λ_full = zeros(length(b))
    λ_full[supp] = λ_s

    β_full = D'*(b + λ_full)

    return λ_full .* σ_X - (β_full .^ 2)/(2γ)
end

∇g (generic function with 1 method)

_____
## 3. Compare speed 

### /!\ t_α is already in σ_X /!\

In [24]:
# Parameters
# n is twice n_train because generate_synthetic_data keeps the first 50% of the
# rows as the training split.
n_train = 10000
n = 2*n_train
p = 100
k = 10
NR = 0.001
α = 0.05
t_α = get_t_α(n_train, p, α)
γ = 1/sqrt(n_train)

# Generate data (only the training split and the true coefficients are kept)
(X_p, y), _, _, β_true = generate_synthetic_data(n, p, k, NR, seed=42);
σ_X_p = t_α * get_σ_X(X_p, y, γ); # σ_X_p already includes the t_α factor

# Compute data in p dimensions
M_p = X_p'X_p
b_p = X_p'y

# Compute data in 2p dimensions
# (first p entries: positive part of β, last p entries: negative part)
M = [M_p -M_p; -M_p  M_p]
b = [b_p; -b_p];
σ_X = [σ_X_p; σ_X_p] ;

In [25]:
# Create s
# 2p-dimensional 0/1 indicator of the true support: positive coefficients in the
# first p entries, negative coefficients in the last p entries.
s_true = vcat(β_true .> 0, β_true .< 0) .* 1 
supp, supp_c = get_support(s_true)

# Get projected variables
b_s = b[supp];
σ_X_s = σ_X[supp];
D_s = inv(I/γ + M[supp, supp]);

# Create model for g_s_gurobi
# (one multiplier per coefficient of the support)
model_inner_g_s = create_gurobi_model();
@variable(model_inner_g_s, λ[1:k] >= 0)

# Create model for g_gurobi
# (one multiplier per entry of the 2p-dimensional problem)
model_inner_g = create_gurobi_model();
@variable(model_inner_g, λ[1:2p] >= 0);

# Full-dimension counterparts of the projected quantities
Z = Diagonal(s_true);
D = inv(I/γ + Z*M);

### Compare g_s

In [26]:
# Solve the same inner problem with the four available solvers
λ_s_GD, g_s_GD = g_s(D_s, b_s, σ_X_s; GD=true)
λ_s_LBFGS, g_s_LBFGS = g_s(D_s, b_s, σ_X_s; GD=false)
λ_s_guro_s, g_s_guro_s = g_s_gurobi(D_s, b_s, σ_X_s, model_inner_g_s)
λ_s_guro, g_s_guro = g_gurobi(supp, Z, D, b, σ_X, model_inner_g)

# Compare objective values
println("GD: ",g_s_GD)
println("LBFGS: ",g_s_LBFGS)
println("Gurobi (g_s): ", g_s_guro_s)
println("Gurobi (g): ", g_s_guro)

# Compare λ (one column per solver)
hcat(λ_s_GD, λ_s_LBFGS, λ_s_guro_s, λ_s_guro)

GD: -0.04579702702244497
LBFGS: -0.04579702702244497
Gurobi (g_s): -0.04579702842816385
Gurobi (g): -0.04579702842816385


10×4 Array{Float64,2}:
 1.67444e-18  6.68598e-18  3.15464e-8  3.15464e-8
 0.112048     0.112048     0.112048    0.112048  
 1.02156e-20  6.86706e-17  4.4832e-8   4.4832e-8 
 0.0750941    0.0750941    0.0750945   0.0750945 
 0.0427485    0.0427485    0.0427494   0.0427494 
 0.042549     0.042549     0.04255     0.04255   
 0.0776394    0.0776394    0.0776398   0.0776398 
 1.6996e-17   3.3594e-17   4.57974e-6  4.57974e-6
 4.70998e-20  1.13528e-17  4.41367e-7  4.41367e-7
 1.63789e-18  3.9433e-17   1.26127e-8  1.26127e-8

In [27]:
# Computational time for D_s compared to D (given s)
# Each variant is run 1000 times and the elapsed times are accumulated.

total_time_D_s = 0
total_time_D = 0
for _ in 1:1000
    
    # Projected variant: extract the support, then invert the small matrix
    total_time_D_s += @elapsed begin 
        supp, supp_c = get_support(s_true)
        b_s = b[supp];
        σ_X_s = σ_X[supp];
        D_s = inv(I/γ + M[supp, supp]);
    end
    
    # Full variant: invert the 2p × 2p matrix
    total_time_D += @elapsed begin
        Z = Diagonal(s_true);
        D = inv(I/γ + Z*M);
    end
end

println("Compute D_s: ", total_time_D_s)
println("Compute D: ", total_time_D)

Compute D_s: 0.023847533000000008
Compute D: 3.6927635900000046


In [28]:
# Computational time of g_s and g for given D_s or D
# Each solver is run 1000 times; the two Gurobi models are the ones created
# earlier and are reused across iterations.

total_time_GD = 0
total_time_LBFGS = 0
total_time_Gurobi_g_s = 0
total_time_Gurobi_g = 0

for _ in 1:1000
    total_time_GD += @elapsed g_s(D_s, b_s, σ_X_s; GD=true)
    total_time_LBFGS += @elapsed g_s(D_s, b_s, σ_X_s; GD=false)
    total_time_Gurobi_g_s += @elapsed g_s_gurobi(D_s, b_s, σ_X_s, model_inner_g_s)
    total_time_Gurobi_g += @elapsed g_gurobi(supp, Z, D, b, σ_X, model_inner_g)
end

println("Optim.jl + GD: ",total_time_GD)
println("Optim.jl + LBFGS: ", total_time_LBFGS)
println("Gurobi g_s (model created outside): ", total_time_Gurobi_g_s)
println("Gurobi g (model created outside): ", total_time_Gurobi_g)

Optim.jl + GD: 4.810573553999998
Optim.jl + LBFGS: 1.779046216000001
Gurobi g_s (model created outside): 1.3252974939999995
Gurobi g (model created outside): 41.289308443000074


### Compare ∇g_s

In [29]:
# Compare the gradient for different optimal lambdas
# (projected formula with the Gurobi and GD multipliers, and the full-dimension
# formula with the Gurobi multipliers)

∇g_s_guro = ∇g_s(supp, supp_c, b, M, λ_s_guro, D_s, σ_X_s, γ)
∇g_s_GD = ∇g_s(supp, supp_c, b, M, λ_s_GD, D_s, σ_X_s, γ)
∇g_guro = ∇g(supp, D, b, λ_s_guro, σ_X, γ)

println("|| ∇g_s_guro - ∇g_s_GD || =  ", norm(∇g_s_guro - ∇g_s_GD))
println("|| ∇g_s_guro - ∇g_guro || =  ", norm(∇g_s_guro - ∇g_guro))
# Show the first ten entries side by side
hcat(∇g_s_GD, ∇g_s_guro, ∇g_guro)[1:10, :]

|| ∇g_s_guro - ∇g_s_GD || =  7.28425921268618e-10
|| ∇g_s_guro - ∇g_guro || =  6.151124447574778e-18


10×3 Array{Float64,2}:
 -0.000204746  -0.000204746  -0.000204746
 -0.000297864  -0.000297864  -0.000297864
 -5.99079e-6   -5.99079e-6   -5.99079e-6 
 -0.000144345  -0.000144345  -0.000144345
 -2.23807e-5   -2.23807e-5   -2.23807e-5 
 -0.000190836  -0.000190836  -0.000190836
 -2.47693e-6   -2.47693e-6   -2.47693e-6 
 -3.31235e-6   -3.31235e-6   -3.31235e-6 
 -4.72655e-5   -4.72655e-5   -4.72655e-5 
 -0.00671411   -0.00671411   -0.00671411 

In [30]:
# Computational time for ∇g_s compared to ∇g (given λ_s)
# Each variant is run 1000 times and the elapsed times are accumulated.

total_time_∇g_s = 0
total_time_∇g = 0

for _ in 1:1000
    
    # Projected gradient
    total_time_∇g_s += @elapsed begin 
        ∇g_s(supp, supp_c, b, M, λ_s_GD, D_s, σ_X_s, γ)
    end
    
    # Full-dimension gradient
    total_time_∇g += @elapsed begin
        ∇g(supp, D, b, λ_s_GD, σ_X, γ)
    end
end

println("Compute ∇g_s: ", total_time_∇g_s)
println("Compute ∇g: ", total_time_∇g)

Compute ∇g_s: 0.023095553000000008
Compute ∇g: 0.02642997599999998


_____
## 4. Cutting-plane algorithm

In [31]:
"""
    compute_primal(X, y, k, γ, σ_X; TimeLimit=-1, LogFile=nothing)

Solve the primal mixed-integer quadratic problem directly with Gurobi:

    min_β  0.5 * (y'y - 2y'Xβ + β'X'Xβ + (1/γ) * Σ_j β_j²)

subject to at most `k` selected features (`s`), big-M linking of `β` to `s`, and,
for every selected feature, `β[i]/σ_X[i] >= 1` or `-β[i]/σ_X[i] >= 1` (the binary
`b[i]` picks the sign).

`TimeLimit` and `LogFile` are forwarded to `create_gurobi_model`.
Returns `(objective value, β values)`.
"""
function compute_primal(X, y, k, γ, σ_X; TimeLimit=-1, LogFile=nothing)
    
    n, p = size(X)
    
    model = create_gurobi_model(;LogFile=LogFile, TimeLimit=TimeLimit)

    # TODO: change big-M values
    # M1 bounds |β[i]|; M2 deactivates the sign constraint that is not selected.
    M1 = 1000
    M2 = 1000

    @variable(model, β[i=1:p])
    @variable(model, s[i=1:p], Bin)
    @variable(model, b[i=1:p], Bin)

    # Sparsity: at most k selected features
    @constraint(model, sum(s) <= k)
    
    # β[i] is forced to zero when s[i] == 0
    @constraint(model, [i=1:p], β[i] <= M1*s[i])
    @constraint(model, [i=1:p], β[i] >= -M1*s[i])

    # When s[i] == 1: b[i] == 0 enforces β[i]/σ_X[i] >= 1,
    # b[i] == 1 enforces -β[i]/σ_X[i] >= 1.
    @constraint(model, [i=1:p], β[i]/σ_X[i] + M2*b[i] >= s[i])
    @constraint(model, [i=1:p], -β[i]/σ_X[i] + M2*(1-b[i]) >= s[i])

    # Precompute the data terms of the quadratic objective
    yty = y'y
    XtX = X'X
    ytX = y'X
    
    @objective(model, Min, 0.5*(yty - 2*ytX*β + β'*XtX*β + (1/γ)*sum(β[j]^2 for j=1:p)))
        
    JuMP.optimize!(model)
    
    return objective_value(model), value.(β)
end

compute_primal (generic function with 1 method)

In [32]:
"""
    compute_warm_start_primal(M_p, b_p, k, γ, σ_X_p, time_limit; LogFile=nothing)

Run the primal mixed-integer formulation (written with `M_p` and `b_p`, without
the constant term of the objective) for at most `time_limit` seconds and turn its
solution into a 2p-dimensional 0/1 vector: the first `p` entries flag selected
features with `b == 0`, the last `p` entries flag selected features with `b == 1`.

The solver's binary values are rounded to the nearest integer before use, because
they are only integral up to the solver's tolerance.
"""
function compute_warm_start_primal(M_p, b_p, k, γ, σ_X_p, time_limit; LogFile=nothing)

    p = length(b_p)

    model = create_gurobi_model(;TimeLimit=time_limit, LogFile=LogFile)

    # TODO: change big-M values
    M1 = 1000
    M2 = 1000

    @variable(model, β[i=1:p])
    @variable(model, s[i=1:p], Bin)
    @variable(model, b[i=1:p], Bin)

    # Sparsity: at most k selected features
    @constraint(model, sum(s) <= k)

    # β[i] is forced to zero when s[i] == 0
    @constraint(model, [i=1:p], β[i] <= M1*s[i])
    @constraint(model, [i=1:p], β[i] >= -M1*s[i])

    # When s[i] == 1: b[i] == 0 enforces β[i]/σ_X_p[i] >= 1,
    # b[i] == 1 enforces -β[i]/σ_X_p[i] >= 1.
    @constraint(model, [i=1:p], β[i]/σ_X_p[i] + M2*b[i] >= s[i])
    @constraint(model, [i=1:p], -β[i]/σ_X_p[i] + M2*(1-b[i]) >= s[i])

    @objective(model, Min, 0.5*(- 2*b_p'β + β'*M_p*β + (1/γ)*sum(β[j]^2 for j=1:p)))

    optimize!(model)

    # `Int.(x)` throws an InexactError on values such as 0.9999999, so round instead.
    s_val = round.(Int, value.(s))
    b_val = round.(Int, value.(b))

    return vcat(s_val .* (b_val .== 0), s_val .* (b_val .== 1))
end

compute_warm_start_primal (generic function with 1 method)

In [33]:
# Build the outer-approximation cut `t >= grad's + offset` at the point `s_val`.
# Returns `(grad, offset)`, where `offset` is the inner objective value at
# `s_val` minus the sum of the gradient entries on the support of `s_val`.
function _dual_cut(s_val, M, b, σ_X, γ)
    supp, supp_c = get_support(s_val)
    D_s, b_s, σ_X_s = inv(I/γ + M[supp, supp]), b[supp], σ_X[supp]
    λ_s, g_val = g_s(D_s, b_s, σ_X_s; GD=true)
    grad = ∇g_s(supp, supp_c, b, M, λ_s, D_s, σ_X_s, γ)
    return grad, g_val - sum(grad[supp])
end

"""
    compute_dual(X_p, y, k, γ, σ_X_p; LogFile=nothing, WarmStart=:None, TimeLimit=-1)

Solve the problem with a cutting-plane (outer-approximation) scheme: a
mixed-integer master problem over the 2p-dimensional binary vector `s` and an
epigraph variable `t` receives one cut at an initial point and further cuts
through a lazy-constraint callback.

# Keywords
- `WarmStart ∈ { :None, :RidgeStart, :PrimalStart }` selects the point of the
  initial cut: the first `k` entries set to one (`:None`), the signs of the ridge
  solution (`:RidgeStart`), or the output of `compute_warm_start_primal`
  (`:PrimalStart`).
- `LogFile`, `TimeLimit`: forwarded to `create_gurobi_model`.

# Returns
`(objective value of the master problem, β)`, where `β` has length `p` and is
the difference between the first and the last `p` entries of the 2p-dimensional
solution recovered on the final support.
"""
function compute_dual(X_p, y, k, γ, σ_X_p; LogFile=nothing, WarmStart=:None, TimeLimit=-1)

    # Get dimensions
    p = size(X_p, 2)

    # Constant
    C = 0.5*y'y

    # Compute data in p dimensions
    M_p = X_p'X_p
    b_p = X_p'y

    # Compute data in 2p dimensions
    M = [M_p -M_p; -M_p  M_p]
    b = [b_p; -b_p]
    σ_X = [σ_X_p; σ_X_p]

    # Outer problem
    miop = create_gurobi_model(;LogFile=LogFile, TimeLimit=TimeLimit)
    @variable(miop, s[1:2p], Bin)
    @variable(miop, t >= -C)
    @constraint(miop, sum(s) <= k)
    # A feature cannot be selected with both signs
    @constraint(miop, [i=1:p], s[i]+s[p+i]<=1)

    # Initial solution
    if (WarmStart == :RidgeStart)
        s0 = zeros(2p)
        β_ridge = inv(I/γ + M_p)*b_p
        s0[findall(x -> x>0, β_ridge)] .= 1.0
        s0[findall(x -> x<0, β_ridge) .+ p] .= 1.0

    elseif (WarmStart == :PrimalStart)
        s0 = compute_warm_start_primal(M_p, b_p, k, γ, σ_X_p, 20; LogFile=LogFile) # TODO: change time limit
    else
        s0 = zeros(2p)
        s0[1:k] .= 1
    end

    # Initial cut
    ∇g_s0, offset0 = _dual_cut(s0, M, b, σ_X, γ)

    @constraint(miop, t >= ∇g_s0's + offset0)
    @objective(miop, Min, t + C)

    # Cutting planes
    # The callback only uses names local to itself, so it does not reassign
    # variables of the enclosing function.
    function outer_approximation(cb_data)

        # Get feasible solution
        s_val = [callback_value(cb_data, s[i]) for i=1:2p]

        # Generate cut
        ∇g_s_val, offset_val = _dual_cut(s_val, M, b, σ_X, γ)

        con = @build_constraint(t >= ∇g_s_val's + offset_val)
        MOI.submit(miop, MOI.LazyConstraint(cb_data), con)

    end

    MOI.set(miop, MOI.LazyConstraintCallback(), outer_approximation)
    optimize!(miop)

    s_opt = value.(miop[:s])

    # Recover the coefficients on the final support
    supp, _ = get_support(s_opt)
    D_s, b_s, σ_X_s = inv(I/γ + M[supp, supp]), b[supp], σ_X[supp]
    λ_s_opt, _ = g_s(D_s, b_s, σ_X_s; GD=true)
    β = zeros(2p)
    β[supp] = D_s*(λ_s_opt + b_s)

    return objective_value(miop), β[1:p] - β[p+1:end]
end

compute_dual

In [34]:
# Solve the primal formulation with a 120 s time limit; the solver log goes to debug.txt.
@time primal_obj, β_primal = compute_primal(X_p, y, k, γ, σ_X_p, TimeLimit=120, LogFile="debug.txt")

Set Gurobi TimeLimit.
LogFile: debug.txt.
Gurobi Optimizer version 9.0.3 build v9.0.3rc0 (mac64)
Optimize a model with 401 rows, 300 columns and 1100 nonzeros
Model fingerprint: 0x526306df
Model has 5050 quadratic objective terms
Variable types: 100 continuous, 200 integer (200 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+03]
  Objective range  [1e-02, 1e+00]
  QObjective range [5e-06, 1e+02]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+01, 1e+03]
Found heuristic solution: objective 9.3237205
Presolve time: 0.00s
Presolved: 401 rows, 300 columns, 1100 nonzeros
Presolved model has 5050 quadratic objective terms
Variable types: 100 continuous, 200 integer (200 binary)

Root relaxation: objective 9.233698e+00, 486 iterations, 0.03 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    9.23370    0  159    9.32372    9.23370  0.97%     -    0

(9.276708834665603, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0115632  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00842449])

In [35]:
# Solve the cutting-plane formulation with the same time limit and log file.
@time dual_obj, β_dual = compute_dual(X_p, y, k, γ, σ_X_p, TimeLimit=120, LogFile="debug.txt")

Set Gurobi TimeLimit.
LogFile: debug.txt.
Gurobi Optimizer version 9.0.3 build v9.0.3rc0 (mac64)
Optimize a model with 102 rows, 201 columns and 601 nonzeros
Model fingerprint: 0xa89f65ed
Variable types: 1 continuous, 200 integer (200 binary)
Coefficient statistics:
  Matrix range     [4e-07, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [9e+00, 9e+00]
  RHS range        [4e-04, 1e+01]
Presolve time: 0.00s
Presolved: 102 rows, 201 columns, 601 nonzeros
Variable types: 1 continuous, 200 integer (200 binary)

Root relaxation: objective 9.275754e+00, 22 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    9.27612    0    2          -    9.27612      -     -    0s
     0     2    9.27612    0    2          -    9.27612      -     -    0s
H    6     7                       9.2897739    9.27667  0.14%   2.7    0s
*   15     5               

(9.276735332741374, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.011561  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00842449])

In [36]:
# Columns: true coefficients, primal solution, dual (cutting-plane) solution.
hcat(β_true, β_primal, β_dual)

100×3 Array{Float64,2}:
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 1.0  0.0115632   0.011561  
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 ⋮                          
 0.0  0.0         0.0       
 1.0  0.00842449  0.00842449
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 0.0  0.0         0.0       
 1.0  0.00842449  0.00842449

_____
## 5. Experiments

In [44]:
# Parameters
n_train = 10000
n = 2*n_train
NR = 0.05
α = 0.05;
t_max = 5*60 # 5 min (300 s) max per solve

300

In [47]:
# Output files: one CSV row per solved instance, plus the Gurobi log.
csv_path = "results/2021_03_09-19h00.csv"
logfile = "results/2021_03_09-19h00.log"

# Header row; the columns match the rows written by the experiment loop.
write_list(csv_path, ["Algo", "Seed", "n", "p", "k_true", "k", "γ", 
                      "NR", "α", "obj", "R2", "OR2", "t_algo", "t_data", "t_variance"])

In [None]:
# For every seed and dimension p: generate data, then solve the dual
# (cutting-plane) and the primal formulations and append one CSV row for each.
for seed ∈ [35]
    for p ∈ [100, 500, 1000, 2500, 5000]

        # True sparsity
        k_true = 10

        # Generate data
        t_data = @elapsed (X_train, y_train), _, (X_test, y_test), β_true = generate_synthetic_data(n, p, k_true, NR, seed=seed);

        # Significance
        t_variance = @elapsed t_α = get_t_α(n_train, p, α)
        
        # Robustness
        for γ ∈ [p/sqrt(n_train)]
            
            # Variance (σ_X already includes the t_α factor)
            t_variance += @elapsed σ_X = t_α * get_σ_X(X_train, y_train, γ)
        
            # Estimated sparsity
            Random.seed!(seed)
            k = 12
            
            # Dual
            # R2 is measured on the training data, OR2 on the test data.
            t_dual = @elapsed obj_dual, β_dual = compute_dual(X_train, y_train, k, γ, σ_X; LogFile=logfile, TimeLimit=t_max);
            R2_dual = get_R2(X_train*β_dual, y_train, y_train)
            OR2_dual = get_R2(X_test*β_dual, y_test, y_train)
            write_list(csv_path, ["dual", seed, n_train, p, k_true, k, γ, 
                                  NR, α, obj_dual, R2_dual, OR2_dual, t_dual, t_data, t_variance])
            
            # Primal
            t_primal = @elapsed obj_primal, β_primal = compute_primal(X_train, y_train, k, γ, σ_X; LogFile=logfile, TimeLimit=t_max);
            R2_primal = get_R2(X_train*β_primal, y_train, y_train)
            OR2_primal = get_R2(X_test*β_primal, y_test, y_train)
            write_list(csv_path, ["primal", seed, n_train, p, k_true, k, γ, 
                                  NR, α, obj_primal, R2_primal, OR2_primal, t_primal, t_data, t_variance])
            
        end
    end
end

Set Gurobi TimeLimit.
LogFile: results/2021_03_09-19h00.log.
Gurobi Optimizer version 9.0.3 build v9.0.3rc0 (mac64)
Optimize a model with 102 rows, 201 columns and 601 nonzeros
Model fingerprint: 0x1b31c87a
Variable types: 1 continuous, 200 integer (200 binary)
Coefficient statistics:
  Matrix range     [1e-04, 3e+01]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+04, 1e+04]
  RHS range        [1e+00, 3e+01]
Presolve time: 0.00s
Presolved: 102 rows, 201 columns, 601 nonzeros
Variable types: 1 continuous, 200 integer (200 binary)

Root relaxation: objective 1.459672e+04, 20 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0 14663.8707    0    2          - 14663.8707      -     -    0s
H    0     0                    14850.270097 14663.8707  1.26%     -    0s
     0     2 14696.6736    0   10 14850.2701 14696.6736  1.03%     -    0s
* 1030  