In [1]:
include("SparseRegression.jl")

sparseregression (generic function with 1 method)

In [2]:
# Prediction error of the coefficient vector β on the data (X, y):
# the norm of the residual vector y - X*β. Smaller is better.
function evaluate(X, y, β)
    residual = y - X * β
    return norm(residual)
end

evaluate (generic function with 1 method)

In [42]:
"""
    lasso_reg(X, y, ρ)

Build and solve an ℓ1-penalised regression as a JuMP model with Gurobi
(solver log suppressed via `OutputFlag=0`).

The model minimises `t + ρ * θ`, where `t` bounds `norm(y - X * β)`, each `z[i]`
bounds `β[i]` and `-β[i]` from above, and `θ` bounds `sum(z)`.

Returns the solver's value for `β`. The status returned by `solve` is not checked.
"""
function lasso_reg(X, y, ρ)
    model = Model(solver=GurobiSolver(OutputFlag=0))
    nfeat = size(X, 2)

    # Decision variables: epigraph variables t and θ, coefficients β, bounds z on |β|.
    @variable(model, t)
    @variable(model, θ)
    @variable(model, β[1:nfeat])
    @variable(model, z[1:nfeat])

    # Residual norm is bounded by t; the sum of the z's is bounded by θ.
    @constraint(model, norm(y - X * β) <= t)
    @constraint(model, sum(z[i] for i = 1:nfeat) <= θ)
    # z[i] >= β[i] and z[i] >= -β[i] together give z[i] >= |β[i]|.
    @constraint(model, [i=1:nfeat], z[i] >=  β[i])
    @constraint(model, [i=1:nfeat], z[i] >= -β[i])

    @objective(model, Min, t + ρ * θ)

    solve(model)

    coefficients = getvalue(β)
    return coefficients
end


lasso_reg (generic function with 1 method)

In [3]:
"""
    detection_rate(pred, truth; tol=0)

Compare the support (set of nonzero positions) of `pred` with that of `truth`.

# Arguments
- `pred`: estimated coefficient vector.
- `truth`: ground-truth coefficient vector; its support is the set of exact nonzeros.
- `tol`: entries of `pred` with `abs(x) <= tol` are treated as zero. The default `0`
  reproduces an exact nonzero test; a small positive value ignores solver round-off.

# Returns
A tuple `(A, F)`:
- `A`: fraction of the true support that is also selected by `pred`
  (`NaN` when `truth` has no nonzero entries).
- `F`: fraction of the entries selected by `pred` that are not in the true support.
  Defined as `0.0` when `pred` selects nothing, since nothing was falsely selected.
"""
function detection_rate(pred, truth; tol=0)
    # Written as a negated <= so that NaN entries still count as "nonzero",
    # matching an exact `x != 0` test when tol == 0.
    isselected(x) = !(abs(x) <= tol)

    inds_true = [i for i in eachindex(truth) if truth[i] != 0]
    inds_pred = [i for i in eachindex(pred) if isselected(pred[i])]

    k = length(inds_true)
    A = length(intersect(inds_true, inds_pred)) / k

    # Guard the empty selection: 0/0 would otherwise produce NaN.
    F = if isempty(inds_pred)
        0.0
    else
        length(setdiff(inds_pred, inds_true)) / length(inds_pred)
    end
    return A, F
end

detection_rate (generic function with 1 method)

In [43]:
using JuMP, Gurobi

"""
    sparseregressionbigM(X, y, k, M)

Build and solve a cardinality-constrained regression as a mixed-integer JuMP model
with Gurobi (log suppressed via `OutputFlag=0`, Gurobi `TimeLimit` set to 1500).

Binary `s[i]` switches coefficient `β[i]` on or off through the big-M bounds
`-M * s[i] <= β[i] <= M * s[i]`; at most `k` of the `s[i]` may be 1. The objective
minimises `t`, which bounds `norm(y - X * β)`.

Returns `(β values, objective value)`. The status returned by `solve` is not
checked, so callers cannot tell from the result whether the time limit was hit.
"""
function sparseregressionbigM(X, y, k, M)
    model = Model(solver=GurobiSolver(OutputFlag=0, TimeLimit = 1500))
    nfeat = size(X, 2)

    # Decision variables: residual bound t, coefficients β, on/off indicators s.
    @variable(model, t)
    @variable(model, β[1:nfeat])
    @variable(model, s[1:nfeat], Bin)

    # Residual norm bounded by t; at most k indicators switched on.
    @constraint(model, norm(y - X * β) <= t)
    @constraint(model, sum(s[i] for i = 1:nfeat) <= k)
    # Big-M link: β[i] is forced to 0 whenever s[i] == 0.
    @constraint(model, [i=1:nfeat], β[i] <=  M * s[i])
    @constraint(model, [i=1:nfeat], β[i] >= -M * s[i])

    @objective(model, Min, t)

    solve(model)

    coefficients = getvalue(β)
    objective = getobjectivevalue(model)
    return coefficients, objective
end

sparseregressionbigM (generic function with 1 method)

In [4]:
using MLDataUtils
using DataFrames
using DataFramesMeta

In [5]:


# Load the three header-less CSV files. For Y and B only the result of indexing
# the table with [1] is kept (presumably the first column — confirm against the
# DataFrames version in use).
myDataX = readtable("C:/Users/subha/Desktop/ML - HW 2/sparseX2.csv",header=false);
myDataY = readtable("C:/Users/subha/Desktop/ML - HW 2/sparseY2.csv",header=false)[1];
myDataB = readtable("C:/Users/subha/Desktop/ML - HW 2/sparseB2.csv",header=false)[1];

# Float64 copy of B; passed as `truth` to detection_rate in later cells.
betaTrue = Array{Float64}(myDataB);


# Part A)  Run the cutting plane algorithm provided for sparse linear regression, finding the best sparsity k through validation

In [36]:
using MLDataUtils
# Seed the RNG so the shuffles below are reproducible.
srand(1)
# First split: shuffle, then cut at 50% into (train, test).
(train_X, train_Y), (test_X, test_Y) = splitobs(shuffleobs((myDataX,myDataY)), at=.5);
# Second split: reshuffle the training half and cut at 50% into (train, validation).
(train_X, train_Y), (vl_X, vl_Y) = splitobs(shuffleobs((train_X,train_Y)), at=.5);

In [37]:
# Materialise the training (X1, Y1) and validation (V1, V2) splits as plain arrays.
X1 = Matrix(train_X) 
Y1 = Array{Float64}(train_Y)
V1 = Matrix(vl_X)
V2 = Array{Float64}(vl_Y)
# Running best validation score and the k that achieved it.
best_score = Inf
best_k = Inf
# Per-k results, appended in loop order: k, accuracy A, false-positive rate F, score.
ks = []
As = []
Fs = []
scores = []

# For each candidate sparsity: fit on the training split, score on the validation split.
for i in [4, 6, 8, 10,12]
    push!(ks, i)
    betaTS = sparseregression(X1,Y1,i)
    
    score = evaluate(V1, V2, betaTS)
    push!(scores, score)    
        # Strict '<' keeps the first (smallest) k in case of a tie.
        if score < best_score
        best_score = score
        best_k = i
        end
     # Support recovery is measured against the true coefficients, not the validation data.
     A,F= detection_rate(betaTS, betaTrue)  
    push!(As, A)
    push!(Fs, F)
end

@show best_score, best_k

# Column order here is [k, score, A, F].
tablebest = hcat(ks, scores, As, Fs)


println("I ran the cutting plane algorithm for best sparsity and determined
that k=$best_k had the best error through validation")


Stacktrace:
 [1] [1mdepwarn[22m[22m[1m([22m[22m::String, ::Symbol[1m)[22m[22m at [1m.\deprecated.jl:70[22m[22m
 [2] [1mabs[22m[22m[1m([22m[22m::RowVector{Float64,Array{Float64,1}}[1m)[22m[22m at [1m.\deprecated.jl:57[22m[22m
 [3] [1mfit_relaxation![22m[22m[1m([22m[22m::SparseRegression.SparseRegressor, ::Array{Float64,1}, ::Array{Float64,2}[1m)[22m[22m at [1mC:\Users\subha\Desktop\ML - HW 2\SparseRegression.jl:86[22m[22m
 [4] [1mfit![22m[22m[1m([22m[22m::SparseRegression.SparseRegressor, ::Array{Float64,1}, ::Array{Float64,2}[1m)[22m[22m at [1mC:\Users\subha\Desktop\ML - HW 2\SparseRegression.jl:167[22m[22m
 [5] [1msparseregression[22m[22m at [1mC:\Users\subha\Desktop\ML - HW 2\SparseRegression.jl:226[22m[22m [inlined]
 [6] [1mmacro expansion[22m[22m at [1m.\In[37]:14[22m[22m [inlined]
 [7] [1manonymous[22m[22m at [1m.\<missing>:?[22m[22m
 [8] [1minclude_string[22m[22m[1m([22m[22m::String, ::String[1m)[22m[22m a

Academic license - for non-commercial use only
Optimize a model with 2 rows, 101 columns and 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [6e-04, 4e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [3e+00, 4e+00]
Presolve time: 0.00s
Presolved: 2 rows, 101 columns, 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)

Root relaxation: objective 0.000000e+00, 6 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.00000    0    2          -    0.00000      -     -    0s
H    0     0                       3.0168783    0.00000   100%     -    0s
H    0     0                       2.9301944    0.00000   100%     -    0s
H    0     0                       2.8371528    0.00000   100%     -    0s
     0     0    



Academic license - for non-commercial use only
Optimize a model with 2 rows, 101 columns and 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [7e-08, 5e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [2e+00, 8e+00]
Presolve time: 0.00s
Presolved: 2 rows, 101 columns, 200 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)

Root relaxation: objective 0.000000e+00, 6 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.00000    0    4          -    0.00000      -     -    0s
H    0     0                       1.6696983    0.00000   100%     -    0s
H    0     0                       1.4182496    0.00000   100%     -    0s
H    0     0                       1.0451342    0.00000   100%     -    0s
     0     0    



Academic license - for non-commercial use only
Optimize a model with 2 rows, 101 columns and 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [1e-08, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+01]
Presolve time: 0.00s
Presolved: 2 rows, 101 columns, 200 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Found heuristic solution: objective 0.6308883

Root relaxation: objective 2.867514e-01, 13 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.28675    0    2    0.63089    0.28675  54.5%     -    0s
     0     0    0.28854    0    3    0.63089    0.28854  54.3%     -    0s
     0     0    0.29144    0    6    0.63089    0.29144  53.8%     -    0s
     0     0    0.29630    0    6    0.63089



Academic license - for non-commercial use only
Optimize a model with 2 rows, 101 columns and 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [6e-08, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+01]
Presolve time: 0.00s
Presolved: 2 rows, 101 columns, 200 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)

Root relaxation: objective 2.363862e-01, 9 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.23639    0    2          -    0.23639      -     -    0s
H    0     0                       1.9494985    0.23639  87.9%     -    0s
     0     0    0.24679    0    4    1.94950    0.24679  87.3%     -    0s
H    0     0                       1.8403648    0.24679  86.6%     -    0s
H    0     0    



(best_score, best_k) = (5.592658754072292, 10)
I ran the cutting plane algorithm for best sparsity and determined
that k=10 had the best error through validation


In [45]:
 table = hcat(ks, As, Fs, scores)  # one row per entry of ks; columns are [ks, As, Fs, scores]


5×4 Array{Real,2}:
  4  0.4  0.0   18.3967 
  6  0.6  0.0   13.9161 
  8  0.8  0.0    9.36528
 10  1.0  0.0    5.59266
 12  0.9  0.25   8.20702

Part A) I ran the provided cutting-plane algorithm for sparse linear
regression and selected the sparsity k through validation;
the best value was k = 10. See the table above.

In [67]:

# Noted via the validation set that the best k is 10.
# NOTE: from here on the upper-case X1 / Y1 hold the TEST split,
# while the lower-case x1 / y1 hold the training split.
X1 = Matrix(test_X) 
Y1 = Array{Float64}(test_Y)

x1 = Matrix(train_X) 
y1 = Array{Float64}(train_Y)
# Refit on the training split with the selected sparsity.
betaTest = sparseregression(x1,y1,best_k)

# Test-set residual norm and support recovery against the true coefficients.
score = evaluate(X1, Y1, betaTest)
A,F= detection_rate(betaTest, betaTrue)

@show score, A, F

Stacktrace:
 [1] [1mdepwarn[22m[22m[1m([22m[22m::String, ::Symbol[1m)[22m[22m at [1m.\deprecated.jl:70[22m[22m
 [2] [1mabs[22m[22m[1m([22m[22m::RowVector{Float64,Array{Float64,1}}[1m)[22m[22m at [1m.\deprecated.jl:57[22m[22m
 [3] [1mfit_relaxation![22m[22m[1m([22m[22m::SparseRegression.SparseRegressor, ::Array{Float64,1}, ::Array{Float64,2}[1m)[22m[22m at [1mC:\Users\subha\Desktop\ML - HW 2\SparseRegression.jl:86[22m[22m
 [4] [1mfit![22m[22m[1m([22m[22m::SparseRegression.SparseRegressor, ::Array{Float64,1}, ::Array{Float64,2}[1m)[22m[22m at [1mC:\Users\subha\Desktop\ML - HW 2\SparseRegression.jl:167[22m[22m
 [5] [1msparseregression[22m[22m[1m([22m[22m::Array{Float64,2}, ::Array{Float64,1}, ::Int64[1m)[22m[22m at [1mC:\Users\subha\Desktop\ML - HW 2\SparseRegression.jl:226[22m[22m
 [6] [1minclude_string[22m[22m[1m([22m[22m::String, ::String[1m)[22m[22m at [1m.\loading.jl:515[22m[22m
 [7] [1minclude_string[22m[22m

Academic license - for non-commercial use only
Optimize a model with 2 rows, 101 columns and 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [1e-08, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+01]
Presolve time: 1.49s
Presolved: 2 rows, 101 columns, 200 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Found heuristic solution: objective 0.6308883

Root relaxation: objective 2.867514e-01, 13 iterations, 0.05 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.28675    0    2    0.63089    0.28675  54.5%     -    3s
     0     0    0.28854    0    3    0.63089    0.28854  54.3%     -    5s
     0     0    0.29144    0    6    0.63089    0.29144  53.8%     -    6s
     0     0    0.29630    0    6    0.63089



(score, A, F) = (8.369623111091634, 1.0, 0.0)


(8.369623111091634, 1.0, 0.0)

# Part B ) Compare and contrast this method with your primal approach using the big-M method from Homework 1. If things take too long to run, you can fix M = 1 and k to the best value found by the cutting plane approach. Please make sure that you do at least try other values of M to see how much they affect the solve time and solution quality.

In [10]:
using JuMP, Gurobi

"""
    sparseregressionbigM(X, y, k, M)

Build and solve a cardinality-constrained regression as a mixed-integer JuMP model
with Gurobi (log suppressed via `OutputFlag=0`, Gurobi `TimeLimit` set to 1500).

Binary `s[i]` switches coefficient `β[i]` on or off through the big-M bounds
`-M * s[i] <= β[i] <= M * s[i]`; at most `k` of the `s[i]` may be 1. The objective
minimises `t`, which bounds `norm(y - X * β)`.

Returns `(β values, objective value)`. The status returned by `solve` is not
checked, so callers cannot tell from the result whether the time limit was hit.
"""
function sparseregressionbigM(X, y, k, M)
    model = Model(solver=GurobiSolver(OutputFlag=0, TimeLimit = 1500))
    nfeat = size(X, 2)

    # Decision variables: residual bound t, coefficients β, on/off indicators s.
    @variable(model, t)
    @variable(model, β[1:nfeat])
    @variable(model, s[1:nfeat], Bin)

    # Residual norm bounded by t; at most k indicators switched on.
    @constraint(model, norm(y - X * β) <= t)
    @constraint(model, sum(s[i] for i = 1:nfeat) <= k)
    # Big-M link: β[i] is forced to 0 whenever s[i] == 0.
    @constraint(model, [i=1:nfeat], β[i] <=  M * s[i])
    @constraint(model, [i=1:nfeat], β[i] >= -M * s[i])

    @objective(model, Min, t)

    solve(model)

    coefficients = getvalue(β)
    objective = getobjectivevalue(model)
    return coefficients, objective
end

sparseregressionbigM (generic function with 1 method)

In [68]:
# Reload the raw data so this cell can be run on its own.
myDataX = readtable("C:/Users/subha/Desktop/ML - HW 2/sparseX2.csv",header=false);
myDataY = readtable("C:/Users/subha/Desktop/ML - HW 2/sparseY2.csv",header=false)[1];
myDataB = readtable("C:/Users/subha/Desktop/ML - HW 2/sparseB2.csv",header=false)[1];

betaTrue = Array{Float64}(myDataB)

using MLDataUtils
# Same seed and same two-stage 50/50 split as the cutting-plane experiment.
srand(1)
(train_X, train_Y), (test_X, test_Y) = splitobs(shuffleobs((myDataX,myDataY)), at=.5);
(train_X, train_Y), (vl_X, vl_Y) = splitobs(shuffleobs((train_X,train_Y)), at=.5);

# Training (X1, Y1), validation (V1, V2) and test (T1, T2) splits as plain arrays.
X1 = Matrix(train_X)
Y1 = Array{Float64}(train_Y)
V1 = Matrix(vl_X)
V2 = Array{Float64}(vl_Y)
T1 = Matrix(test_X)
T2 = Array{Float64}(test_Y)

k = 10 # fix k for this run based on best solution from cutting planes.
best_error = Inf
best_M = 1 #initialize the best M
for M = [0.5, 1, 2]
    @show M
    @show k
    @time β, obj = sparseregressionbigM(X1, Y1, k, M)
    # Report the training objective for every M, not only for improving ones,
    # so that all candidates can be compared.
    @show obj
    # Named val_error rather than `error` to avoid shadowing Base.error.
    val_error = evaluate(V1, V2, β)
    if val_error < best_error
        best_error = val_error
        best_M = M
    end
end

# Refit with the selected M (and the same k as in the search) and score on the test split.
β, obj = sparseregressionbigM(X1, Y1, k, best_M)
test_error = evaluate(T1, T2, β)
(A, F) = detection_rate(β, betaTrue)
@show test_error, A, F
   

    

M = 0.5
k = 10
Academic license - for non-commercial use only
  7.475839 seconds (21.81 k allocations: 2.841 MiB)
obj = 10.472474110100396
M = 1.0
k = 10
Academic license - for non-commercial use only
  2.811910 seconds (21.81 k allocations: 2.841 MiB)
obj = 2.934009834327178
M = 2.0
k = 10
Academic license - for non-commercial use only
404.797117 seconds (21.81 k allocations: 2.841 MiB)
Academic license - for non-commercial use only
(error, A, F) = (5.4228073748299055, 1.0, 0.0)


(5.4228073748299055, 1.0, 0.0)

In [69]:
best_M  # display the M that gave the lowest validation error in the loop over M

1.0

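The primal (big-M) formulation takes a lot longer to solve than the cutting-plane method. The error goes down as M is increased from 0.5 to 1, but larger values of M also take longer to solve: M = 2 needed about 405 seconds and did not improve the validation error, so M = 1 was selected.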


In [12]:
using DataFrames
using DataFramesMeta

# Reload the three header-less CSV files (same files and indexing as the earlier loading cell).
myDataX = readtable("C:/Users/subha/Desktop/ML - HW 2/sparseX2.csv",header=false);
myDataY = readtable("C:/Users/subha/Desktop/ML - HW 2/sparseY2.csv",header=false)[1];
myDataB = readtable("C:/Users/subha/Desktop/ML - HW 2/sparseB2.csv",header=false)[1];


In [70]:
# using DataFrames
# X = Matrix(myDataX) 
# y = Array{Float64}(myDataY)
# w = Array{Float64}(myDataB)
# M = 1

# for k = [5,10,15,20]
#     @show M
#     @show k
#     @time β, obj = sparseregressionbigM(X, y, k, M)
#     @show detection_rate(β, w)
#     @show obj
# end

The solution quality of the primal (big-M) formulation on the whole data set is not very good either. The code takes a long time to run, and the resulting accuracy and false positive rates are too varied.


# Part C)  Compare and contrast the cutting plane method with ridge and lasso regression (you can reuse your code from the first part of Homework 1). In particular you should compare the accuracy and false positive rates and the errors of each method.


In [40]:
"""
    lasso_reg(X, y, ρ)

Build and solve an ℓ1-penalised regression as a JuMP model with Gurobi
(solver log suppressed via `OutputFlag=0`).

The model minimises `t + ρ * θ`, where `t` bounds `norm(y - X * β)`, each `z[i]`
bounds `β[i]` and `-β[i]` from above, and `θ` bounds `sum(z)`.

Returns the solver's value for `β`. The status returned by `solve` is not checked.
"""
function lasso_reg(X, y, ρ)
    model = Model(solver=GurobiSolver(OutputFlag=0))
    nfeat = size(X, 2)

    # Decision variables: epigraph variables t and θ, coefficients β, bounds z on |β|.
    @variable(model, t)
    @variable(model, θ)
    @variable(model, β[1:nfeat])
    @variable(model, z[1:nfeat])

    # Residual norm is bounded by t; the sum of the z's is bounded by θ.
    @constraint(model, norm(y - X * β) <= t)
    @constraint(model, sum(z[i] for i = 1:nfeat) <= θ)
    # z[i] >= β[i] and z[i] >= -β[i] together give z[i] >= |β[i]|.
    @constraint(model, [i=1:nfeat], z[i] >=  β[i])
    @constraint(model, [i=1:nfeat], z[i] >= -β[i])

    @objective(model, Min, t + ρ * θ)

    solve(model)

    coefficients = getvalue(β)
    return coefficients
end





lasso_reg (generic function with 1 method)

In [71]:
# TEST OF LASSO #
# Recreate the seeded train / validation / test partition, choose ρ on the
# validation split, then report test error and support recovery for that ρ.
srand(1)
(train_X, train_Y), (test_X, test_Y) = splitobs(shuffleobs((myDataX,myDataY)), at=.5)
(train_X, train_Y), (vl_X, vl_Y) = splitobs(shuffleobs((train_X,train_Y)), at=.5)
X1tr = Matrix(train_X)
Y1tr = Array{Float64}(train_Y)
X1va = Matrix(vl_X)
Y1va = Array{Float64}(vl_Y)
X1te = Matrix(test_X)
Y1te = Array{Float64}(test_Y)

# True coefficients. Assigned before the loop so that `w` is defined for the final
# detection_rate call no matter which other cells have been executed.
w = Array{Float64}(myDataB)

best_rho = Inf
best_score = Inf
for rho in [.01, .1, .5, 1, 2]
    β = lasso_reg(X1tr, Y1tr,rho)
    score = evaluate(X1va,Y1va,β)
    # Strict '<' keeps the first ρ in case of a tie.
    if score < best_score
        best_score = score
        best_rho = rho
    end
end
@show best_rho

# Refit with the selected ρ and evaluate on the held-out test split.
betatest = lasso_reg(X1tr, Y1tr,best_rho)
score = evaluate(X1te, Y1te, betatest)
A,F= detection_rate(betatest, w)
@show A, F, score

Academic license - for non-commercial use only
Academic license - for non-commercial use only
Academic license - for non-commercial use only
Academic license - for non-commercial use only
Academic license - for non-commercial use only
best_rho = 1.0
Academic license - for non-commercial use only
(A, F, score) = (1.0, 0.9, 9.915520376579174)


(1.0, 0.9, 9.915520376579174)

In [16]:
"""
    ridge_reg(X, y, ρ)

Build and solve a norm-penalised regression as a JuMP model with Gurobi
(solver log suppressed via `OutputFlag=0`).

The model minimises `t + ρ * θ`, where `t` bounds `norm(y - X * β)` and `θ` bounds
`norm(β)`; note that the penalty is on the norm itself, not its square.

Returns the solver's value for `β`. The status returned by `solve` is not checked.
"""
function ridge_reg(X, y, ρ)
    model = Model(solver=GurobiSolver(OutputFlag=0))
    nfeat = size(X, 2)

    # Decision variables: epigraph variables t and θ, and the coefficients β.
    @variable(model, t)
    @variable(model, θ)
    @variable(model, β[1:nfeat])

    # Residual norm bounded by t; coefficient norm bounded by θ.
    @constraint(model, norm(y - X * β) <= t)
    @constraint(model, norm(β) <= θ)

    @objective(model, Min, t + ρ * θ)

    solve(model)

    coefficients = getvalue(β)
    return coefficients
end


ridge_reg (generic function with 1 method)

In [72]:
# TEST OF Ridge #
# Recreate the seeded train / validation / test partition, choose ρ on the
# validation split, then report test error and support recovery for that ρ.
srand(1)
(train_X, train_Y), (test_X, test_Y) = splitobs(shuffleobs((myDataX,myDataY)), at=.5)
(train_X, train_Y), (vl_X, vl_Y) = splitobs(shuffleobs((train_X,train_Y)), at=.5)
X1tr = Matrix(train_X)
Y1tr = Array{Float64}(train_Y)
X1va = Matrix(vl_X)
Y1va = Array{Float64}(vl_Y)
X1te = Matrix(test_X)
Y1te = Array{Float64}(test_Y)

# True coefficients. Assigned before the loop so that `w` is defined for the final
# detection_rate call no matter which other cells have been executed.
w = Array{Float64}(myDataB)

best_rho = Inf
best_score = Inf
for rho in [.01, .1, .5, 1, 2]
    β = ridge_reg(X1tr, Y1tr,rho)
    score = evaluate(X1va,Y1va,β)
    # Strict '<' keeps the first ρ in case of a tie.
    if score < best_score
        best_score = score
        best_rho = rho
    end
end
@show best_rho

# Refit with the selected ρ and evaluate on the held-out test split.
betatest = ridge_reg(X1tr, Y1tr,best_rho)
score = evaluate(X1te, Y1te, betatest)
A,F= detection_rate(betatest, w)
@show A, F, score

Academic license - for non-commercial use only
Academic license - for non-commercial use only
Academic license - for non-commercial use only
Academic license - for non-commercial use only
Academic license - for non-commercial use only
best_rho = 0.01
Academic license - for non-commercial use only
(A, F, score) = (1.0, 0.9, 23.872598262708497)


(1.0, 0.9, 23.872598262708497)

The lasso and ridge regressions take a short amount of time, but they yield high false positive rates (F = 0.9 for both). While they have a 100% accuracy rate, they are not very useful because they are not good at excluding irrelevant features: most of the features they select are false positives. The scores from the evaluate function are also larger (worse) for ridge and lasso than for sparse regression. From best to worst, the methods rank as follows: sparse regression (cutting planes), lasso, and ridge.

In [20]:
# X1te = Matrix(test_X) 
Y1te = Array{Float64,1}(test_Y)  # test responses converted to a Float64 vector

100-element Array{Float64,1}:
 -3.44753 
  1.67705 
 -5.21783 
  8.5104  
  2.23237 
 -0.713251
 -0.366368
  0.826948
  1.22937 
  3.45285 
  2.34286 
  8.06128 
  2.71377 
  ⋮       
  4.66784 
  0.469463
 -4.05879 
  0.403874
 -1.06833 
  1.30128 
  8.82361 
 -0.346036
  0.673077
 -4.52402 
 -3.60963 
  0.92946 

# Part D ) Try running the regression on only the first 100 points in the dataset, then the first 90 points only, and so on for 80, 70, and so on down to 20. What do you observe?



In [48]:
# Part D: refit the cutting-plane model with fixed sparsity on the first i rows only.
k = 10
y = myDataY
l,p= size(Matrix(myDataX))

# True coefficients, used as `truth` for support recovery.
w = Array{Float64}(myDataB)
# Per-run results, appended in loop order. `ks` holds the number of rows used.
ks = []
As = []
Fs = []
scores = []


for i in [100, 90, 80, 70, 60, 50, 40, 30, 20]
    push!(ks, i)
    X = Matrix(myDataX[1:i, 1:p])
    # Slice into a separate variable: overwriting `y` here would shrink it on every
    # pass, which only works while the sizes happen to be in decreasing order.
    yi = Array{Float64}(y[1:i])
    betaTS = sparseregression(X,yi,k)
    (A, F)= detection_rate(betaTS, w)
    # In-sample residual norm: scored on the same rows that were used for fitting.
    score = evaluate(X,yi,betaTS)
    push!(As, A)
    push!(Fs, F)
    push!(scores, score)
end



Stacktrace:
 [1] [1mdepwarn[22m[22m[1m([22m[22m::String, ::Symbol[1m)[22m[22m at [1m.\deprecated.jl:70[22m[22m
 [2] [1mabs[22m[22m[1m([22m[22m::RowVector{Float64,Array{Float64,1}}[1m)[22m[22m at [1m.\deprecated.jl:57[22m[22m
 [3] [1mfit_relaxation![22m[22m[1m([22m[22m::SparseRegression.SparseRegressor, ::Array{Float64,1}, ::Array{Float64,2}[1m)[22m[22m at [1mC:\Users\subha\Desktop\ML - HW 2\SparseRegression.jl:86[22m[22m
 [4] [1mfit![22m[22m[1m([22m[22m::SparseRegression.SparseRegressor, ::Array{Float64,1}, ::Array{Float64,2}[1m)[22m[22m at [1mC:\Users\subha\Desktop\ML - HW 2\SparseRegression.jl:167[22m[22m
 [5] [1msparseregression[22m[22m at [1mC:\Users\subha\Desktop\ML - HW 2\SparseRegression.jl:226[22m[22m [inlined]
 [6] [1mmacro expansion[22m[22m at [1m.\In[48]:16[22m[22m [inlined]
 [7] [1manonymous[22m[22m at [1m.\<missing>:?[22m[22m
 [8] [1minclude_string[22m[22m[1m([22m[22m::String, ::String[1m)[22m[22m a

Academic license - for non-commercial use only
Optimize a model with 2 rows, 101 columns and 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [5e-08, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [9e-01, 1e+01]
Presolve time: 0.00s
Presolved: 2 rows, 101 columns, 200 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Found heuristic solution: objective 0.5235982

Root relaxation: objective 3.660638e-01, 12 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.36606    0    2    0.52360    0.36606  30.1%     -    0s
     0     0    0.36728    0    4    0.52360    0.36728  29.9%     -    0s
     0     0    0.36736    0    4    0.52360    0.36736  29.8%     -    0s
     0     0    0.36868    0    5    0.52360



Academic license - for non-commercial use only
Optimize a model with 2 rows, 101 columns and 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [2e-06, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+01]
Presolve time: 0.00s
Presolved: 2 rows, 101 columns, 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)

Root relaxation: objective 2.384970e-01, 7 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.24770    0    2          -    0.24770      -     -    0s
H    0     0                       1.2498747    0.24770  80.2%     -    0s
     0     0    0.29074    0    5    1.24987    0.29074  76.7%     -    0s
H    0     0                       1.1241130    0.29074  74.1%     -    0s
     0     0    



Academic license - for non-commercial use only
Optimize a model with 2 rows, 101 columns and 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [4e-06, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+01]
Presolve time: 0.00s
Presolved: 2 rows, 101 columns, 201 nonzeros
Variable types: 1 continuous, 100 integer (100 binary)

Root relaxation: objective 0.000000e+00, 9 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.00000    0    2          -    0.00000      -     -    0s
H    0     0                       1.8535687    0.00000   100%     -    0s
H    0     0                       1.4751417    0.00000   100%     -    0s
     0     0    0.00000    0    3    1.47514    0.00000   100%     -    0s
     0     0    



In [49]:
 table = hcat(ks, As, Fs, scores)  # one row per entry of ks; columns are [ks, As, Fs, scores]

9×4 Array{Real,2}:
 100  1.0  0.0  5.06335
  90  1.0  0.0  4.73272
  80  1.0  0.0  4.58688
  70  1.0  0.0  4.41564
  60  1.0  0.0  4.09611
  50  1.0  0.0  3.84741
  40  0.7  0.3  5.06185
  30  1.0  0.0  3.21666
  20  0.3  0.7  2.83241

The quality of the solution goes down as you decrease the number of data points. The run time of the code increases as you decrease the number of points used, particularly at n = 40 and fewer, where it hits the time limit. From n = 40 downward the time limit is reached, so the solution may not have been solved to optimality.
The accuracy and false positive rates stay at 100% and 0% respectively down to n = 50; from n = 40 downward they are no longer reliably at those values (at n = 40 and n = 20 some true features are missed and false positives appear).