In [1]:
import numpy as np
import pandas as pd
import gurobipy as gp
from tqdm import tqdm
import json
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso

In [3]:
training = pd.read_csv('training_data.csv')
testing = pd.read_csv('test_data.csv')

In [4]:
m = training.shape[1]-1
n = training.shape[0]
training['cv'] = np.random.choice(10,250,replace=True)
X = np.ones((n,m+1))
X[:,1:] = training.iloc[:,1:-1].values
y = training.iloc[:,0].values

In [5]:
results = pd.DataFrame(columns=['k', 'cv_index', 'betas','ssr'])

In [6]:
ks = [i for i in range(5,m+1,5)]
M = 15

In [8]:
sses = []
for k in ks:
    sse = 0
    for i in range(10):
        
        Q = np.zeros((2*m+1,2*m+1))
        Q[:m+1,:m+1] = X[training['cv']!=i].transpose() @ X[training['cv']!=i]
        c = np.zeros(2*m+1)
        c[:m+1] = -2*(y[training['cv']!=i].transpose() @ X[training['cv']!=i])

        A = np.zeros((2*m+1,2*m+1))
        b = np.zeros(2*m+1)

        for idx in range(m):
            A[idx, idx+1] = 1
            A[idx, idx+1+m] = M
            A[idx+m,idx+1] = 1
            A[idx+m,idx+1+m] = -M

        A[2*m,m+1:] = 1
        b[2*m] = k

        sense = ['>']*m+['<']*m+['<']
        lassoMod = gp.Model()
        lassoMod.setParam('TimeLimit', 500)
        lassoMod_x = lassoMod.addMVar(2*m+1,vtype=['C']*(m+1)+['B']*m,lb=[-M]*(m+1)+[0]*m)
        lassoMod_con = lassoMod.addMConstrs(A, lassoMod_x, sense, b)
        lassoMod.setMObjective(Q,c,0,sense=gp.GRB.MINIMIZE)

        lassoMod.Params.OutputFlag = 0 

        lassoMod.optimize()
        
        sse += sum((y[training['cv']==i] - X[training['cv']==i] @ lassoMod.x[:m+1])**2)
        results.loc[len(results)] = [k, i, json.dumps(lassoMod.x[:m+1]), sum((y[training['cv']==i] - X[training['cv']==i] @ lassoMod.x[:m+1])**2)]
        results.to_csv('results.csv')
        
    sses.append(sse)

Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  

In [9]:
sses

[907.4638468880851,
 724.8834938263229,
 774.0271682963153,
 795.0179291218362,
 788.3388921362716,
 805.995679625909,
 813.8431361988557,
 808.7878289652613,
 817.0314525699657,
 817.086060050166]

In [7]:
Q = np.zeros((2*m+1,2*m+1))
Q[:m+1,:m+1] = X.transpose() @ X
c = np.zeros(2*m+1)
c[:m+1] = -2*(y.transpose() @ X)

A = np.zeros((2*m+1,2*m+1))
b = np.zeros(2*m+1)

for idx in range(m):
    A[idx, idx+1] = 1
    A[idx, idx+1+m] = M
    A[idx+m,idx+1] = 1
    A[idx+m,idx+1+m] = -M

A[2*m,m+1:] = 1
b[2*m] = 10

sense = ['>']*m+['<']*m+['<']
lassoMod = gp.Model()
lassoMod.setParam('TimeLimit', 500)
lassoMod_x = lassoMod.addMVar(2*m+1,vtype=['C']*(m+1)+['B']*m,lb=[-M]*(m+1)+[0]*m)
lassoMod_con = lassoMod.addMConstrs(A, lassoMod_x, sense, b)
lassoMod.setMObjective(Q,c,0,sense=gp.GRB.MINIMIZE)

lassoMod.Params.OutputFlag = 0 

lassoMod.optimize()

Academic license - for non-commercial use only - expires 2022-08-24
Using license file /Users/agnes/gurobi.lic
Changed value of parameter TimeLimit to 500.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf


In [8]:
X_test = np.ones((50,m+1))
X_test[:,1:] = testing.iloc[:,1:].values
testing['predicted'] = X_test @ lassoMod.x[:m+1]

In [9]:
sum((testing['predicted'] - testing['y'])**2)

116.82719822762624

In [30]:
lasso = GridSearchCV(Lasso(),{'alpha':list(np.linspace(0,1,100))}, cv=10).fit(X,y)

In [31]:
lasso.best_params_

{'alpha': 0.07564633275546291}

In [32]:
np.linspace(0,1,100)

array([0.        , 0.01010101, 0.02020202, 0.03030303, 0.04040404,
       0.05050505, 0.06060606, 0.07070707, 0.08080808, 0.09090909,
       0.1010101 , 0.11111111, 0.12121212, 0.13131313, 0.14141414,
       0.15151515, 0.16161616, 0.17171717, 0.18181818, 0.19191919,
       0.2020202 , 0.21212121, 0.22222222, 0.23232323, 0.24242424,
       0.25252525, 0.26262626, 0.27272727, 0.28282828, 0.29292929,
       0.3030303 , 0.31313131, 0.32323232, 0.33333333, 0.34343434,
       0.35353535, 0.36363636, 0.37373737, 0.38383838, 0.39393939,
       0.4040404 , 0.41414141, 0.42424242, 0.43434343, 0.44444444,
       0.45454545, 0.46464646, 0.47474747, 0.48484848, 0.49494949,
       0.50505051, 0.51515152, 0.52525253, 0.53535354, 0.54545455,
       0.55555556, 0.56565657, 0.57575758, 0.58585859, 0.5959596 ,
       0.60606061, 0.61616162, 0.62626263, 0.63636364, 0.64646465,
       0.65656566, 0.66666667, 0.67676768, 0.68686869, 0.6969697 ,
       0.70707071, 0.71717172, 0.72727273, 0.73737374, 0.74747

In [33]:
10**np.linspace(-3,3,100)

array([1.00000000e-03, 1.14975700e-03, 1.32194115e-03, 1.51991108e-03,
       1.74752840e-03, 2.00923300e-03, 2.31012970e-03, 2.65608778e-03,
       3.05385551e-03, 3.51119173e-03, 4.03701726e-03, 4.64158883e-03,
       5.33669923e-03, 6.13590727e-03, 7.05480231e-03, 8.11130831e-03,
       9.32603347e-03, 1.07226722e-02, 1.23284674e-02, 1.41747416e-02,
       1.62975083e-02, 1.87381742e-02, 2.15443469e-02, 2.47707636e-02,
       2.84803587e-02, 3.27454916e-02, 3.76493581e-02, 4.32876128e-02,
       4.97702356e-02, 5.72236766e-02, 6.57933225e-02, 7.56463328e-02,
       8.69749003e-02, 1.00000000e-01, 1.14975700e-01, 1.32194115e-01,
       1.51991108e-01, 1.74752840e-01, 2.00923300e-01, 2.31012970e-01,
       2.65608778e-01, 3.05385551e-01, 3.51119173e-01, 4.03701726e-01,
       4.64158883e-01, 5.33669923e-01, 6.13590727e-01, 7.05480231e-01,
       8.11130831e-01, 9.32603347e-01, 1.07226722e+00, 1.23284674e+00,
       1.41747416e+00, 1.62975083e+00, 1.87381742e+00, 2.15443469e+00,
      