In [405]:
import pandas as pd
import pickle
import numpy as np
import math
from sklearn import linear_model
warf = pd.read_pickle('../data/cleaned_warfarin.pkl')

true_values = []
with open('../data/true_labels.pkl', 'rb') as f:
    true_values = pickle.load(f)

In [406]:
len(warf)

5528

In [407]:
len(list(warf))

94

In [408]:
train_len = math.ceil(len(true_values)*0.8)
test_len = len(true_values) - train_len

In [409]:
test_len

1105

## Training

In [410]:
clf = linear_model.Lasso(alpha=0.0007, fit_intercept=False)

x_values = []
true_labels = []
indices = np.random.permutation(list(range(len(warf))))
for i in indices:
    feature = np.array(warf.iloc[i], dtype=float)
    x_values.append(feature)
    if true_values[i] == "low": true_labels.append(0)
    elif true_values[i] == "medium": true_labels.append(1)
    else: true_labels.append(2)


clf.fit(np.array(x_values[:train_len]), true_labels[:train_len])

Lasso(alpha=0.0007, copy_X=True, fit_intercept=False, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

## Testing

In [411]:
prediction = np.rint(clf.predict(np.array(x_values[train_len:])))
correct_count = 0
count = 0
for j in range(len(prediction)):
    if prediction[j] == true_labels[train_len + j]:
        correct_count += 1
    count += 1

In [412]:
correct_count/count

0.7149321266968326

## Lasso Bandits

In [None]:
# Best: 0.6009049773755656

In [None]:
# hyper parameters
h = 1
lambda2_0 = 0.05
lambda1 = 0.05
lambda2 = lambda2_0

In [419]:
T = {0:{"index": [], "x":[], "y":[]}, 
     1:{"index": [], "x":[], "y":[]}, 
     2:{"index": [], "x":[], "y":[]}}
S = {0:{"index": [], "x":[], "y":[]}, 
     1:{"index": [], "x":[], "y":[]}, 
     2:{"index": [], "x":[], "y":[]}}

q = 0
for i in range(3):
    for j in range(q*(i), q*(i+1), 1):
        for n in range(11):
            index = (2**n - 1)*3*q + j
            if index >= train_len: break
            T[i]["index"].append(index)
            T[i]["x"].append(x_values[index])
            T[i]["y"].append(true_labels[index])

# train
accuracy_list = []
regret_list = []
pi_ = {}
y_ = []
for t in range(train_len):
    if t in T[0]["index"]:
        pi_[t] = 0
    elif t in T[1]["index"]:
        pi_[t] = 1
    elif t in T[2]["index"]:
        pi_[t] = 2
    else:         
        clf = linear_model.Lasso(alpha=lambda1, fit_intercept=False, max_iter=100000)
        clf_new = linear_model.Lasso(alpha=lambda2, fit_intercept=False, max_iter=100000)
        beta = {}
        beta_new = {}
        
        # select T up to Now
        T_curr = {0:{"index": [], "x":[], "y":[]}, 
                 1:{"index": [], "x":[], "y":[]}, 
                 2:{"index": [], "x":[], "y":[]}}
        for k in range(3):
            for i in range(len(T[k]["index"])):
                if T[k]["index"][i] > t: break
                T_curr[k]["index"].append(T[k]["index"][i])
                T_curr[k]["x"].append(T[k]["x"][i])
                T_curr[k]["y"].append(T[k]["y"][i])
            
        # get betas
        for i in range(3):
            if len(T_curr[i]["x"]) >= 1:
                clf.fit(np.array(T_curr[i]["x"]), np.array(T_curr[i]["y"]))
                beta[i] = clf.coef_
            else:
                beta[i] = np.zeros(len(x_values[t]))
            
            if len(S[i]["x"]) >= 1:
                clf_new.fit(np.array(S[i]["x"]), np.array(S[i]["y"]))
                beta_new[i] = clf_new.coef_
            else:
                beta_new[i] = np.zeros(len(x_values[t]))
        
        # get K_hat
        K = []
        max_value = 0
        for j in range(3):
                val = np.matmul(x_values[t].T, beta[j]) - h/2
                if val > max_value:
                    max_value = val            
        for i in range(3):
            if np.matmul(x_values[t].T, beta[i]) >= max_value:
                K.append(i)
        
        # find best arm
        max_arm = 0
        max_pi_value = 0
        for k in K:
            val = np.matmul(x_values[t].T, beta_new[k])
            if val == max_pi_value:
                if np.random.choice(a=[False, True]): max_arm = k
            if val > max_pi_value:
                max_arm = k
                max_pi_value = val
        pi_[t] = max_arm
    
    # update S
    S[pi_[t]]["index"].append(t)
    S[pi_[t]]["x"].append(x_values[t])
    S[pi_[t]]["y"].append(true_labels[t])
    
    # update lambda
    lambda2 = lambda2_0*np.sqrt((np.log(t+1) + np.log(len(x_values[t])))/(t+1))
    
    #calculate reward
    if pi_[t] == true_labels[t]:
        y_.append(0)
    else:
        y_.append(-1)
        
    if t%100 == 0: 
        print("Iter: " + str(t) +  " Score: " + \
              str((sum(y_) + len(y_))/len(y_))+ " pi: " + str(pi_[t])\
             + " Regret: " + str(-sum(y_)))
    
    accuracy_list.append((sum(y_) + len(y_))/len(y_))
    regret_list.append(-sum(y_))

Iter: 0 Score: 1.0 pi: 1 Regret: 0
Iter: 100 Score: 0.6633663366336634 pi: 1 Regret: 34
Iter: 200 Score: 0.6467661691542289 pi: 1 Regret: 71
Iter: 300 Score: 0.6445182724252492 pi: 1 Regret: 107
Iter: 400 Score: 0.6359102244389028 pi: 1 Regret: 146
Iter: 500 Score: 0.6307385229540918 pi: 1 Regret: 185
Iter: 600 Score: 0.627287853577371 pi: 1 Regret: 224
Iter: 700 Score: 0.6191155492154066 pi: 1 Regret: 267
Iter: 800 Score: 0.6154806491885143 pi: 1 Regret: 308
Iter: 900 Score: 0.6148723640399556 pi: 1 Regret: 347
Iter: 1000 Score: 0.6083916083916084 pi: 1 Regret: 392
Iter: 1100 Score: 0.6039963669391463 pi: 1 Regret: 436
Iter: 1200 Score: 0.6086594504579517 pi: 1 Regret: 470
Iter: 1300 Score: 0.6033820138355112 pi: 1 Regret: 516
Iter: 1400 Score: 0.6024268379728765 pi: 1 Regret: 557
Iter: 1500 Score: 0.6002664890073285 pi: 1 Regret: 600
Iter: 1600 Score: 0.5977514053716427 pi: 1 Regret: 644
Iter: 1700 Score: 0.5937683715461494 pi: 1 Regret: 691
Iter: 1800 Score: 0.5935591338145475 pi: 1

In [402]:
# test 
risk = 0
correct_count = 0
count = 0
for t in range(train_len, len(x_values), 1):
    count += 1
    K = []
    max_value = 0
    for j in range(3):
        val = np.matmul(x_values[t].T, beta[j]) - h/2
        if val > max_value:
            max_value = val            
    for i in range(3):
        if np.matmul(x_values[t].T, beta[i]) >= max_value:
            K.append(i)
        
    max_arm = 0
    max_pi_value = 0
    for k in K:
        val = np.matmul(x_values[t].T, beta_new[k])
        if val == max_pi_value:
            if k == 1: max_arm = k
            elif max_arm == 1: max_arm = 1
            elif np.random.choice(a=[False, True]): max_arm = k
        elif val > max_pi_value:
            max_arm = k
            max_pi_value = val
    
    pi_[t] = max_arm
    if max_arm == true_labels[t]: correct_count += 1
    if (max_arm == 'high' and true_values[i] == 'low') or (max_arm == 'low' and true_values[i] == 'high'):
        risk += 1
    

In [403]:
risk/count

0.0

In [404]:
correct_count/count

0.5574660633484163

In [None]:
with open('../results/lasso_regret.pkl', 'wb') as f:
    pickle.dump(regret_list, f)
with open('../results/lasso_accuracy.pkl', 'wb') as f:
    pickle.dump(accuracy_list, f)