In [29]:
import pandas as pd
import pickle
import numpy as np
import math
from sklearn import linear_model
warf = pd.read_pickle('../data/cleaned_warfarin.pkl')

true_values = []
with open('../data/true_labels.pkl', 'rb') as f:
    true_values = pickle.load(f)

In [30]:
len(warf)

5528

In [31]:
len(list(warf))

208

In [32]:
train_len = math.ceil(len(true_values)*0.8)
test_len = len(true_values) - train_len

## Training

In [33]:
clf = linear_model.Lasso(alpha=0.01, fit_intercept=False)

x_values = []
true_labels = []
indices = np.random.permutation(list(range(len(warf))))
for i in indices:
    feature = np.array(warf.iloc[i], dtype=float)[1:]
    x_values.append(feature)
    if true_values[i] == "low": true_labels.append(1.0)
    elif true_values[i] == "medium": true_labels.append(2.0)
    else: true_labels.append(3.0)


clf.fit(np.array(x_values[:train_len]), true_labels[:train_len])

Lasso(alpha=0.01, copy_X=True, fit_intercept=False, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

## Testing

In [34]:
prediction = np.rint(clf.predict(np.array(x_values[train_len:])))
correct_count = 0
count = 0
for j in range(len(prediction)):
    if prediction[j] == true_labels[train_len + j]:
        correct_count += 1
    count += 1

In [35]:
correct_count/count

0.676923076923077

## Lasso Bandits

In [None]:
lambda2_0 = 0.05
lambda1 = 0.05
lambda2 = lambda2_0

T = {1:{"index": [], "x":[], "y":[]}, 
     2:{"index": [], "x":[], "y":[]}, 
     3:{"index": [], "x":[], "y":[]}}
S = {1:{"index": [], "x":[], "y":[]}, 
     2:{"index": [], "x":[], "y":[]}, 
     3:{"index": [], "x":[], "y":[]}}

h = 5
q = 1
# 50% q = 20, h = 5, n = 0

for i in range(1, 4, 1):
    for j in range(q*(i - 1) + 1, q*i, 1):
        for n in range(12):
            index = (2**n - 1)*3*q + j
            if index >= train_len: break
            T[i]["index"].append(index)
            T[i]["x"].append(x_values[index])
            T[i]["y"].append(true_labels[index])
        
# train
pi_ = {}
y_ = []
for t in range(train_len):
#     index = np.random.randint(train_len)
    if t in T[1]["index"]:
        pi_[t] = 1
    elif t in T[2]["index"]:
        pi_[t] = 2
    elif t in T[3]["index"]:
        pi_[t] = 3
    else:         
        clf = linear_model.Lasso(alpha=lambda1/2, fit_intercept=False, max_iter=50000)
        clf_new = linear_model.Lasso(alpha=lambda2/2, fit_intercept=False, max_iter=50000)
        beta = {}
        beta_new = {}
        
        # get betas
        for i in range(1, 4, 1):
            if len(T[i]["x"]) >= 1:
                clf.fit(np.array(T[i]["x"]), np.array(T[i]["y"]))
                beta[i] = clf.coef_
            else:
                beta[i] = np.zeros(len(x_values[t]))
            
            if len(S[i]["x"]) >= 1:
                clf_new.fit(np.array(S[i]["x"]), np.array(S[i]["y"]))
                beta_new[i] = clf_new.coef_
            else:
                beta_new[i] = np.zeros(len(x_values[t]))
        
        # get K_hat
        K = []
        max_value = 0
        for j in range(1, 4, 1):
                val = np.matmul(x_values[t].T, beta[j]) - h/2
                if val > max_value:
                    max_value = val            
        for i in range(1, 4, 1):
            if np.matmul(x_values[t].T, beta[i]) >= max_value:
                K.append(i)
        
        # find best arm
        max_arm = 1
        max_pi_value = 0
        for k in K:
            val = np.matmul(x_values[t].T, beta_new[k])
            if val > max_pi_value:
                max_arm = k
                max_pi_value = val
        pi_[t] = max_arm
                
    S[pi_[t]]["index"].append(t)
    S[pi_[t]]["x"].append(x_values[t])
    S[pi_[t]]["y"].append(true_labels[t])
    
    # update everything
    lambda2 = lambda2_0*np.sqrt((np.log(t+1) + np.log(len(x_values[t]))) / (t + 1))
    if pi_[t] == true_labels[t]:
        y_.append(0)
    else:
        y_.append(-1)
        
    if t%100 == 0: 
        print("Iter: " + str(t) +  " Score: " + \
              str((sum(y_) + len(y_))/len(y_))+ " pi: " + str(pi_[t]))

Iter: 0 Score: 1.0 pi: 1






Iter: 100 Score: 0.2871287128712871 pi: 1










Iter: 200 Score: 0.27860696517412936 pi: 1




In [None]:
# test 
correct_count = 0
count = 0
for t in range(train_len, len(x_values), 1):
    count += 1
    K = []
    max_value = 0
    for j in range(1, 4, 1):
        val = np.matmul(x_values[t].T, beta[j]) - h/2
        if val >= max_value:
            max_value = val            
    for i in range(1, 4, 1):
        if np.matmul(x_values[t].T, beta[i]) >= max_value:
            K.append(i)
        
    max_arm = 1
    max_pi_value = 0
    for k in K:
        if np.matmul(x_values[t].T, beta_new[k]) >= max_pi_value:
            max_arm = k
            max_pi_value = np.matmul(x_values[t].T, beta_new[k])
    
    pi_[t] = max_arm
    if max_arm == true_labels[t]: correct_count += 1

In [None]:
correct_count/count