In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn import preprocessing

import utils.datasets as DB
from bandit import AgentBandit
from models.classification import Classification
from models.regression import Regression

In [2]:
# Make sure the UCI Mushroom data is available locally, using the project's
# dataset helper. Per the log line recorded below, an already-present data
# folder is reported as found; delete that folder to force a fresh download.
datasets = DB.Datasets()
datasets.download_UCI()

2023-02-28 20:33:16,629 [INFO] Folder for UCI Mushroom found in the data folder. If the files are corrupted please delete the folder at location ../data/UCI_Mushroom and re-run this command


In [3]:
# Column names for the header-less UCI file: the target ('class') comes first,
# followed by the categorical feature columns.
col_names = [
    'class',
    'cap-shape', 'cap-surface', 'cap-color',
    'bruises', 'odor',
    'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
    'stalk-shape', 'stalk-root',
    'stalk-surf-above-ring', 'stalk-surf-below-ring',
    'stalk-color-above-ring', 'stalk-color-below-ring',
    'veil-type', 'veil-color',
    'ring-number', 'ring-type',
    'spore-color', 'population', 'habitat',
]

# NOTE(review): the download log above mentions ../data/UCI_Mushroom while this
# path is relative to the current directory -- confirm the working directory.
mushrooms = pd.read_csv(
    'data/UCI_Mushroom/agaricus-lepiota.data',
    header=None,
    names=col_names,
)

# Split off the target column (this removes it from `mushrooms`); every
# remaining column is a context feature.
labels = mushrooms.pop('class').to_numpy()
context = mushrooms.copy()

In [4]:
# Preview the first rows of the raw feature table (values are still the
# single-letter category codes at this point).
context.head()

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surf-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-color,population,habitat
0,x,s,n,t,p,f,c,n,k,e,...,s,w,w,p,w,o,p,k,s,u
1,x,s,y,t,a,f,c,b,k,e,...,s,w,w,p,w,o,p,n,n,g
2,b,s,w,t,l,f,c,b,n,e,...,s,w,w,p,w,o,p,n,n,m
3,x,y,w,t,p,f,c,n,n,e,...,s,w,w,p,w,o,p,k,s,u
4,x,s,g,f,n,f,w,b,k,t,...,s,w,w,p,w,o,e,n,a,g


In [5]:
# Encode the class labels as integers. The fitted encoder keeps its own name so
# the integer labels can be mapped back to the original codes later with
# label_encoder.inverse_transform(...).
label_encoder = preprocessing.LabelEncoder()
labels_numerical = label_encoder.fit_transform(labels)

# Encode every feature column as integers, fitting one independent encoder per
# column. The encoders are stored by column name instead of being discarded, and
# the result goes into a new frame so that `context` keeps the raw values shown
# by the preview above and re-running this cell always refits on the raw data.
feature_encoders = {}
context_encoded = context.copy()
for colname in context.columns:
    encoder = preprocessing.LabelEncoder()
    context_encoded[colname] = encoder.fit_transform(context[colname])
    feature_encoders[colname] = encoder

# Plain integer matrix (rows = mushrooms, columns = features) handed to the agent.
context_numerical = context_encoded.to_numpy()
print(context_numerical.shape)

(8124, 22)


In [6]:
# Keyword arguments for the bandit agent (unpacked into AgentBandit below).
agent_args = {
        'model_class'    : Regression,
        'model_name'     : 'test_rl',
        # Derived from the data instead of hard-coding the feature count, so it
        # stays correct if columns are added or dropped.
        # NOTE(review): the +2 is presumably an encoding of the two possible
        # actions appended to the context -- confirm against AgentBandit.
        'input_dim'      : context_numerical.shape[1] + 2,
        'output_dim'     : 1,
        'hl_type'        : nn.Linear,
        'hl_units'       : 100,
        'batch_size'     : 64,
        'buffer_size'    : 4096,
        'scheduler'      : None,
        'data_contexts'  : context_numerical,
        'data_labels'    : labels_numerical,
        'training_steps' : 64,
        'n_samples'      : 1,
        'epsilon'        : 0.2,
        'learning_rate'  : 1e-3
}

In [7]:
# Seed the global NumPy and PyTorch generators before the agent is built so a
# fresh "Restart & Run All" starts from the same random state every time.
# NOTE(review): assumes AgentBandit and its model draw their randomness from
# these global generators -- confirm; any other RNG they use is not seeded here.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Build the bandit agent from the configuration dictionary defined above.
agent = AgentBandit(**agent_args)

In [None]:
# Run a handful of learning rounds. Each round picks one row index uniformly at
# random from [0, len(labels)) and passes that index to the agent.
n_rounds = 5
for epoch in range(n_rounds):
    print(epoch)
    mushroom = np.random.randint(len(labels))
    agent.learn(mushroom)

In [10]:
# Inspect the cumulative-regret history kept by the agent (the recorded output
# has five entries, matching the five learn() calls made above).
agent.cum_regret

[0, 35, 70, 65, 65]

In [12]:
# Inspect the agent's reward buffer; NumPy abbreviates the middle of a long
# array in the displayed output.
agent.reward_buffer

array([  5., -35., -35., ...,   0.,   0.,   0.])