In [None]:
import os
import sys
sys.path.append(os.path.join(os.getcwd(), os.pardir))

import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import PolynomialFeatures
from statsmodels.tsa.api import SimpleExpSmoothing

from bandits.arms import GaussianMixtureArm
from bandits.context import Context
from bandits.banditPlayer import BanditPlayer
from bandits.banditLearner import SGDLearner, XGBLearner, OptimisticSGDLearner

In [None]:
# Candidate centre layouts. Each array holds two 2-D points whose coordinates
# are all +/-1, i.e. two corners of the square [-1, 1] x [-1, 1]
# (presumably one row per mixture centre -- confirm against GaussianMixtureArm).
diag_down = np.array([[-1, 1], [1, -1]])
diag_up = np.array([[-1, -1], [1, 1]])
left = np.array([[-1, -1], [-1, 1]])
right = np.array([[1, -1], [1, 1]])
top = np.array([[-1, 1], [1, 1]])
bottom = np.array([[-1, -1], [1, -1]])

# Settings shared by both arms.
common_arm_kwargs = {"factor": 1, "noise": .01}

# Each arm receives its own copy of the layout, so the arm never shares
# memory with the module-level arrays above.
a0 = GaussianMixtureArm(
    centres=diag_up.copy(),
    stds=np.array([1.2, 1.2]),
    **common_arm_kwargs,
)
a1 = GaussianMixtureArm(
    centres=diag_down.copy(),
    stds=np.array([1, 1]),
    **common_arm_kwargs,
)



In [None]:
# Passed as the first argument to Context below; presumably the number of
# contexts (and therefore simulation steps) -- confirm against Context.
n = 20_000

In [None]:
# Build the environment, the context stream and the three competing learners.
# NOTE(review): no RNG seed is set before Context(n, 2) is constructed; if
# Context draws from NumPy's global RNG the contexts will differ between
# runs -- confirm and seed earlier if reproducibility is required.
player = BanditPlayer({"a0": a0, "a1": a1})
context = Context(n, 2)

learner1 = SGDLearner(2)
regrets1 = []
learner2 = XGBLearner(n_learners=2, n_trees=100)
regrets2 = []
learner3 = OptimisticSGDLearner(2, alpha=0.995)
regrets3 = []
# Per-step prediction errors reported by learner3 for arms "a0" and "a1".
pe31 = []
pe32 = []

# SGD learner needs higher order features
poly = PolynomialFeatures(4)

# Prime every arm of every learner with a single zero-reward observation taken
# from the first context. The reshaped context and its polynomial expansion do
# not depend on the arm, so they are computed once instead of once per arm
# (and, for the expansion, once per SGD-based learner).
first_state = context.contexts[0, :].reshape([1, -1])
first_state_poly = poly.fit_transform(first_state)

for a in learner1.learners.keys():
    learner1.update(first_state_poly, a, [0])
    learner2.update(first_state, a, [0])
    learner3.update(first_state_poly, a, [0])


In [None]:
# Run the three learners side by side over the same context stream, changing
# the environment at fixed steps to see how each learner adapts.
np.random.seed(270)
for i in range(context.contexts.shape[0]):
    state = context.contexts[i, :].reshape([1, -1])
    # Polynomial expansion used by both SGD-based learners. It is computed
    # once per step rather than separately for every choose/update call.
    state_poly = poly.fit_transform(state)

    # Coarse progress indicator.
    if (i % 1000) == 0:
        print(i)

    # Learner 1: SGD on polynomial features.
    action1 = learner1.choose(state_poly)
    reward1, regret1 = player.play_one(state, action1)
    learner1.update(state_poly, action1, [reward1])
    regrets1.append(regret1)

    # Learner 2: XGB on the raw context.
    action2 = learner2.choose(state)
    reward2, regret2 = player.play_one(state, action2)
    learner2.update(state, action2, [reward2])
    regrets2.append(regret2)

    # Learner 3: optimistic SGD on polynomial features; its per-arm
    # prediction errors are recorded as well.
    action3 = learner3.choose(state_poly)
    reward3, regret3 = player.play_one(state, action3)
    learner3.update(state_poly, action3, [reward3])
    regrets3.append(regret3)
    pe31.append(learner3.pred_error["a0"])
    pe32.append(learner3.pred_error["a1"])

    # Scheduled environment changes. They are applied after this step has
    # been played, so they first affect step i + 1.
    if i == 4000:  # Replace a0's centres with the `right` layout
        player.arms["a0"].centres = right

    if i == 8000:  # Replace a1's centres with the `diag_up` layout
        player.arms["a1"].centres = diag_up

    if i == 12000:  # Switch arms
        # Three-step swap through a temporary key; kept as pop/re-insert so
        # the resulting dict insertion order matches the original code.
        player.arms["a_temp"] = player.arms.pop("a1")
        player.arms["a1"] = player.arms.pop("a0")
        player.arms["a0"] = player.arms.pop("a_temp")
  

In [None]:
def _smoothed_optimal_fraction(regrets):
    """Exponentially smooth the 0/1 indicator of zero-regret steps.

    Each step is marked True when its regret equals exactly 0, and the
    indicator series is then smoothed with simple exponential smoothing at a
    fixed smoothing level of 0.006. Returns the fitted values of that model.
    """
    zero_regret = np.array(regrets) == 0
    model = SimpleExpSmoothing(zero_regret)
    return model.fit(smoothing_level=0.006).fittedvalues


frac_opt_actions1 = _smoothed_optimal_fraction(regrets1)
frac_opt_actions2 = _smoothed_optimal_fraction(regrets2)
frac_opt_actions3 = _smoothed_optimal_fraction(regrets3)

In [None]:
# Plot the smoothed fraction of optimal choices for each learner, together
# with the two prediction-error traces recorded for learner3.
fig = go.Figure(
    layout_title="Fraction of optimal choice (Exponentially smoothed)",
    layout_xaxis_title="Index",
    layout_yaxis_title="Cumulative fraction",
)

# (regret list that fixes the x-axis length, y values, colour, dash, legend name)
trace_specs = [
    (regrets1, frac_opt_actions1, "#aa9900", "solid", "SGDLearner"),
    (regrets2, frac_opt_actions2, "#9900aa", "solid", "XGBLearner"),
    (regrets3, frac_opt_actions3, "#33bb77", "solid", "OptimisticSGDLearner"),
    (regrets3, pe31, "#33bb77", "dash", "Pred Error 1"),
    (regrets3, pe32, "#33bb77", "dot", "Pred Error 2"),
]
for trace_regrets, trace_y, trace_colour, trace_dash, trace_name in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=np.arange(len(trace_regrets)) + 1,  # 1-based step index
            y=trace_y,
            line={"color": trace_colour, "dash": trace_dash},
            name=trace_name,
            mode="lines",
        )
    )

# Update only the y-axis range. Assigning a new dict to `fig.layout` would
# replace the whole layout and drop the title and axis titles set above.
fig.update_layout(yaxis_range=[0, 1])
fig.show()