In [None]:
import logging
import os
import sys
sys.path.append(os.path.join(os.getcwd(), os.pardir))

import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import PolynomialFeatures
from statsmodels.tsa.api import SimpleExpSmoothing

from bandits.arms import GaussianMixtureArm
from bandits.context import Context
from bandits.banditPlayer import BanditPlayer
from bandits.banditLearner import (SGDLearner, XGBLearner, OptimisticSGDLearner, AdaptiveRandomForestLearner, 
                                   PerceptronLearner, BaggedLinearRegressor)

In [None]:
# Configure the root logger so INFO-level (and more severe) records are emitted.
# basicConfig does nothing if the root logger already has handlers configured.
logging.basicConfig(level=logging.INFO)

In [None]:
# Candidate centre layouts on the corners of the [-1, 1] x [-1, 1] square.
# Each 2x2 array is passed as an arm's `centres`; judging by the names and
# values, each row is one (x, y) corner -- e.g. both rows of `left` have x = -1.
left = np.array([[-1, -1], [-1, 1]])
right = np.array([[1, -1], [1, 1]])
top = np.array([[-1, 1], [1, 1]])
bottom = np.array([[-1, -1], [1, -1]])
diag_up = np.array([[-1, -1], [1, 1]])
diag_down = np.array([[-1, 1], [1, -1]])


def _make_arm(centres):
    """Build a GaussianMixtureArm with unit stds, factor 1 and noise 0.05.

    `centres` is copied with np.array(...) and a fresh `stds` array is created
    on every call, so the arm never shares arrays with the layouts above or
    with another arm.
    """
    return GaussianMixtureArm(
        centres=np.array(centres),
        stds=np.array([1, 1]),
        factor=1,
        noise=.05,
    )


a0 = _make_arm(diag_up)
a1 = _make_arm(diag_down)


def update_sudden(player: BanditPlayer, start: tuple, stop: tuple, n=4000):
    """Build a per-step callback that swaps the arms' centres abruptly at step ``n``.

    Args:
        player: object exposing an ``arms`` mapping. Arms are addressed in the
            key order of that mapping, captured when this function is called.
        start: one ``centres`` value per arm, installed when the callback is
            invoked with ``0``.
        stop: one ``centres`` value per arm, installed when the callback is
            invoked with ``n``.
        n: step index at which the ``stop`` centres are installed.

    Returns:
        A function ``update(i)`` that overwrites ``player.arms[...].centres``
        at steps ``0`` and ``n`` and does nothing at any other step.

    Raises:
        AssertionError: if ``start`` or ``stop`` does not contain exactly one
            entry per arm.
    """
    arm_names = list(player.arms.keys())
    assert len(start) == len(arm_names)
    assert len(stop) == len(arm_names)

    def _install(centres_per_arm):
        # The k-th entry belongs to the k-th arm in key order.
        for position, centres in enumerate(centres_per_arm):
            player.arms[arm_names[position]].centres = centres

    def update(i):
        if i == 0:
            print("init")
            _install(start)
        # Deliberately not `elif`: with n == 0 both assignments run, stop last.
        if i == n:
            print("switch")
            _install(stop)

    return update
            

def update_gradual(player: BanditPlayer, start: tuple, stop: tuple, n1=4000, n2=6000):
    """Build a per-step callback that moves the arms' centres linearly from ``start`` to ``stop``.

    Args:
        player: object exposing an ``arms`` mapping. Arms are addressed in the
            key order of that mapping, captured when this function is called.
        start: one ``centres`` value per arm, installed at step ``0`` and used
            as the origin of the interpolation. Entries must support scalar
            multiplication and addition (e.g. NumPy arrays).
        stop: one ``centres`` value per arm; the interpolation target.
        n1: first step of the shift (interpolation weight 0).
        n2: last step of the shift (interpolation weight 1). If ``n2 == n1``
            the shift degenerates to a sudden switch at that step; if
            ``n2 < n1`` no shift ever happens.

    Returns:
        A function ``update(i)`` that overwrites ``player.arms[...].centres``
        at step ``0`` and at every step in ``[n1, n2]``, and does nothing at
        other steps (so after ``n2`` the centres stay at ``stop``).

    Raises:
        AssertionError: if ``start`` or ``stop`` does not contain exactly one
            entry per arm.
    """
    arms = list(player.arms.keys())
    assert len(start) == len(arms)
    assert len(stop) == len(arms)
    span = n2 - n1

    def update(i):
        if i == 0:
            print("init")
            for name, centres in zip(arms, start):
                player.arms[name].centres = centres
        if n1 <= i <= n2:
            if i == n1:
                print("Start shift")
            # A zero-length window used to raise ZeroDivisionError; treat it as
            # an immediate jump to the `stop` centres instead.
            alpha = (i - n1) / span if span else 1.0
            for name, begin, end in zip(arms, start, stop):
                player.arms[name].centres = (1 - alpha) * begin + alpha * end

    return update
            
    
def update_eps(learner, i):
    """Set ``learner.eps`` according to the step index ``i``.

    The value is 0.9 while ``i < 300`` and 0.1 from step 300 onwards
    (``eps`` is presumably the learner's exploration rate -- confirm against
    the learner implementation).

    The original body assigned 0.1 unconditionally after the ``if``, so the
    0.9 phase was always overwritten and never took effect.

    Args:
        learner: any object on which an ``eps`` attribute can be set.
        i: current step index.
    """
    learner.eps = 0.9 if i < 300 else 0.1
    
    


In [None]:
# Experiment size: passed as the first argument to Context and halved to
# position the sudden-switch step (and the step quoted in the plot title).
n = 4000

In [None]:
player = BanditPlayer({"a0": a0, "a1": a1})
# NOTE(review): np.random.seed(...) is only called in a later cell. If Context
# (or any learner below) draws from NumPy's global RNG at construction time,
# those draws are not reproducible -- confirm and seed before this cell if so.
context = Context(n, 2)

# SGD learner needs higher order features. Built once here; the same object is
# reused by the warm-up below and by the main loop (previously it was
# constructed a second time at the end of this cell).
poly = PolynomialFeatures(4)

learner1 = SGDLearner(2)
regrets1 = []
# Warm-up: feed every per-arm model of learner1 one zero-reward sample built
# from the first context (presumably so the models are fitted before the first
# choose() call -- TODO confirm against SGDLearner).
for a in learner1.learners.keys():
    learner1.update(poly.fit_transform(context.contexts[0, :].reshape([1, -1])), a, [0])

# The remaining learners consume the raw 2-d context directly.
learner4 = AdaptiveRandomForestLearner(2, n_trees=21)
regrets4 = []
learner5 = PerceptronLearner(2)
regrets5 = []
learner6 = BaggedLinearRegressor(2)
regrets6 = []


In [None]:
# Seed NumPy's global RNG for everything that runs from this point on.
np.random.seed(271)

# Sudden drift: the arms start on (left, right) and swap to (right, left)
# half-way through the run.
switch_step = int(n / 2)
update = update_sudden(player, start=(left, right), stop=(right, left), n=switch_step)
# Gradual alternative (disabled):
# update = update_gradual(player, (diag_down, diag_up), (diag_up, diag_down), n1=int(n/3), n2=int(2/3*n))

In [None]:
# Learners that consume the raw context and whose `eps` follows update_eps,
# each paired with the list that collects its per-step regret.
raw_state_learners = (
    (learner4, regrets4),
    (learner5, regrets5),
    (learner6, regrets6),
)

for i in range(context.contexts.shape[0]):
    # Apply the scheduled arm changes BEFORE anything is played at step i.
    # This call used to be the last statement of the loop body, so step 0 was
    # played against the centres the arms were constructed with (not the
    # schedule's start centres) and the switch took effect one step late.
    update(i)

    state = context.contexts[i, :].reshape([1, -1])

    if (i % 1000) == 0:
        print(i)

    # learner1 works on polynomial features of the context and receives the
    # reward wrapped in a list; expand the features once and reuse them.
    features = poly.fit_transform(state)
    action1 = learner1.choose(features)
    reward1, regret1 = player.play_one(state, action1)
    learner1.update(features, action1, [reward1])
    regrets1.append(regret1)

    # Same choose -> play -> update -> record cycle for the other learners,
    # in the original order (learner4, learner5, learner6).
    for learner, regrets in raw_state_learners:
        action = learner.choose(state)
        reward, regret = player.play_one(state, action)
        learner.update(state, action, reward)
        regrets.append(regret)
        update_eps(learner, i)
  

In [None]:
def _smoothed_optimal_fraction(regrets, smoothing_level=0.006):
    """Exponentially smooth the indicator "regret == 0" over the run.

    Args:
        regrets: sequence of per-step regrets.
        smoothing_level: value forwarded to ``SimpleExpSmoothing.fit``.

    Returns:
        The ``fittedvalues`` of the fitted smoothing model.
    """
    chose_optimal = np.array(regrets) == 0
    model = SimpleExpSmoothing(chose_optimal)
    return model.fit(smoothing_level=smoothing_level).fittedvalues


frac_opt_actions1 = _smoothed_optimal_fraction(regrets1)
frac_opt_actions4 = _smoothed_optimal_fraction(regrets4)
frac_opt_actions5 = _smoothed_optimal_fraction(regrets5)
frac_opt_actions6 = _smoothed_optimal_fraction(regrets6)

In [None]:
fig = go.Figure(
    layout_title=f"Fraction of optimal choice (Exponentially smoothed), reversal at {int(n/2)}",
    layout_xaxis_title="Index",
    layout_yaxis_title="Cumulative fraction",
)

# One line per learner: (legend label, regrets, smoothed series, line colour).
trace_specs = (
    ("SGDLearner", regrets1, frac_opt_actions1, "#aa9900"),
    ("Adaptive RF learner", regrets4, frac_opt_actions4, "#167ab3"),
    ("Perceptron", regrets5, frac_opt_actions5, "#565659"),
    ("Bagged Linear", regrets6, frac_opt_actions6, "#12a9c1"),
)
for label, regrets, smoothed, colour in trace_specs:
    fig.add_trace(go.Scatter(
        x=np.arange(len(regrets)) + 1,  # 1-based step index
        y=smoothed,
        line={"color": colour, "dash": "solid"},
        name=label,
        mode="lines",
    ))

# Merge the y-range into the existing layout. Assigning `fig.layout = {...}`
# replaced the whole layout and silently dropped the title and axis titles
# set in the constructor above.
fig.update_layout(yaxis_range=[0, 1])
fig.show()