In [15]:
import sys
# Add the parent directory to the import search path. Presumably this makes the
# local banditpylib checkout importable -- confirm against the repo layout.
sys.path.append('../')

import matplotlib.pyplot as plt
import numpy as np
import tempfile
import seaborn as sns
# Apply seaborn's "darkgrid" style to the figures drawn in this notebook.
sns.set(style="darkgrid")

import logging
# Set the root logger level to INFO; the play cell's output shows INFO-level
# progress lines.
logging.getLogger().setLevel(logging.INFO)

from banditpylib import trials_to_dataframe
from banditpylib.bandits import LinearBandit
from banditpylib.arms import GaussianArm
from banditpylib.protocols import SinglePlayerProtocol
from banditpylib.learners.linear_bandit_learner import LinUCB
from banditpylib.learners.mab_learner import UCB

In [16]:
# --- Experiment constants (tune these to change the run) ---

# Handed to LinUCB as its third argument.
# NOTE(review): presumably a ridge-regularization strength -- confirm.
lambda_reg = 0.001

# Number of arms: one feature vector is drawn per arm, and UCB is sized by it.
num_arms = 100

# Passed to game.play as `horizon` and used as the upper end of the
# intermediate horizons.
horizon = 2000

# Handed to LinUCB as its second argument; tied to the horizon as 1 / horizon.
# NOTE(review): presumably a confidence parameter -- confirm.
delta = 1 / horizon

In [17]:
# Seed NumPy's global generator so that a fresh "Restart & Run All" draws the
# same features and theta. This also seeds any later code that samples through
# the global np.random functions.
np.random.seed(0)

# Dimension shared by every arm's feature vector and by theta (previously the
# literal 5 repeated in two places).
feature_dim = 5

# One standard-normal vector per arm, each rescaled to unit Euclidean norm.
# `features` stays a plain list of independent 1-D arrays.
raw_features = np.random.normal(0, size=(num_arms, feature_dim))
features = [row / np.linalg.norm(row) for row in raw_features]

# Standard-normal parameter vector passed to LinearBandit alongside `features`.
theta = np.random.normal(0, size=feature_dim)

In [18]:
# Environment built from the per-arm feature vectors and theta.
bandit = LinearBandit(features, theta)

# Learners to compare, in the order they are handed to the protocol.
learners = [
    # Receives the arm features plus delta and lambda_reg.
    LinUCB(features, delta, lambda_reg),
    # Receives only the number of arms (no features).
    UCB(num_arms),
]

In [19]:
# Checkpoints 0, 50, 100, ... up to and including `horizon` when it is a
# multiple of 50; passed to game.play as `intermediate_horizons`.
intermediate_horizons = [*range(0, horizon + 1, 50)]

# Scratch file whose path is passed to game.play and trials_to_dataframe.
# With the default settings the file is removed once this object is closed,
# so keep `temp_file` alive until the results have been loaded.
temp_file = tempfile.NamedTemporaryFile()

In [20]:
# Wire the bandit environment and the list of learners into a single-player
# protocol object; the run itself is started later via game.play.
game = SinglePlayerProtocol(bandit, learners)

In [None]:
# Run the experiment, passing the temporary file's path (read back later by
# trials_to_dataframe).
# NOTE(review): the leading 200 is presumably the number of trials -- confirm
# against SinglePlayerProtocol.play.
game.play(
    200,
    temp_file.name,
    intermediate_horizons=intermediate_horizons,
    horizon=horizon,
)

INFO:absl:start linucb's play with linear_bandit
INFO:absl:linucb's play with linear_bandit runs 21.67 seconds.
INFO:absl:start ucb's play with linear_bandit


In [None]:
# Read the file at the same path that was passed to game.play; presumably it
# holds the recorded trials -- confirm against trials_to_dataframe. The result
# is plotted below using its 'total_actions', 'regret' and 'learner' columns.
trials_df = trials_to_dataframe(temp_file.name)

In [None]:
# Quick look at the first rows of the results before plotting them.
trials_df.head()

In [None]:
# Draw on an explicit Axes so the figure can be titled and labelled directly
# and stands alone when the notebook is skimmed.
fig, ax = plt.subplots()

# One line per learner; seaborn aggregates rows that share an x value
# (by default the mean with an error band).
sns.lineplot(x='total_actions', y='regret', hue='learner', data=trials_df, ax=ax)

ax.set(
    title='Regret vs. total actions by learner',
    xlabel='Total actions',
    ylabel='Regret',
)
plt.show()