In [1]:
import sys
sys.path.append('../')

import numpy as np
import pandas as pd
import tempfile
import json
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")
%matplotlib inline

import logging
logging.getLogger().setLevel(logging.INFO)

from banditpylib import trials_to_dataframe
from banditpylib.arms import GaussianArm
from banditpylib.bandits import MultiArmedBandit
from banditpylib.protocols import SinglePlayerProtocol
from banditpylib.learners.mab_fcbai_learner import ExpGap, LilUCBHeuristic, TrackAndStop, BatchRacing, BatchTrackAndStop
from banditpylib.utils import argmax_or_min_tuple, argmax_or_min, argmax_or_min_tuple_second

In [2]:
#means =  [0.7, 0.4, 0.1]
#means = [0.6, 0.5, 0.5]
#means = [0.5, 0.45, 0.43, 0.4] #w∗(µ1) = [0.417 0.390 0.136 0.057]
#means =  [0.6, 0.51, 0.5, 0.49, 0.48] #w∗(µ2) = [0.336, 0.251, 0.177, 0.132, 0.104]
#means =  [0.3, 0.21, 0.2, 0.19, 0.18] #w∗(µ2) = [0.336, 0.251, 0.177, 0.132, 0.104]

In [None]:
# --- Experiment configuration -------------------------------------------------
confidence = 0.95        # confidence value handed to every learner below
means = [0.7, 0.4, 0.1]  # one entry per arm; used as the `mu` of each GaussianArm
max_pulls = 50000        # `max_pulls` argument handed to the learners
std = 1                  # `std` shared by all Gaussian arms
batch_size = 20          # only read by the commented-out batch learners
rho = 0.5                # only read by the commented-out BatchTrackAndStop learner

# Candidate values for the (currently commented-out) sweep loops over
# batch size and rho.
batch_sizes = [20, 50, 100, 500, 1000]
rhos = [0.05, 0.25, 0.5, 0.75, 0.95]

# Sweep loops, currently disabled:
# for batch_size in batch_sizes:
# for rho in rhos:

# One Gaussian arm per configured mean, all sharing the same `std`.
arms = [GaussianArm(mu=mu, std=std) for mu in means]
bandit = MultiArmedBandit(arms=arms)

# Disabled learner alternatives, kept for reference:
# ExpGap(arm_num=len(arms), confidence=confidence, threshold=3,  name='Exponential-Gap Elimination'),
# BatchRacing(arm_num=len(arms), confidence=confidence, max_pulls=max_pulls,
#             k=1, b=batch_size, r=int(batch_size/2), name='BatchRacing'),
# BatchTrackAndStop(arm_num=len(arms), confidence=confidence, batch_size=batch_size, rho=rho, tracking_rule="C",
#                   max_pulls=max_pulls,  name='Batch Track and stop C-Tracking'),

# Active learners, in order: lilUCB heuristic, then Track-and-Stop with the
# "C" and "D" tracking rules. The two Track-and-Stop variants differ only in
# `tracking_rule` and display name, so they are generated from one table.
learners = [
    LilUCBHeuristic(
        arm_num=len(arms),
        confidence=confidence,
        max_pulls=max_pulls,
        name='Heuristic lilUCB',
    ),
    *(
        TrackAndStop(
            arm_num=len(arms),
            confidence=confidence,
            tracking_rule=rule,
            max_pulls=max_pulls,
            name=label,
        )
        for rule, label in (
            ("C", 'Track and stop C-Tracking'),
            ("D", 'Track and stop D-Tracking'),
        )
    ),
]

# Number of trials run for each setup.
trials = 5

game = SinglePlayerProtocol(bandit=bandit, learners=learners)

# The trial log is only an intermediate artifact: the game writes it to
# `output_filename` and it is read straight back into a DataFrame. Using the
# temporary file as a context manager guarantees the handle is closed (and
# the file removed) as soon as `trials_df` has been loaded, instead of
# lingering until the kernel shuts down.
# NOTE: after this block `temp_file` is closed; code that wants to replay the
# game later (e.g. the commented-out cells below) needs its own output file.
with tempfile.NamedTemporaryFile() as temp_file:
    # Start playing the game
    # Add `debug=True` for debugging purpose
    game.play(trials=trials, output_filename=temp_file.name)

    trials_df = trials_to_dataframe(temp_file.name)
#trials_df.to_csv("csv_files/trial_df_rho_" + str(rho) + "_batch_size_" + str(batch_size) + "_.csv", index=False)  # `index=False` ensures that the index is not saved in the CSV.

INFO:absl:start Heuristic lilUCB's play with multi_armed_bandit
INFO:absl:Heuristic lilUCB's play with multi_armed_bandit runs 2.68 seconds.
INFO:absl:start Track and stop C-Tracking's play with multi_armed_bandit


In [None]:
# game = SinglePlayerProtocol(bandit=bandit, learners=learners)
# # Start playing the game
# # Add `debug=True` for debugging purpose
# game.play(trials=trials, output_filename=temp_file.name)

In [None]:
# trials_df = trials_to_dataframe(temp_file.name)
# trials_df.to_csv("trial_df_rho_" + str(rho) + "_batch_size_" + str(batch_size) + "_.csv", index=False)  # `index=False` ensures that the index is not saved in the CSV.

In [None]:
# trials_df.tail(100)

In [None]:
# Tag every row with the confidence value used for this run; the plots below
# use this column as their x axis.
trials_df = trials_df.assign(confidence=confidence)

In [None]:
# Bar plot of `total_actions` (labelled "pulls") per learner, grouped by
# confidence value. The legend is placed outside the axes, to the right.
fig, ax = plt.subplots()
sns.barplot(data=trials_df, x='confidence', y='total_actions', hue='learner', ax=ax)
ax.set_ylabel('pulls')
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

In [None]:
# Bar plot of `rounds` per learner, grouped by confidence value. The legend
# is placed outside the axes, to the right.
fig, ax = plt.subplots()
sns.barplot(data=trials_df, x='confidence', y='rounds', hue='learner', ax=ax)
ax.set_ylabel('rounds')
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))