This notebook comes from the code accompanying the article [Sub-sampling for Efficient Non-Parametric Bandit Exploration](https://arxiv.org/abs/2010.14323).

In [1]:
from IPython.core.display import HTML
display(HTML('<style>.container { width:95% !important; } </style>'))

%load_ext autoreload
%autoreload 2

In [2]:
from MAB import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Parameters and aliases for all algorithms

In [3]:
# Parameters of each algorithm, keyed by algorithm name.
param = {
    'non_parametric_TS': {},
}

# Short bandit code -> bandit class used to build the model.
mapping = {
    'B': BetaBernoulliMAB,
    'G': GaussianMAB,
    'LG': LogGaussianMAB,
    'Exp': ExponentialMAB,
    'TG': TruncGaussianMAB,
}

# Short bandit code -> human-readable name of the bandit family.
mapping_name = {
    'B': 'Bernoulli',
    'G': 'Gaussian',
    'LG': 'LogGaussian',
    'Exp': 'Exponential',
    'TG': 'Truncated Gaussian',
}

### Bounded arms

In [4]:
# Bandit family to simulate: one of 'B', 'G', 'LG', 'Exp', 'TG'.
bandit = 'B'
# Parameters for the arms distribution.
p = [0.7, 0.5]
# Horizon (T), number of Monte Carlo runs (N) and storage step: if results
# are saved, trajectories are stored for all rounds t such that t % step == 0.
T, N, step = 1000, 25, 25

In [None]:
# Instantiate the bandit class selected by `bandit` with the arm parameters.
model = mapping[bandit](p)

# Run non-parametric Thompson Sampling twice: once with upper_bound=1.0 and
# once with upper_bound=None. Only the first returned value is kept
# (presumably the regret averaged over the N runs -- TODO confirm in MAB).
r_1, _ = model.MC_regret('non_parametric_TS', N, T, {'upper_bound': 1.0}, step)
r_2, _ = model.MC_regret('non_parametric_TS', N, T, {'upper_bound': None}, step)

# Third curve: model.Cp * log(1 + t) for t = 0, ..., T - 1.
all_r = [r_1, r_2, model.Cp * np.log(1 + np.arange(T))]

# One column per curve, one row per entry of the curves.
res = pd.DataFrame(all_r).T
res.columns = ['NP-TS', 'NP-TS (unknown bound)', 'lower bound']

# Force the first value of the reference curve to 0. This is written as a
# single .loc assignment: the chained form `res[col].iloc[0] = 0` assigns to
# an intermediate Series, which triggers a chained-assignment warning and
# leaves `res` unchanged when pandas Copy-on-Write is active.
res.loc[res.index[0], 'lower bound'] = 0

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
Computing 25 simulations:  28%|██▊       | 7/25 [00:02<00:06,  2.73it/s]

In [None]:
# Draw every column of `res` as a curve on a single darkgrid-styled figure.
with sns.axes_style('darkgrid'):
    fig, ax = plt.subplots(1, 1, figsize=(11, 7))
    res.plot(ax=ax)

    ax.set_title(
        'Average cumulative regret over {} runs for {:s} bandit'.format(
            N, mapping_name[bandit]
        )
    )
    ax.legend(loc='lower right')

    fig.tight_layout()
    plt.show()

### Unbounded arms (Gaussian)

In [None]:
# Bandit family to simulate: one of 'B', 'G', 'LG', 'Exp', 'TG'.
bandit = 'G'
# Parameters for the arms distribution.
p = [[0.1, 0.3], [0.2, 0.25]]
# Horizon (T), number of Monte Carlo runs (N) and storage step: if results
# are saved, trajectories are stored for all rounds t such that t % step == 0.
T, N, step = 1000, 25, 25

In [None]:
# Instantiate the bandit class selected by `bandit` with the arm parameters.
model = mapping[bandit](p)

# Run non-parametric Thompson Sampling twice: once with upper_bound=1.0 and
# once with upper_bound=None. Only the first returned value is kept
# (presumably the regret averaged over the N runs -- TODO confirm in MAB).
# NOTE(review): this section is titled "Unbounded arms", yet the first run
# passes upper_bound=1.0 -- confirm this is the intended comparison.
r_1, _ = model.MC_regret('non_parametric_TS', N, T, {'upper_bound': 1.0}, step)
r_2, _ = model.MC_regret('non_parametric_TS', N, T, {'upper_bound': None}, step)

# Third curve: model.Cp * log(1 + t) for t = 0, ..., T - 1.
all_r = [r_1, r_2, model.Cp * np.log(1 + np.arange(T))]

# One column per curve, one row per entry of the curves.
res = pd.DataFrame(all_r).T
res.columns = ['NP-TS', 'NP-TS (unknown bound)', 'lower bound']

# Force the first value of the reference curve to 0. This is written as a
# single .loc assignment: the chained form `res[col].iloc[0] = 0` assigns to
# an intermediate Series, which triggers a chained-assignment warning and
# leaves `res` unchanged when pandas Copy-on-Write is active.
res.loc[res.index[0], 'lower bound'] = 0

In [None]:
# Draw every column of `res` as a curve on a single darkgrid-styled figure.
with sns.axes_style('darkgrid'):
    fig, ax = plt.subplots(1, 1, figsize=(11, 7))
    res.plot(ax=ax)

    ax.set_title(
        'Average cumulative regret over {} runs for {:s} bandit'.format(
            N, mapping_name[bandit]
        )
    )
    ax.legend(loc='lower right')

    fig.tight_layout()
    plt.show()

### Unbounded arms (Exponential)

In [None]:
# Bandit family to simulate: one of 'B', 'G', 'LG', 'Exp', 'TG'.
bandit = 'Exp'
# Parameters for the arms distribution.
p = [1.0, 0.8]
# Horizon (T), number of Monte Carlo runs (N) and storage step: if results
# are saved, trajectories are stored for all rounds t such that t % step == 0.
T, N, step = 1000, 25, 25

In [None]:
# Instantiate the bandit class selected by `bandit` with the arm parameters.
model = mapping[bandit](p)

# Run non-parametric Thompson Sampling twice: once with upper_bound=1.0 and
# once with upper_bound=None. Only the first returned value is kept
# (presumably the regret averaged over the N runs -- TODO confirm in MAB).
# NOTE(review): this section is titled "Unbounded arms", yet the first run
# passes upper_bound=1.0 -- confirm this is the intended comparison.
r_1, _ = model.MC_regret('non_parametric_TS', N, T, {'upper_bound': 1.0}, step)
r_2, _ = model.MC_regret('non_parametric_TS', N, T, {'upper_bound': None}, step)

# Third curve: model.Cp * log(1 + t) for t = 0, ..., T - 1.
all_r = [r_1, r_2, model.Cp * np.log(1 + np.arange(T))]

# One column per curve, one row per entry of the curves.
res = pd.DataFrame(all_r).T
res.columns = ['NP-TS', 'NP-TS (unknown bound)', 'lower bound']

# Force the first value of the reference curve to 0. This is written as a
# single .loc assignment: the chained form `res[col].iloc[0] = 0` assigns to
# an intermediate Series, which triggers a chained-assignment warning and
# leaves `res` unchanged when pandas Copy-on-Write is active.
res.loc[res.index[0], 'lower bound'] = 0

In [None]:
# Draw every column of `res` as a curve on a single darkgrid-styled figure.
with sns.axes_style('darkgrid'):
    fig, ax = plt.subplots(1, 1, figsize=(11, 7))
    res.plot(ax=ax)

    ax.set_title(
        'Average cumulative regret over {} runs for {:s} bandit'.format(
            N, mapping_name[bandit]
        )
    )
    ax.legend(loc='lower right')

    fig.tight_layout()
    plt.show()

### Unbounded arms (Log-normal, heavy-tailed)

In [None]:
# Bandit family to simulate: one of 'B', 'G', 'LG', 'Exp', 'TG'.
bandit = 'LG'
# Parameters for the arms distribution.
p = [[0.1, 0.3], [0.2, 0.25]]
# Horizon (T), number of Monte Carlo runs (N) and storage step: if results
# are saved, trajectories are stored for all rounds t such that t % step == 0.
T, N, step = 1000, 25, 25

In [None]:
# Instantiate the bandit class selected by `bandit` with the arm parameters.
model = mapping[bandit](p)

# Run non-parametric Thompson Sampling twice: once with upper_bound=1.0 and
# once with upper_bound=None. Only the first returned value is kept
# (presumably the regret averaged over the N runs -- TODO confirm in MAB).
# NOTE(review): this section is titled "Unbounded arms", yet the first run
# passes upper_bound=1.0 -- confirm this is the intended comparison.
r_1, _ = model.MC_regret('non_parametric_TS', N, T, {'upper_bound': 1.0}, step)
r_2, _ = model.MC_regret('non_parametric_TS', N, T, {'upper_bound': None}, step)

# Third curve: model.Cp * log(1 + t) for t = 0, ..., T - 1.
all_r = [r_1, r_2, model.Cp * np.log(1 + np.arange(T))]

# One column per curve, one row per entry of the curves.
res = pd.DataFrame(all_r).T
res.columns = ['NP-TS', 'NP-TS (unknown bound)', 'lower bound']

# Force the first value of the reference curve to 0. This is written as a
# single .loc assignment: the chained form `res[col].iloc[0] = 0` assigns to
# an intermediate Series, which triggers a chained-assignment warning and
# leaves `res` unchanged when pandas Copy-on-Write is active.
res.loc[res.index[0], 'lower bound'] = 0

In [None]:
# Draw every column of `res` as a curve on a single darkgrid-styled figure.
with sns.axes_style('darkgrid'):
    fig, ax = plt.subplots(1, 1, figsize=(11, 7))
    res.plot(ax=ax)

    ax.set_title(
        'Average cumulative regret over {} runs for {:s} bandit'.format(
            N, mapping_name[bandit]
        )
    )
    ax.legend(loc='lower right')

    fig.tight_layout()
    plt.show()