# Sample Complexity

## Initialization

In [1]:
## Load libraries (Cython, NumPy and Pyplot)
# IPython magic: load the Cython extension into the running kernel.
%load_ext Cython
import numpy as np
import sys
import matplotlib.pyplot as plt
# Add the parent directory of sys.path[0] to the import search path. This runs
# before the `src` imports below -- presumably so the project's `src` package
# resolves when the notebook sits in a subdirectory (TODO confirm).
sys.path.append(sys.path[0] + "/..")

from src.bandits import TLMMAB
from src.strategies_arm_identification import OSRLSC, MTTrackAndStopD
from src.utils import RewardBernoulliDistribution
# Seed NumPy's global random generator.
# NOTE(review): this only makes the simulations repeatable if the `src` code
# draws its randomness from np.random -- confirm.
np.random.seed(1)

## Set up algorithms
Specify the mean rewards, the risk level, and the strategies to compare.

In [6]:
# Mean rewards laid out as means[task, g, h] with X=2 tasks, G=3 and H=2.
# g0 has the largest means in both tasks (the optimal g); the g1 and g2 rows
# are swapped between Task 1 and Task 2.
means = np.array([
    #  g0            g1            g2
    [[0.5, 0.45], [0.35, 0.33], [0.1, 0.05]],   # Task 1
    [[0.5, 0.45], [0.1, 0.05], [0.35, 0.33]],   # Task 2
])

# Risk level handed to each strategy, and the number of simulation repetitions
risk, nsims = 0.1, 1

# Strategies to compare. Each entry is a triple:
#   (strategy class, label used as the key for its results, extra keyword arguments).
# The same risk value is supplied to both, under the keyword each entry names.
strategies = [
    (OSRLSC, "OSRL-SC", dict(delta_g=risk)),
    (MTTrackAndStopD, "Track and Stop", dict(delta=risk)),
]

## Simulations

In [7]:
# Simulation results grouped by strategy label: {label: [result, ...]}
data = {label: [] for _, label, _ in strategies}

# The outer loop repeats the experiment nsims times; within each repetition
# every strategy is run once on a freshly constructed bandit model.
for sim_index in range(nsims):
    for strategy_cls, label, extra_kwargs in strategies:
        bandit_model = TLMMAB(
            means=means,
            optimal_g=0,  # g0 is the optimal g in the means table
            strategy=strategy_cls,
            reward=RewardBernoulliDistribution,
            offline=True,
            **extra_kwargs
        )
        data[label].append(bandit_model.simulate())

## Results

In [16]:
# For every strategy, summarise the `T` attribute of its simulation results
# (printed as "Time"): mean, standard deviation, maximum and minimum.
summary_template = '[{}]: Avg Time {} - Std  {} - Max {} - Min {}'
for label, runs in data.items():
    times = [run.T for run in runs]
    stats = (np.mean(times), np.std(times), np.max(times), np.min(times))
    print(summary_template.format(label, *stats))

[OSRL-SC]: Avg Time 3108.0 - Std  0.0 - Max 3108.0 - Min 3108.0
[Track and Stop]: Avg Time 7571.0 - Std  0.0 - Max 7571.0 - Min 7571.0
