from __future__ import division
import time
import numpy as np
class Bandit(object):
    """Abstract interface for a multi-armed bandit.

    Subclasses must override :meth:`generate_reward`.
    """

    def generate_reward(self, i):
        """Return the reward for pulling machine ``i``.

        The base class provides no implementation and always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError()
class BernoulliBandit(Bandit):
    """Bandit whose machines each pay a reward of 1 with a fixed probability.

    Attributes:
        n: Number of machines.
        probas: Per-machine reward probabilities (indexable by machine id).
        best_proba: The largest value in ``probas``.
    """

    def __init__(self, n, probas=None):
        """Create a bandit with ``n`` machines.

        Args:
            n: Number of machines.
            probas: Optional sequence of ``n`` reward probabilities. When
                omitted, each probability is drawn with
                ``np.random.random()``.

        Raises:
            AssertionError: If ``probas`` is given and its length is not ``n``.
        """
        assert probas is None or len(probas) == n
        self.n = n
        if probas is None:
            self.probas = [np.random.random() for _ in range(self.n)]
        else:
            # Only adopt the caller's probabilities when they were actually
            # supplied; assigning unconditionally would replace the random
            # draw above with None and make max() below fail.
            self.probas = probas
        self.best_proba = max(self.probas)

    def generate_reward(self, i):
        """Pull the ``i``-th machine and return its reward.

        Returns:
            1 if a fresh ``np.random.random()`` draw is below ``probas[i]``,
            otherwise 0.
        """
        # The player selected the i-th machine.
        if np.random.random() < self.probas[i]:
            return 1
        return 0
