# UCB1 BanditArm class

In [None]:
import numpy as np

# BanditArm variant that also tracks an exponentially weighted moving average (EWMA) of its rewards
class BanditArm:
    """Bernoulli bandit arm with a switchable win probability and a reward EWMA.

    The arm draws a win/loss on every pull using its current probability
    ``p``. Every ``period`` pulls it folds the mean of the most recent
    ``period`` rewards into an exponentially weighted moving average
    (``ewma``) with smoothing factor ``alpha``. Independently of that,
    ``update`` maintains a plain running mean of whatever values it is fed.

    Attributes:
        all_p: Indexable collection of win probabilities the arm can use.
        p: Win probability currently in effect (starts at ``all_p[0]``).
        p_estimate: Running mean of the values passed to ``update``.
        N: Number of ``update`` calls so far (kept as a float).
        rewards: Every outcome returned by ``pull``, in order.
        period: Number of pulls between two EWMA refreshes.
        alpha: Weight given to the newest window when refreshing the EWMA.
        ewma: Current EWMA of the windowed reward means (0.0 until the
            first window completes).
    """

    def __init__(self, all_p, period, alpha):
        """Create an arm that starts with win probability ``all_p[0]``.

        Args:
            all_p: Indexable collection of win probabilities.
            period: Number of pulls per EWMA window.
            alpha: EWMA smoothing factor applied to the newest window mean.
        """
        self.all_p = all_p
        self.p = self.all_p[0]
        self.p_estimate = 0.
        self.N = 0.

        self.rewards = []

        self.period = period
        self.alpha = alpha

        self.ewma = 0.
        # Explicit "first window seen" flag: an EWMA of exactly 0 is a
        # legitimate value (e.g. a window with no wins), so the value itself
        # cannot double as the "not yet initialised" marker.
        self._ewma_seeded = False

    def pull(self):
        """Draw one reward, record it, and refresh the EWMA on window boundaries.

        Returns:
            Truthy if the draw was a win (a uniform sample fell below ``p``),
            falsy otherwise.
        """
        x = np.random.random() < self.p
        self.rewards.append(x)
        # The EWMA is only refreshed once a full window of `period` pulls
        # has accumulated since the previous refresh.
        if len(self.rewards) % self.period == 0:
            # Always average exactly the latest window, whatever its length.
            window_mean = np.mean(self.rewards[-self.period:])
            if not self._ewma_seeded:
                # The first completed window seeds the average directly.
                self.ewma = window_mean
                self._ewma_seeded = True
            else:
                self.ewma = (
                    self.ewma * (1 - self.alpha) + window_mean * self.alpha
                )
        return x

    def update(self, x):
        """Fold the observation ``x`` into the running mean ``p_estimate``.

        Args:
            x: Observed reward to incorporate.
        """
        self.N += 1.
        self.p_estimate = ((self.N - 1) * self.p_estimate + x) / self.N

    def update_p(self, cursor_p):
        """Switch the active win probability to ``all_p[cursor_p]``.

        Args:
            cursor_p: Index into ``all_p`` of the probability to use next.
        """
        self.p = self.all_p[cursor_p]

# Problem Conditions

In [None]:
# Number of trials configured for the experiment.
num_trials = 3000

EPSILON = 0.1

# Value grids; presumably the `period` / `alpha` settings tried for
# BanditArm -- confirm against the cells that consume them.
periods = [3, 4, 5, 6, 7, 8]
alphas = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

"""
a = [0.2, 0.8, 0.3]
b = [0.9, 0.7, 0.1]
c = [0.3, 0.4, 0.4]
"""

# One probability list and one rate per arm (a, b, c).
a_p = [0.25, 0.5, 0.75]
a_rate = 1
b_p = [0.75, 0.25, 0.5]
b_rate = 1.5
c_p = [0.5, 0.75, 0.25]
c_rate = 1.2

probabilities = [a_p, b_p, c_p]
rates = [a_rate, b_rate, c_rate]

# zip(*probabilities) groups the values that share a position across the arms.
# Baseline: average of the across-arm mean at every position.
random_expected_win_rate = np.mean(
    [np.mean(position) for position in zip(*probabilities)]
)

# Upper bound: average of the best arm's value at every position.
max_expected_win_rate = np.mean(
    [np.max(position) for position in zip(*probabilities)]
)

# Ratio of the upper bound to the baseline.
max_uplift = max_expected_win_rate / random_expected_win_rate

# Bare expression so the notebook cell displays the three figures.
random_expected_win_rate, max_expected_win_rate, max_uplift