In [61]:
import numpy as np


class BanditArm:

    def __init__(self, p):
        self.p = p                          # true win probability
        self.p_estimate = 0.                # estimate win probability
        self.N = 0.                         # total collected

    def pull(self):
        return np.random.random() < self.p  # success if random number (0, 1) < p

    def update(self, x):
        self.N += 1.
        self.p_estimate = ((self.N - 1) * self.p_estimate + x) / self.N

In [62]:
num_trials = 1000

performance_avg = 0

total_performance_eps = []

EPSILON = 0.1

bandit_probabilities = [0.25, 0.5, 0.75]

bandits = [BanditArm(p) for p in bandit_probabilities]

for _ in range(100):
    
    rewards = np.zeros(num_trials)

    for i in range(num_trials):

        if np.random.random() < EPSILON:

            j = np.random.randint(len(bandits))

        else:

            j = np.argmax([b.p_estimate for b in bandits])

        x = bandits[j].pull()

        rewards[i] = x

        bandits[j].update(x)
        
    performance = np.cumsum(rewards) / (np.arange(num_trials) + 1)
    
    total_performance_eps.append(performance[-1])
        
    if np.sum(performance) == 0:
        
        performance_avg = performance
        
    else:
        
        performance_avg = np.mean([performance_avg, performance], axis=0)

In [36]:
performance_avg

array([0.61694028, 0.74497687, 0.62164853, 0.52824787, 0.52248216,
       0.53841689, 0.60435038, 0.64490203, 0.67716176, 0.70939637,
       0.73510457, 0.75684729, 0.75429922, 0.76912856, 0.78399797,
       0.74168847, 0.75593517, 0.75207472, 0.72226115, 0.69376122,
       0.6964327 , 0.70880242, 0.70987143, 0.71674653, 0.7151683 ,
       0.72611104, 0.71757333, 0.7142317 , 0.71882924, 0.70788702,
       0.70318714, 0.68680203, 0.69244561, 0.68069084, 0.68263399,
       0.67319185, 0.6651323 , 0.67205879, 0.670827  , 0.67591907,
       0.67753224, 0.67095676, 0.67823985, 0.68451914, 0.68782906,
       0.69248533, 0.69268829, 0.68905033, 0.69283994, 0.68148065,
       0.68683931, 0.68804329, 0.69328427, 0.69857903, 0.70405911,
       0.70929771, 0.71432795, 0.71413479, 0.71367268, 0.71218429,
       0.71276298, 0.71229151, 0.71485865, 0.71328268, 0.71681857,
       0.71683947, 0.71351422, 0.71391939, 0.71442341, 0.70979585,
       0.70650431, 0.71054327, 0.70303368, 0.70188663, 0.70085

In [37]:
import plotly.graph_objs as go

In [38]:
data = list()

data.append(go.Scatter(
            y=performance_avg,
        ))

figure = go.Figure({'data': data,
              'layout': {'legend': {'x': 1, 'y': 0.8},
                         'margin': {'t': 30},
                         'height': 385}})

figure.show()

In [59]:
class BanditArm:

    def __init__(self, p):
        self.p = p                              # true win probability
        self.a = 1                              # rewards
        self.b = 1                              # penalties
        self.N = 0                              # total collected

    def pull(self):
        # draw a 1 with probability p
        return np.random.random() < self.p      # success if random number (0, 1) < p

    def sample(self):
        return np.random.beta(self.a, self.b)   # sample probability estimate from beta distribution

    def update(self, x):
        self.a += x
        self.b += 1 - x
        self.N += 1

In [60]:
performance_avg = 0

total_performance_ts = []

bandits = [BanditArm(p) for p in bandit_probabilities]

for _ in range(100):
    
    rewards = np.zeros(num_trials)
    
    total_plays = 0

    for i in range(num_trials):

        j = np.argmax([b.sample() for b in bandits])

        x = bandits[j].pull()

        total_plays += 1

        rewards[i] = x

        bandits[j].update(x)
        
    performance = np.cumsum(rewards) / (np.arange(num_trials) + 1)
    
    total_performance_ts.append(performance[-1])
        
    if np.sum(performance) == 0:
        
        performance_avg = performance
        
    else:
        
        performance_avg = np.mean([performance_avg, performance], axis=0)

In [48]:
data = list()

data.append(go.Scatter(
            y=performance_avg,
        ))

figure = go.Figure({'data': data,
              'layout': {'legend': {'x': 1, 'y': 0.8},
                         'margin': {'t': 30},
                         'height': 385}})

figure.show()

In [86]:
import plotly.figure_factory as ff
import pandas as pd

df = pd.DataFrame(dict(eps=total_performance_eps, ts=total_performance_ts))
df = df.unstack().reset_index()
df.columns = ['algo', 'useless', 'perf']
del df['useless']

df

Unnamed: 0,algo,perf
0,eps,0.713
1,eps,0.725
2,eps,0.748
3,eps,0.720
4,eps,0.730
...,...,...
195,ts,0.750
196,ts,0.731
197,ts,0.755
198,ts,0.747


In [87]:
import plotly.express as px

In [88]:
fig = px.violin(df, y='algo', x='perf', orientation='h', color='algo', box=True)
fig.show()