In [1]:
import pandas as pd
import numpy as np

In [2]:
# Simulation settings shared by the experiments below.
# p: true success probability of each arm (passed as the binomial success rate).
p = [0.2, 0.5, 0.45, 0.3, 0.1, 0.45]
# n: number of trials drawn per arm by sim_experiment.
n = 1000

def sim_experiment(p, n):
    """Simulate a uniform-allocation experiment and compute Beta posteriors.

    Every arm in ``p`` receives ``n`` Bernoulli trials. Starting from a flat
    Beta(1, 1) prior, the posterior of an arm with ``k`` successes is
    Beta(1 + k, 1 + n - k).

    Parameters
    ----------
    p : sequence of float
        True success probability of each arm (any number of arms).
    n : int
        Number of trials drawn for each arm.

    Returns
    -------
    results : list of tuple
        One ``(arm index, true p, successes, posterior mean, posterior sd)``
        tuple per arm, in arm order.
    beta : list of tuple
        Posterior ``(a, b)`` parameters of each arm.
    counts : numpy.ndarray
        One entry per arm, each incremented once (all ones). It marks that
        every arm was visited equally; it is not the number of trials.
    """
    n_arms = len(p)
    # flat Beta(1, 1) prior
    prior_a, prior_b = 1, 1
    counts = np.zeros(n_arms)

    results = []
    beta = []

    for i in range(n_arms):
        # number of successes observed for arm i
        k = np.random.binomial(n, p[i])
        counts[i] += 1
        # conjugate update of the prior
        a = prior_a + k
        b = prior_b + n - k
        beta.append((a, b))
        # mean and standard deviation of Beta(a, b)
        post_mean = a / (a + b)
        post_sd = (a * b / ((a + b) ** 2 * (a + b + 1))) ** 0.5
        results.append((i, p[i], k, post_mean, post_sd))

    return results, beta, counts

# Run one uniform-allocation experiment. The last line displays the per-arm
# (index, true p, successes, posterior mean, posterior sd) tuples.
results, beta, counts_rate = sim_experiment(p, n)
results

[(0, 0.2, 210, 0.21057884231536927, 0.012873934089766234),
 (1, 0.5, 511, 0.5109780439121756, 0.01578391859751511),
 (2, 0.45, 448, 0.4481037924151697, 0.015702454661330464),
 (3, 0.3, 289, 0.2894211576846307, 0.014319267679893881),
 (4, 0.1, 96, 0.0968063872255489, 0.00933666913018483),
 (5, 0.45, 466, 0.4660678642714571, 0.015751326841668942)]

In [3]:
# Third value returned by sim_experiment: one entry per arm.
counts_rate

array([1., 1., 1., 1., 1., 1.])

B

In [15]:
# Monte Carlo estimate of P(arm i is best): draw from every arm's Beta
# posterior and count how often each arm holds the largest draw.
n_draws = 10000
# One row of draws per arm; iterating over `beta` avoids hard-coding the arm count
samples = np.array([np.random.beta(a_i, b_i, size=n_draws) for a_i, b_i in beta])
# An arm wins a draw (column) when its sample equals that column's maximum
best_counts = (samples == samples.max(axis=0)).sum(axis=1)
posterior_best_probs = best_counts / n_draws
pd.DataFrame(posterior_best_probs)

Unnamed: 0,0
0,0.0
1,0.9727
2,0.0025
3,0.0
4,0.0
5,0.0248


C

In [21]:
# Thompson-sampling experiment
def thompson_experiment(p, n, n_rounds=None):
    """Run Thompson sampling on Bernoulli arms with flat Beta(1, 1) priors.

    In every round one value is drawn from each arm's current Beta posterior,
    the arm with the largest draw is pulled once, and that arm's posterior is
    updated with the observed success or failure.

    Parameters
    ----------
    p : sequence of float
        True success probability of each arm.
    n : int
        Per-arm trial budget; only used to derive the default ``n_rounds``.
    n_rounds : int, optional
        Total number of pulls. Defaults to ``n * len(p)``, which is 6000 for
        six arms and ``n = 1000`` (the value that used to be hard-coded).

    Returns
    -------
    results_thompson : list
        Single-element list holding ``(posterior means, posterior sds)``,
        each an array with one entry per arm.
    betas_thompson : tuple
        ``(a, b)`` arrays of the final Beta parameters per arm.
    counts : numpy.ndarray
        Number of pulls allocated to each arm.
    """
    n_arms = len(p)
    if n_rounds is None:
        n_rounds = n * n_arms

    # flat priors
    a = np.ones(n_arms)
    b = np.ones(n_arms)
    counts = np.zeros(n_arms)

    for _ in range(n_rounds):
        # choose the arm whose posterior draw is largest
        k = np.argmax(np.random.beta(a, b))
        counts[k] += 1

        # observe one Bernoulli outcome from the chosen arm
        y = np.random.binomial(1, p[k])
        # update that arm's posterior
        a[k] += y
        b[k] += 1 - y

    # mean and standard deviation of each final Beta(a, b)
    post_means = a / (a + b)
    post_sds = np.sqrt(a * b / ((a + b) ** 2 * (a + b + 1)))
    results_thompson = [(post_means, post_sds)]

    return results_thompson, (a, b), counts

# Same arm probabilities and n as defined for the uniform experiment.
p = [0.2, 0.5, 0.45, 0.3, 0.1, 0.45]
n = 1000

# Run one Thompson-sampling experiment. The last line displays its
# (posterior means, posterior sds) summary.
# NOTE: this rebinds counts_rate, which previously held sim_experiment's counts.
results_thompson, betas_thompson, counts_rate = thompson_experiment(p, n)
results_thompson

[(array([0.20833333, 0.50023496, 0.45258621, 0.34883721, 0.20689655,
         0.46313964]),
  array([0.08122329, 0.00766334, 0.02308246, 0.05109714, 0.07395728,
         0.01467857]))]

In [19]:
# Per-arm pull counts returned by thompson_experiment.
counts_rate

array([  31., 5146.,  338.,   38.,   11.,  436.])

In [22]:
# Monte Carlo estimate of P(arm i is best) under the Thompson posteriors.
# betas_thompson is the (a, b) pair of per-arm parameter arrays.
n_draws = 10000
post_a, post_b = betas_thompson
# One row of draws per arm; zip avoids hard-coding the arm count
samples = np.array([np.random.beta(a_i, b_i, size=n_draws) for a_i, b_i in zip(post_a, post_b)])
# An arm wins a draw (column) when its sample equals that column's maximum
best_counts = (samples == samples.max(axis=0)).sum(axis=1)
posterior_best_probs = best_counts / n_draws

# Share of all pulls allocated to each arm (normalised by the actual total)
fractions = counts_rate / counts_rate.sum()
posterior_best_probs

array([6.000e-04, 9.597e-01, 2.390e-02, 2.400e-03, 9.000e-04, 1.250e-02])

In [23]:
# Share of the Thompson-sampling pulls that went to each arm.
fractions

array([0.00366667, 0.709     , 0.077     , 0.014     , 0.0045    ,
       0.19183333])

D

In [24]:
# Repeat the uniform-allocation experiment 100 times and record, for every
# repetition, the expected regret and the arm judged most likely to be best.
iterations = []
results_iter = []
rates = []
top_mean = []
regret_ran = []
n_draws = 10000
for _ in range(100):
    results, betas, counts_rate = sim_experiment(p, n)

    # Expected success rate of the allocation: true p weighted by each arm's
    # share of counts_rate (equal shares here), then regret against the best arm
    probs = np.sum(counts_rate * p / counts_rate.sum())
    regret_ran.append(max(p) - probs)

    # Posterior probability that each arm is best, estimated by sampling
    samples = np.array([np.random.beta(a_i, b_i, size=n_draws) for a_i, b_i in betas])

    # Count how many times each arm has the max sample
    best_counts = (samples == samples.max(axis=0)).sum(axis=1)

    # 1-based label of the arm that most often held the maximum
    iterations.append(np.argmax(best_counts) + 1)

    # Probability of the winning arm being best. Divide by the number of draws
    # (this used to divide by 100, which did not yield a probability).
    rate_iter = best_counts.max() / n_draws
    rates.append(rate_iter)

iterations_rand = np.array(iterations)
iterations_rand

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [25]:
# Share of the 100 repetitions in which each arm (1-based label) was picked as best
picked_arms = iterations_rand.tolist()
df = pd.DataFrame({
    'true_prob': p,
    'fraction': [picked_arms.count(arm) / 100 for arm in range(1, 7)],
})
df


Unnamed: 0,true_prob,fraction
0,0.2,0.0
1,0.5,0.99
2,0.45,0.01
3,0.3,0.0
4,0.1,0.0
5,0.45,0.0


In [26]:
# Repeat the Thompson-sampling experiment 100 times and record, for every
# repetition, the expected regret and the arm judged most likely to be best.
iterations = []
results_iter = []
rates = []
regret_thom = []
n_draws = 10000
for _ in range(100):
    results_thompson, betas_thompson, counts_rate = thompson_experiment(p, n)

    # Expected success rate of the allocation: true p weighted by each arm's
    # share of the pulls, then regret against the best arm
    probs = np.sum(counts_rate * p / counts_rate.sum())
    regret_thom.append(max(p) - probs)

    # Posterior probability that each arm is best, estimated by sampling
    post_a, post_b = betas_thompson
    samples = np.array([np.random.beta(a_i, b_i, size=n_draws) for a_i, b_i in zip(post_a, post_b)])

    # Count how many times each arm has the max sample
    best_counts = (samples == samples.max(axis=0)).sum(axis=1)
    results_iter.append(best_counts)

    # 1-based label of the arm that most often held the maximum
    iterations.append(np.argmax(best_counts) + 1)
    rate_iter = best_counts.max() / n_draws
    rates.append(rate_iter)

iterations_thom = np.array(iterations)
iterations_thom

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [27]:
# Share of the 100 repetitions in which each arm (1-based label) was picked as best
picked_arms = iterations_thom.tolist()
df = pd.DataFrame({
    'true_prob': p,
    'fraction': [picked_arms.count(arm) / 100 for arm in range(1, 7)],
})
df

Unnamed: 0,true_prob,fraction
0,0.2,0.0
1,0.5,1.0
2,0.45,0.0
3,0.3,0.0
4,0.1,0.0
5,0.45,0.0


E

In [28]:
# Mean regret over the 100 repetitions of the uniform-allocation experiment.
np.mean(regret_ran)

np.float64(0.16666666666666674)

In [29]:
# Mean regret over the 100 repetitions of the Thompson-sampling experiment.
np.mean(regret_thom)

np.float64(0.012434750000000001)