In [1]:
import pandas as pd
import numpy as np

In [2]:
# Experiment configuration
# True success probability of each arm (arm index = position in the list).
p = [0.2, 0.5, 0.45, 0.3, 0.1, 0.45]
# Number of Bernoulli trials given to every arm by sim_experiment.
n = 1000

# Seed NumPy's global generator so the stochastic cells below produce the
# same numbers on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

def sim_experiment(p, n):
    """Simulate a fixed-allocation experiment with ``n`` trials on every arm.

    For each arm a Binomial(n, p[i]) success count ``k`` is drawn and combined
    with a flat Beta(1, 1) prior, giving a Beta(1 + k, 1 + n - k) posterior.

    Parameters
    ----------
    p : sequence of float
        True success probability of each arm; one arm is simulated per entry.
    n : int
        Number of trials run on each arm.

    Returns
    -------
    results : list of tuple
        One ``(arm_index, true_p, successes, posterior_mean, posterior_sd)``
        tuple per arm.
    beta : list of tuple
        One ``(a, b)`` pair of posterior Beta parameters per arm.
    """
    # flat Beta(1, 1) prior shared by all arms
    prior_a = 1
    prior_b = 1

    results = []
    beta = []
    # iterate over every arm in p instead of assuming there are exactly six
    for i, p_i in enumerate(p):
        # number of successes observed for arm i
        k = np.random.binomial(n, p_i)
        # conjugate update of the Beta prior with k successes, n - k failures
        a = prior_a + k
        b = prior_b + n - k
        # save parameters of the posterior Beta distribution
        beta.append((a, b))
        # posterior mean and standard deviation of Beta(a, b)
        post_mean = a/(a+b)
        post_sd = (a*b/((a+b)**2*(a+b+1)))**0.5
        results.append((i, p_i, k, post_mean, post_sd))

    return results, beta

# Run the fixed-allocation simulation once. `results` holds one summary tuple
# per arm (index, true p, successes, posterior mean, posterior sd) and `beta`
# the matching posterior (a, b) parameters; the summary is displayed.
results, beta = sim_experiment(p, n)
results

[(0, 0.2, 189, 0.18962075848303392, 0.012377605224842433),
 (1, 0.5, 493, 0.4930139720558882, 0.015786183346825328),
 (2, 0.45, 451, 0.45109780439121755, 0.015712032693149178),
 (3, 0.3, 285, 0.28542914171656686, 0.014260059567858552),
 (4, 0.1, 104, 0.10479041916167664, 0.009671028942732168),
 (5, 0.45, 430, 0.4301397205588822, 0.01563286225263242)]

## B. Posterior probability that each arm is best

In [4]:
# Monte Carlo estimate of the posterior probability that each arm is best
N_POSTERIOR_DRAWS = 100000  # posterior samples drawn per arm

# One row of Beta posterior samples per arm; iterating over `beta` itself
# avoids hard-coding the number of arms.
samples = np.array([
    np.random.beta(a_i, b_i, size=N_POSTERIOR_DRAWS)
    for a_i, b_i in beta
])
# Count, for each arm, in how many draws it holds the largest sample
best_counts = (samples == samples.max(axis=0)).sum(axis=1)
posterior_best_probs = best_counts / N_POSTERIOR_DRAWS
pd.DataFrame({'posterior_best_prob': posterior_best_probs})

Unnamed: 0,0
0,0.0
1,0.96866
2,0.02926
3,0.0
4,0.0
5,0.00208


## C. Thompson sampling experiment

In [6]:
# Simulate experiment
def thompson_experiment(p, n):
    """Run one Thompson-sampling experiment on Bernoulli arms.

    Every round draws one sample from each arm's current Beta posterior,
    pulls the arm with the largest sample, observes a Bernoulli(p[k]) reward
    and updates that arm's posterior. The number of rounds is ``n * len(p)``,
    i.e. the same total budget as giving ``n`` trials to every arm (6000
    rounds for six arms and ``n = 1000``).

    Parameters
    ----------
    p : sequence of float
        True success probability of each arm.
    n : int
        Per-arm trial budget; the total number of rounds is ``n * len(p)``.

    Returns
    -------
    results_thompson : list
        Single-element list holding the tuple ``(post_means, post_sds)`` of
        per-arm posterior means and standard deviations.
    betas_thompson : tuple
        ``(a, b)`` arrays with the final posterior Beta parameters per arm.
    counts_rate : numpy.ndarray
        Number of times each arm was pulled.
    """
    n_arms = len(p)

    # flat Beta(1, 1) prior on every arm
    a = np.ones(n_arms)
    b = np.ones(n_arms)
    counts = np.zeros(n_arms)

    # total budget derived from n rather than a hard-coded 6000
    for _ in range(n * n_arms):
        # sample every posterior once and pull the arm with the largest draw
        k = np.argmax(np.random.beta(a, b))
        counts[k] += 1

        # observe a Bernoulli reward from the chosen arm
        y = np.random.binomial(1, p[k])
        # update only the pulled arm's posterior
        a[k] += y
        b[k] += (1 - y)

    # posterior mean and standard deviation of each arm's Beta(a, b)
    post_means = a / (a + b)
    post_sds = np.sqrt(a * b / ((a + b)**2 * (a + b + 1)))

    return [(post_means, post_sds)], (a, b), counts

# Arm probabilities and n, repeated with the same values as in the
# fixed-allocation experiment cell.
p = [0.2, 0.5, 0.45, 0.3, 0.1, 0.45]
n = 1000

# One Thompson-sampling run: keeps the posterior summaries, the final Beta
# parameters (a, b) and the per-arm pull counts, then displays the summaries.
results_thompson, betas_thompson, counts_rate = thompson_experiment(p, n)
results_thompson

[(array([0.07692308, 0.50147221, 0.42211055, 0.14285714, 0.07692308,
         0.4246988 ]),
  array([0.07121693, 0.00678216, 0.03492372, 0.07460471, 0.07121693,
         0.02708732]))]

In [7]:
# Number of times each arm was pulled during the Thompson-sampling run
counts_rate

array([  11., 5432.,  197.,   19.,   11.,  330.])

In [20]:
# Monte Carlo estimate of the posterior probability that each arm is best,
# using the posteriors left by the Thompson-sampling run
N_POSTERIOR_DRAWS = 100000  # posterior samples drawn per arm

# betas_thompson is (a, b); zip pairs up each arm's parameters so the number
# of arms is not hard-coded.
samples = np.array([
    np.random.beta(a_i, b_i, size=N_POSTERIOR_DRAWS)
    for a_i, b_i in zip(betas_thompson[0], betas_thompson[1])
])
# Count, for each arm, in how many draws it holds the largest sample
best_counts = (samples == samples.max(axis=0)).sum(axis=1)
posterior_best_probs = best_counts / N_POSTERIOR_DRAWS

# Share of pulls given to each arm; dividing by the actual number of pulls
# keeps this correct if the number of rounds changes.
fractions = counts_rate / counts_rate.sum()
posterior_best_probs

array([1.9000e-04, 9.8258e-01, 1.3590e-02, 2.3000e-04, 2.2000e-04,
       3.1900e-03])

In [9]:
# Share of the Thompson-sampling pulls that went to each arm
fractions

array([0.00183333, 0.90533333, 0.03283333, 0.00316667, 0.00183333,
       0.055     ])

## D. Repeated experiments: how often each arm is selected as best

In [31]:
# Repeat the fixed-allocation experiment and record which arm looks best
N_REPEATS = 100           # number of independent experiments
N_DRAWS_PER_REPEAT = 100  # posterior samples drawn per arm in each repeat

iterations = []    # 1-based index of the winning arm in each repeat
results_iter = []  # per-arm win counts of each repeat
rates = []         # winning arm's share of the posterior draws in each repeat
top_mean = []      # not filled in this cell
for _ in range(N_REPEATS):
    results, betas = sim_experiment(p, n)

    # One row of posterior samples per arm; iterating over `betas` avoids
    # hard-coding the number of arms.
    samples = np.array([
        np.random.beta(a_i, b_i, size=N_DRAWS_PER_REPEAT)
        for a_i, b_i in betas
    ])

    # Count how many times each arm has the max sample
    best_counts = (samples == samples.max(axis=0)).sum(axis=1)
    results_iter.append(best_counts)
    # the arm with the highest estimated probability of being best (1-based)
    iterations.append(np.argmax(best_counts) + 1)

    rate_iter = best_counts.max() / N_DRAWS_PER_REPEAT
    rates.append(rate_iter)

iterations = np.array(iterations)
iterations

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [59]:
#get dataframe of the fraction of times each arm appears in list
df = pd.DataFrame()
df['true_prob'] = p
# create column of the fraction of times each arm value is in list iterations
df['fraction'] = [list(iterations).count(i)/100 for i in range(1,7)]
df


Unnamed: 0,true_prob,fraction
0,0.2,0.0
1,0.5,0.99
2,0.45,0.0
3,0.3,0.0
4,0.1,0.0
5,0.45,0.01


In [None]:
# samples = np.random.beta(
#     betas_thompson[0][np.newaxis, :],
#     betas_thompson[1][np.newaxis, :],
#     size=(100, 6)
# )
# best_idx = np.argmax(samples, axis=1)   # length-100 array of arm indices in 0..5

# # count and normalize
# counts = np.bincount(best_idx, minlength=6)
# posterior_best_prob = counts / 100
# results_iter.append(best_idx)

In [60]:
# Repeat the Thompson-sampling experiment and record which arm looks best
N_REPEATS = 100           # number of independent experiments
N_DRAWS_PER_REPEAT = 100  # posterior samples drawn per arm in each repeat

iterations = []    # 1-based index of the winning arm in each repeat
results_iter = []  # per-arm win counts of each repeat
rates = []         # winning arm's share of the posterior draws in each repeat
for _ in range(N_REPEATS):
    results_thompson, betas_thompson, counts_rate = thompson_experiment(p, n)

    # betas_thompson is (a, b); zip pairs up each arm's parameters so the
    # number of arms is not hard-coded.
    samples = np.array([
        np.random.beta(a_i, b_i, size=N_DRAWS_PER_REPEAT)
        for a_i, b_i in zip(betas_thompson[0], betas_thompson[1])
    ])

    # Count how many times each arm has the max sample
    best_counts = (samples == samples.max(axis=0)).sum(axis=1)
    results_iter.append(best_counts)

    # the arm with the highest estimated probability of being best (1-based)
    iterations.append(np.argmax(best_counts) + 1)
    rate_iter = best_counts.max() / N_DRAWS_PER_REPEAT
    rates.append(rate_iter)

iterations = np.array(iterations)
iterations

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 6, 2, 6, 2])

In [61]:
#get dataframe of the fraction of times each arm appears in list
df = pd.DataFrame()
df['true_prob'] = p
# create column of the fraction of times each arm value is in list iterations
df['fraction'] = [list(iterations).count(i)/100 for i in range(1,7)]
df

Unnamed: 0,true_prob,fraction
0,0.2,0.0
1,0.5,0.98
2,0.45,0.0
3,0.3,0.0
4,0.1,0.0
5,0.45,0.02
