In [1]:
import pandas as pd
import numpy as np

In [150]:
# Simulate experiment
p = [0.2, 0.5, 0.45, 0.3, 0.1, 0.45]
n = 1000

def sim_experiment(p, n):

    #flat priors
    a = 1 
    b = 1

    results = []
    beta = []
    for i in range(0, 6):
        #get number of successes for arm i
        k = np.random.binomial(n, p[i])
        #update the prior
        a = 1 + k
        b = 1 + n - k
        #save parameters of beta distribution
        beta.append((a, b))
        #calculate posterior mean and sd
        post_mean = a/(a+b)
        post_sd = (a*b/((a+b)**2*(a+b+1)))**0.5
        results.append((i, p[i], k, post_mean, post_sd))

    return results, beta

#run the experiment
results, beta = sim_experiment(p, n)
results

[(0, 0.2, 191, 0.19161676646706588, 0.012427247155351472),
 (1, 0.5, 530, 0.5299401197604791, 0.015759394458153053),
 (2, 0.45, 458, 0.45808383233532934, 0.01573214967156243),
 (3, 0.3, 284, 0.2844311377245509, 0.01424504481257016),
 (4, 0.1, 102, 0.1027944111776447, 0.009589153487896764),
 (5, 0.45, 445, 0.44510978043912175, 0.01569230126017336)]

B

In [151]:
#get post probabilities that arm i is best
samples = np.array([np.random.beta(beta[i][0],beta[i][1], size=100000) for i in range(0, 6)])
# Count how many times each arm has the max sample
best_counts = (samples == samples.max(axis=0)).sum(axis=1)
posterior_best_probs = best_counts / 100000
pd.DataFrame(posterior_best_probs)

Unnamed: 0,0
0,0.0
1,0.99913
2,0.00077
3,0.0
4,0.0
5,0.0001


C

In [152]:
# Simulate experiment
def thompson_experiment(p, n):
    #flat priors
    a = np.ones(len(p))
    b = np.ones(len(p))
    counts = np.zeros(len(p))

    results_thompson = []
    betas_thompson = []
    counts_rate = []

    for _ in range(6000):
        #get number of successes for arm i
        k = np.argmax(np.random.beta(a, b))
        counts[k] += 1

        y = np.random.binomial(1, p[k])
        #update the prior
        a[k] += y
        b[k] += (1 - y)
        #save parameters of beta distribution

    post_means = a / (a + b)
    post_sds  = np.sqrt(a * b / ((a + b)**2 * (a + b + 1)))
    results_thompson.append((post_means, post_sds))
    counts_rate.append(counts)
    betas_thompson.append((a, b))

    return results_thompson, betas_thompson[0], counts_rate[0]

p = [0.2, 0.5, 0.45, 0.3, 0.1, 0.45]
n = 1000

results_thompson, betas_thompson, counts_rate = thompson_experiment(p, n)
results_thompson

[(array([0.22222222, 0.48620772, 0.4529991 , 0.35869565, 0.21212121,
         0.43728223]),
  array([0.07856742, 0.00773992, 0.0148875 , 0.04973403, 0.07011038,
         0.02068675]))]

In [153]:
counts_rate

array([  25., 4167., 1115.,   90.,   31.,  572.])

In [154]:
#get post probabilities that arm i is best
samples = np.array([np.random.beta(betas_thompson[0][i],betas_thompson[1][i], size=100000) for i in range(0, 6)])
# Count how many times each arm has the max sample
best_counts = (samples == samples.max(axis=0)).sum(axis=1)
posterior_best_probs = best_counts / 100000

fractions = counts_rate / 6000
posterior_best_probs

array([1.620e-03, 9.540e-01, 2.420e-02, 6.990e-03, 4.200e-04, 1.277e-02])

In [155]:
fractions

array([0.00416667, 0.6945    , 0.18583333, 0.015     , 0.00516667,
       0.09533333])

D

In [156]:
iterations = []
results_iter = []
rates = []
top_mean = []
for _ in range(100):
    results,betas = sim_experiment(p, n)
    # results_iter.append(np.argmax(results[0]))

    #to get highest posterior probility
    #get post probabilities that arm i is best
    samples = np.array([np.random.beta(betas[i][0],betas[i][1], size=100) for i in range(0, 6)])
    
    # Count how many times each arm has the max sample
    best_counts = (samples == samples.max(axis=0)).sum(axis=1)
    iterations.append(np.argmax(best_counts) + 1)

    rate_iter = best_counts.max() / 100
    rates.append(rate_iter)

iterations_rand = np.array(iterations)
iterations_rand

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       6, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [157]:
#get dataframe of the fraction of times each arm appears in list
df = pd.DataFrame()
df['true_prob'] = p
# create column of the fraction of times each arm value is in list iterations
df['fraction'] = [list(iterations_rand).count(i)/100 for i in range(1,7)]
df


Unnamed: 0,true_prob,fraction
0,0.2,0.0
1,0.5,0.95
2,0.45,0.02
3,0.3,0.0
4,0.1,0.0
5,0.45,0.03


In [None]:
# samples = np.random.beta(
#     betas_thompson[0][np.newaxis, :],
#     betas_thompson[1][np.newaxis, :],
#     size=(100, 6)
# )
# best_idx = np.argmax(samples, axis=1)   # length-M array of ints in 0..K-1

# # 3) count and normalize
# counts = np.bincount(best_idx, minlength=6)
# posterior_best_prob = counts / 100
# results_iter.append(best_idx)

In [158]:
iterations = []
results_iter = []
rates = []
regret = []
for _ in range(100):
    results_thompson,betas_thompson, counts_rate = thompson_experiment(p, n)
    
    #get post probabilities that arm i is best
    samples = np.array([np.random.beta(betas_thompson[0][i],betas_thompson[1][i], size=(100)) for i in range(6)])
    # reg = np.array(max([p[i] - np.mean(samples[i]) for i in range(6)]))
    # regret.append(reg)
    
    # Count how many times each arm has the max sample
    best_counts = (samples == samples.max(axis=0)).sum(axis=1)
    results_iter.append(best_counts)

    probs = best_counts*p/100
    regret.append(np.mean(max(p) - probs[probs!=0]))

    # iteration_rand2 = iterations_rand - 1
    # p_values = np.array([p[i] for i in iteration_rand2])
    # max(p)-np.mean(p_values)

    # iterations.append(best_counts.max())
    iterations.append(np.argmax(best_counts)+1)
    rate_iter = best_counts.max() / 100
    rates.append(rate_iter)

iterations_thom = np.array(iterations)
iterations_thom

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [159]:
np.mean(regret)

np.float64(0.2758050833333333)

In [160]:
best_counts*p

array([ 0.  , 49.  ,  0.  ,  0.  ,  0.1 ,  0.45])

In [161]:
probs = best_counts*p/100
np.mean(max(p) - probs[probs!=0])


np.float64(0.3348333333333333)

In [162]:
#get dataframe of the fraction of times each arm appears in list
df = pd.DataFrame()
df['true_prob'] = p
# create column of the fraction of times each arm value is in list iterations
df['fraction'] = [list(iterations_thom).count(i)/100 for i in range(1,7)]
df

Unnamed: 0,true_prob,fraction
0,0.2,0.0
1,0.5,1.0
2,0.45,0.0
3,0.3,0.0
4,0.1,0.0
5,0.45,0.0


E

In [163]:
iteration_rand2 = iterations_rand - 1
p_values = np.array([p[i] for i in iteration_rand2])
max(p)-np.mean(p_values)


np.float64(0.0025000000000000022)

In [164]:
iterations_thom2 = iterations_thom - 1
p_values = np.array([p[i] for i in iterations_thom2])
max(p)-np.mean(p_values)


np.float64(0.0)