In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from bandit import bandit
from thompson_sampling.run import test_thompson_sampler
import time


In [3]:
from numpy.random import random, seed

In [4]:
seed(187)

nturns = 100
samplers = 100
n_bandits = 4
winning_probs_list = [random(n_bandits) for _ in range(samplers)]
bandit_lists = [ [bandit(winning_prob) for winning_prob in winning_probs] for winning_probs in winning_probs_list]

In [5]:
expected_outcomes = []
best_expected_outcomes = []
start = time.time()
turn_list = []
for bandits, winning_probs in zip(bandit_lists,winning_probs_list):
    turns, wins = test_thompson_sampler(turns=nturns,bandits=bandits)
    avg_prob = sum([turns[a]*b for a,b in enumerate(winning_probs)])/sum(turns.values())
    expected_outcomes.append(avg_prob)
    best_expected_outcomes.append(max(winning_probs))
    turn_list.append(turn_list)
end = time.time()
time_taken = end - start

print(f"Thompson sampling time taken {time_taken}")

Thompson sampling time taken 176.6212980747223


In [6]:
best_expected_outcomes

[0.704413700943969,
 0.8106541406532113,
 0.7085187856067915,
 0.7340770274783581,
 0.9824805150669632,
 0.8842912737013646,
 0.9380451202958093,
 0.7812450644370248,
 0.378511337816244,
 0.9774486929163755,
 0.6451596527953556,
 0.8427746546880042,
 0.7531391384465752,
 0.5573972209323005,
 0.9392301781829648,
 0.9718385082901624,
 0.9330899409087482,
 0.30210418666082717,
 0.7387231329771144,
 0.5014896454152625,
 0.8403982714008605,
 0.898961685688072,
 0.9447895848047487,
 0.7871092752232495,
 0.6368332226042498,
 0.8769564636192226,
 0.8695777766759161,
 0.9619860043221045,
 0.8905692684431081,
 0.8410231307976425,
 0.5047245973945675,
 0.9905070179367266,
 0.8278007368730697,
 0.9469156279728933,
 0.9446870726556595,
 0.9391311932379519,
 0.9133926959949076,
 0.7883197096229988,
 0.3728777246314223,
 0.4149763741071708,
 0.9915408764180512,
 0.898637331350076,
 0.9676756684605198,
 0.522114392566741,
 0.8053117165423243,
 0.8115703531727302,
 0.9148037157746306,
 0.72381150896054

In [7]:
expected_outcomes

[0.7028686013508317,
 0.7907082232678703,
 0.7060469338052926,
 0.7108533592911692,
 0.9824805150669633,
 0.8755806124995367,
 0.9145943450378833,
 0.7759555099171149,
 0.3548042954634561,
 0.9389616852627796,
 0.6451596527953556,
 0.8145760959085159,
 0.4235970303996929,
 0.554321343715208,
 0.8227357864417337,
 0.5573620149130656,
 0.9330899409087482,
 0.24852762000765605,
 0.5824949785660575,
 0.4899959504602948,
 0.4116808964521364,
 0.7694841030900937,
 0.9227588961245613,
 0.476833862422088,
 0.5424143631250433,
 0.8624773855897793,
 0.4761697408309819,
 0.8583185256948778,
 0.888262704361638,
 0.83998890048811,
 0.48979086954266404,
 0.8268329489778103,
 0.6864558604938593,
 0.684508450900508,
 0.9402950033219827,
 0.9246226495409602,
 0.8686237432968986,
 0.7750485521421487,
 0.35264800393834506,
 0.40492991732840733,
 0.9451085825307425,
 0.898637331350076,
 0.9676756684605198,
 0.5097724758979765,
 0.50838251098338,
 0.7576543472017928,
 0.8317630939370616,
 0.723811508960545

In [8]:
frac_diffs = [ (e - a)/a for e,a in zip(expected_outcomes,best_expected_outcomes)]

In [9]:
frac_diffs

[-0.0021934547710624566,
 -0.024604719059683006,
 -0.0034887597219909173,
 -0.031636554908910534,
 1.1300203999969268e-16,
 -0.009850443469116085,
 -0.024999623952556702,
 -0.006770672559348055,
 -0.0626323176726003,
 -0.03937496457104438,
 0.0,
 -0.03345919175740687,
 -0.4375580702479437,
 -0.00551828588586746,
 -0.1240317809704552,
 -0.42648700359313846,
 0.0,
 -0.17734466789538939,
 -0.2114840424469243,
 -0.022919107223939326,
 -0.5101359552228669,
 -0.14403014573293538,
 -0.02331809011711355,
 -0.3941961079205393,
 -0.14826308698703305,
 -0.01651060073117864,
 -0.45241270694473357,
 -0.10776401960263386,
 -0.002589988407642284,
 -0.0012297287335623465,
 -0.02958787411787083,
 -0.165242715089346,
 -0.1707474638318465,
 -0.27711780154493026,
 -0.0046492319634798066,
 -0.01544889979318963,
 -0.04901391580457586,
 -0.016834740168042706,
 -0.05425296110963373,
 -0.02420970784271421,
 -0.04682842129014968,
 0.0,
 0.0,
 -0.02363833834974555,
 -0.3687133807438386,
 -0.0664341738952879,
 -0

In [10]:
expected_outcomes[14]

0.8227357864417337

In [11]:
best_expected_outcomes[14]

0.9392301781829648

In [12]:
winning_probs_list[14]

array([0.24241791, 0.49119313, 0.93923018, 0.48168181])

In [13]:
turn_list[14]

[[...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...],
 [...]]