In [36]:
!pip install kaggle-environments --upgrade -q

In [686]:
import os

import numpy as np
from kaggle_environments import make
from scipy.stats import norm
# Scratch directory for generated agent files (init_agent writes "tmp/b_*.py").
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs('tmp', exist_ok=True)

# Root of the project checkout holding templates/ and kernels/.
# Can be overridden via the BANDITS_BASE_PATH environment variable; the default
# is the original hard-coded location.
base_path = os.environ.get('BANDITS_BASE_PATH', '/Users/sergmiller/Documents/my/bandits')

In [300]:
def p_val(x):
    """Return the two-sided p-value of a standard-normal statistic ``x``.

    The result is twice the smaller of the two tail probabilities
    ``norm.cdf(-x)`` and ``norm.cdf(x)``.
    """
    upper_tail = norm.cdf(-x)
    lower_tail = norm.cdf(x)
    smaller_tail = lower_tail if lower_tail < upper_tail else upper_tail
    return 2 * smaller_tail

In [318]:
class Agent:
    """Handle for a bandit agent given as source text, as a file path, or both.

    Attributes:
        text: agent source code, or None.
        file: path of a file containing the agent, or None.
    """

    def __init__(self, text=None, file=None):
        # Store both fields exactly as passed; no validation happens here.
        self.text, self.file = text, file
    
def init_agent(a : Agent):
    """Make sure agent ``a`` is backed by a source file on disk.

    If ``a.file`` is already set, nothing happens. Otherwise ``a.text`` is
    written to a freshly named ``tmp/b_<random>.py`` file and ``a.file`` is set
    to that path.

    Raises:
        AssertionError: if the agent has neither ``file`` nor ``text``.
    """
    if a.file is not None:
        return
    # Validate before mutating the agent: assigning a.file first would leave it
    # pointing at a non-existent file, and later calls would skip the write.
    assert a.text is not None, "Agent needs either `text` or `file`"
    target = "tmp/b_{}.py".format(np.random.random())
    write_to(target, a.text)
    # Publish the path only once the file really exists.
    a.file = target

def write_to(f_name, text):
    """Write ``text`` to the file ``f_name``, replacing any existing content."""
    handle = open(f_name, "w")
    try:
        handle.write(text)
    finally:
        # Always release the handle, even if the write fails.
        handle.close()

def compare(t1 : Agent, t2 : Agent, T=10):
    """Play ``T`` head-to-head "mab" episodes and z-test the reward difference.

    Both agents are materialised to files with ``init_agent``. For every episode
    a fresh environment is created with ``make("mab", debug=True)`` and run with
    ``[t1.file, t2.file]``; each agent's ``'reward'`` is read from the last step.

    Args:
        t1: first agent (index 0 in the environment).
        t2: second agent (index 1 in the environment).
        T: number of episodes to play; must be at least 1.

    Returns:
        Tuple ``(p, mu_z, sigma, res1, res2)``. ``res1`` and ``res2`` are arrays
        of per-episode final rewards, ``mu_z`` and ``sigma`` are the mean and
        ``np.std`` of ``res1 - res2``, and ``p`` is the two-sided p-value of
        ``mu_z / sigma * sqrt(T)``.

    Raises:
        ValueError: if ``T`` is smaller than 1.
    """
    if T < 1:
        raise ValueError("T must be at least 1, got {}".format(T))
    init_agent(t1)
    init_agent(t2)
    res1 = np.zeros(T)
    res2 = np.zeros(T)
    for i in range(T):
        env = make("mab", debug=True)
        final_step = env.run([t1.file, t2.file])[-1]
        res1[i] = final_step[0]['reward']
        res2[i] = final_step[1]['reward']
    delta = res1 - res2
    mu_z = np.mean(delta)
    sigma = np.std(delta)
    if sigma == 0:
        # Every episode gave the same difference, so the z statistic is
        # undefined (0/0 -> NaN, x/0 -> inf with a RuntimeWarning). Report the
        # limiting p-values directly: no difference at all -> 1, constant
        # non-zero difference -> 0.
        p = 1.0 if mu_z == 0 else 0.0
    else:
        z = mu_z / sigma * T ** 0.5
        p = p_val(z)
    return (p, mu_z, sigma, res1, res2)

In [688]:
# Use a dedicated handle name: binding the file to `f` would overwrite the
# hyperopt objective function `f` whenever this cell is re-run.
with open(os.path.join(base_path, 'templates', 'gittins.py'), 'r') as gittins_file:
    gittins = gittins_file.read()
        
# Agent source variants built by filling the template's positional "{}" slots.
# Passing the literal "{}" for a slot leaves it open for a later .format call.
# NOTE(review): presumably templates/gittins.py has exactly three "{}" fields — confirm.

# Last slot: tiny random noise added to `f`; the first two slots stay open.
gittins_with_random = gittins.format("{}", "{}", "f += np.random.random(f.shape) * 1e-12")
# The variants below fill the second open slot with a different adjustment of
# `gittins`; the first slot stays open (f() and init_best() later place a
# "beta,p,q,eps,rival_drift=..." assignment there).
gittins_with_count_my = gittins_with_random.format("{}", "gittins -= self._decay * self._successes")
# NOTE(review): this string is identical to gittins_with_count_my above; the
# "_exp" suffix suggests a different update was intended — confirm and fix.
gittins_with_count_my_exp = gittins_with_random.format("{}", "gittins -= self._decay * self._successes")
gittins_with_count_rival = gittins_with_random.format("{}", "gittins *= (1 - self._decay * self._rival_moves)")
gittins_with_count_rival_mu = gittins_with_random.format("{}", "gittins -= self._decay * self._rival_moves * mu")
gittins_with_count_my_and_rival =  gittins_with_random.format("{}", 
    "gittins = (gittins - self._decay * self._successes) * (1 - self._decay * self._rival_moves)")
gittins_with_count_my_and_rival_mu =  gittins_with_random.format("{}", 
    "gittins -= (self._decay * self._successes + self._decay * self._rival_moves * mu)")
gittins_with_count_rival_drift = gittins_with_random.format("{}", "gittins += self._rival_drift * self._rival_moves")
gittins_with_my_and_count_rival_drift = gittins_with_random.format("{}", 
    "gittins += (self._rival_drift * self._rival_moves - self._decay * self._successes)")
# gittins_with_random_and_custom_params = gittins_with_random.format("beta,p,q=0.5, 0.5, 1", "{}")

In [343]:
compare(Agent(text=gittins_with_count_my), Agent(text=gittins_with_count_my_exp), T=10)[:3]

(0.606474205705885, 6.3, 38.67570296710843)

In [344]:
compare(Agent(text=gittins_with_count_my), Agent(text=gittins_with_count_my_and_rival), T=10)[:3]

(1.2283089875543891e-08, 48.0, 26.649577857819814)

In [347]:
compare(Agent(text=gittins), Agent(text=gittins_with_count_rival_mu), T=10)[:3]

(0.0018893865255212212, 41.7, 42.44066446228192)

In [349]:
compare(Agent(text=gittins), Agent(text=gittins_with_count_rival), T=10)[:3]

(1.1293516115470137e-05, 62.3, 44.86880876510986)

In [362]:
compare(Agent(text=gittins), Agent(text=gittins_with_count_rival_drift), T=10)[:3]

(0.26882824503842084, -11.0, 31.457908385650818)

In [366]:
compare(Agent(text=gittins), Agent(text=gittins_with_my_and_count_rival_drift), T=10)[:3]

(0.47533192912355393, -5.6, 24.80806320533709)

In [404]:
compare(Agent(text=gittins), Agent(text=gittins_with_count_my), T=10)[:3]

(0.00023052734356843025, -29.9, 25.672748197261626)

In [384]:
def f(input):
    """Hyperopt objective: score one parameter set against the noisy baseline.

    ``input`` is a dict with keys "beta", "p", "q", "eps" and "drift". The values
    are rendered into a "beta,p,q,eps,rival_drift=..." line that fills the open
    slot of ``gittins_with_my_and_count_rival_drift``. The resulting agent plays
    10 episodes as the second player against ``gittins_with_random``.

    Prints ``input`` together with ``(p, mu_z, sigma)`` and returns ``mu_z``,
    the mean of baseline reward minus candidate reward.
    """
    values = [input[key] for key in ("beta", "p", "q", "eps", "drift")]
    param_line = "beta,p,q,eps,rival_drift={},{},{},{},{}".format(*values)
    gittins_custom = gittins_with_my_and_count_rival_drift.format(param_line)
    baseline = Agent(text=gittins_with_random)
    candidate = Agent(text=gittins_custom)
    summary = compare(baseline, candidate, T=10)[:3]
    print(input, summary)
    return summary[1]

In [382]:
import hyperopt

In [383]:
# Priors for the tunable template parameters: beta, p and q share one uniform
# range; eps and drift get their own distributions.
search_space = {
    label: hyperopt.hp.uniform(label, 0.05, 0.95) for label in ("beta", "p", "q")
}
search_space["eps"] = hyperopt.hp.uniform('eps', 0, 1e-1)
search_space["drift"] = hyperopt.hp.normal('drift', -1e-3, 1e-3)

# Minimise f (mean of baseline reward minus candidate reward) with TPE; every
# evaluation is recorded in `trials`.
trials = hyperopt.Trials()
best = hyperopt.fmin(
    f,
    search_space,
    algo=hyperopt.tpe.suggest,
    max_evals=500,
    trials=trials,
    verbose=True,
)
print(best)


{'beta': 0.7760118084944925, 'drift': 0.0006101374043450849, 'eps': 0.09216854718415736, 'p': 0.2382199067710455, 'q': 0.7217980278681799}
(0.1782005533239749, -12.9, 30.299999999999997)        
{'beta': 0.8774414143800185, 'drift': 0.0002996220980536783, 'eps': 0.09368269799273073, 'p': 0.5683826438371754, 'q': 0.36799990709395164}
(0.24507897477827856, -9.1, 24.75661527753744)                        
{'beta': 0.06847566165010933, 'drift': -0.0035108516418922444, 'eps': 0.04848637129657493, 'p': 0.06634599968919988, 'q': 0.41298996779010394}
(0.9874736348171315, 0.2, 40.28349537962167)                          
{'beta': 0.16050128960150736, 'drift': -0.0012808303826059284, 'eps': 0.0325453842952085, 'p': 0.7137780515907629, 'q': 0.30161566882141283}
(0.0020578310743403594, -34.2, 35.09358915813542)                     
{'beta': 0.11750555170102675, 'drift': -0.0009281112912669579, 'eps': 0.04141655206174636, 'p': 0.6376868222428131, 'q': 0.9444175581698229}
(0.00012320960339458486, -3

KeyboardInterrupt: 

In [386]:
# Parameter set consumed by init_best (keys: beta, drift, eps, p, q).
bb = dict(
    beta=0.3648259880510159,
    drift=-0.0014398709738894131,
    eps=0.04635953447584462,
    p=0.31270631588673564,
    q=0.9205017213361296,
)

In [394]:
def init_best(d):
    """Render agent source for the parameter dict ``d``.

    ``d`` must contain the keys 'beta', 'p', 'q', 'eps' and 'drift'. Their values
    are formatted into a "beta,p,q,eps,rival_drift=..." line, which fills the
    open slot of ``gittins_with_my_and_count_rival_drift``. Returns the string.
    """
    values = [d[key] for key in ('beta', 'p', 'q', 'eps', 'drift')]
    param_line = "beta,p,q,eps,rival_drift={},{},{},{},{}".format(*values)
    return gittins_with_my_and_count_rival_drift.format(param_line)

In [387]:
gittins_bb = init_best(bb)

In [395]:
# Second parameter set, rendered into agent source via init_best.
bb_delta = dict(
    beta=0.4613730907562291,
    drift=-0.0011519703027092387,
    eps=0.04165963371245352,
    p=0.5451383211748958,
    q=0.5821590019751394,
)
gittins_bb_delta = init_best(bb_delta)

In [689]:
def bench(a : Agent, T=10):
    """Compare agent ``a`` against a fixed roster of reference agents.

    The roster mixes template-based agents (built from the module-level source
    strings) with kernel agents loaded from ``<base_path>/kernels/<name>.py``.
    ``a`` always plays as the first agent in ``compare``.

    Args:
        a: the agent under test.
        T: number of episodes per pairing (default 10, the original fixed value).

    Returns:
        A list of ``[name, (p, mu_z, sigma)]`` entries, one per reference agent,
        in roster order. Each entry is also printed as soon as it is computed.
    """
    def kernel(name):
        # os.path.join inserts the separator that plain string concatenation
        # ("base_path + 'kernels/...'") dropped, since base_path has no
        # trailing slash.
        return Agent(file=os.path.join(base_path, 'kernels', name + '.py'))

    known_agents = [
        ('gittins', Agent(text=gittins)),
        ('gittins_with_random', Agent(text=gittins_with_random)),
        ('gittins_with_count_my', Agent(text=gittins_with_count_my)),
        ('gittins_with_count_rival_drift', Agent(text=gittins_with_count_rival_drift)),
        ('gittins_with_my_and_count_rival_drift', Agent(text=gittins_with_my_and_count_rival_drift)),
        ('gittins_bb', Agent(text=gittins_bb)),
        ('gittins_bb_delta', Agent(text=gittins_bb_delta)),
        ('softmax_ucb', kernel('softmax_ucb')),
        ('multiarmed_bandit_agent', kernel('multiarmed_bandit_agent')),
        ('upper_confidence', kernel('upper_confidence')),
        ('ucb_decay', kernel('ucb_decay')),
        ('bayesian_ucb', kernel('bayesian_ucb')),
        ('thompson', kernel('thompson')),
        ('neural', Agent(text=neural)),
#         ('max_likelihood', kernel('max_likelihood')), works too long
        ('optimized_ucb', kernel('optimized_ucb')),
    ]

    res = []
    for name, rival in known_agents:
        res.append([name, compare(a, rival, T=T)[:3]])
        print(res[-1])
    return res

In [392]:
bench(Agent(text=gittins_bb))

[(2.5255240922780004e-06, 25.9, 17.403735231265728),
 (0.029991876133032967, 23.0, 33.51417610504546),
 (0.32992270880110197, 7.8, 25.317187837514656),
 (0.0001944034405806845, 31.5, 26.7329384841996),
 (0.5074584383928025, 7.0, 33.397604704529336)]

In [396]:
bench(Agent(text=gittins_bb_delta))

[(0.010729090227799871, 38.3, 47.47009584991376),
 (2.9591986278665966e-07, 27.7, 17.088300090997933),
 (0.30022686164525036, 5.2, 15.873247934811575),
 (0.0021999381701743137, 32.6, 33.66957083183568),
 (0.04388293234712658, 21.1, 33.110270309980855)]

In [398]:
a = Agent(text=gittins_bb_delta)
init_agent(a)
a.file

'tmp/b_0.13843056616831617.py'

In [400]:
compare(Agent(file='tmp/b_0.13843056616831617.py'), Agent(text=gittins))

(0.008097833972786038,
 28.2,
 33.67729205265768,
 array([554., 652., 556., 634., 625., 627., 597., 781., 625., 644.]),
 array([548., 621., 590., 589., 568., 596., 603., 686., 607., 605.]))

In [407]:
a = Agent(text=gittins_with_count_my)
init_agent(a)
a.file

'tmp/b_0.36080881811064225.py'

In [410]:
# NOTE(review): no visible cell creates tmp/b_0.8473257048600334.py (the cell
# above produced tmp/b_0.36080881811064225.py) — confirm which agent this file holds.
compare(Agent(file='tmp/b_0.8473257048600334.py'), Agent(text=gittins))

(0.07969100837095777,
 12.3,
 22.194819215303376,
 array([659., 653., 607., 647., 633., 586., 626., 608., 586., 592.]),
 array([609., 627., 614., 612., 600., 595., 638., 589., 581., 609.]))

In [411]:
compare(Agent(text=gittins_with_count_my), Agent(text=gittins))

(0.010155020119002756,
 20.6,
 25.342454498331453,
 array([546., 581., 608., 615., 599., 609., 591., 684., 651., 691.]),
 array([573., 547., 602., 575., 569., 600., 609., 636., 611., 647.]))

In [409]:
compare(Agent(file='tmp/b_0.36080881811064225.py'), Agent(text=gittins))

(3.4674259775474264e-05,
 26.5,
 20.239812252093643,
 array([586., 649., 630., 600., 649., 673., 563., 627., 616., 629.]),
 array([577., 637., 608., 561., 638., 634., 542., 550., 610., 600.]))

In [412]:
compare(Agent(file='tmp/b_0.36080881811064225.py'), Agent(text=gittins), T=20)

(0.006451355673915332,
 16.25,
 26.679345944006947,
 array([638., 570., 647., 660., 671., 626., 682., 592., 634., 663., 605.,
        651., 616., 628., 597., 620., 643., 582., 617., 582.]),
 array([621., 593., 610., 631., 683., 591., 621., 560., 669., 615., 598.,
        591., 591., 612., 587., 631., 622., 559., 602., 612.]))

In [415]:
compare(Agent(file='kernels/multiarmed_bandit_agent.py'), Agent(text=gittins), T=20)

(0.0007908909690796149,
 20.9,
 27.851211822827384,
 array([664., 690., 587., 618., 669., 526., 611., 661., 642., 615., 560.,
        618., 659., 626., 599., 680., 632., 641., 627., 654.]),
 array([640., 672., 558., 593., 614., 565., 618., 644., 612., 587., 577.,
        612., 637., 565., 614., 602., 634., 619., 569., 629.]))

In [414]:
compare(Agent(file='kernels/softmax_ucb.py'), Agent(text=gittins), T=20)

(0.7089664212117914,
 2.35,
 28.157192686771882,
 array([628., 627., 590., 582., 598., 568., 611., 657., 650., 585., 710.,
        543., 657., 676., 566., 659., 626., 700., 661., 612.]),
 array([620., 635., 585., 572., 609., 577., 613., 619., 593., 607., 666.,
        589., 663., 668., 583., 657., 632., 660., 722., 589.]))

In [416]:
compare(Agent(file='kernels/upper_confidence.py'), Agent(text=gittins), T=20)

(4.9853040265783087e-64,
 -123.45,
 32.679466029909364,
 array([520., 548., 546., 530., 515., 520., 627., 597., 551., 540., 537.,
        510., 563., 560., 525., 604., 500., 547., 589., 497.]),
 array([645., 683., 615., 677., 655., 640., 689., 735., 676., 649., 650.,
        711., 686., 701., 691., 673., 650., 642., 694., 633.]))

In [417]:
compare(Agent(file='kernels/multiarmed_bandit_agent.py'), Agent(text=gittins_with_random), T=20)

(1.4081626254579164e-05,
 33.05,
 34.03597361616089,
 array([663., 670., 649., 650., 682., 681., 578., 685., 663., 626., 629.,
        561., 641., 521., 643., 704., 668., 582., 641., 709.]),
 array([594., 596., 659., 602., 603., 621., 567., 676., 601., 583., 636.,
        535., 585., 576., 601., 685., 628., 593., 600., 644.]))

In [418]:
compare(Agent(file='kernels/softmax_ucb.py'), Agent(text=gittins_with_random), T=20)

(0.04821592751221526,
 14.3,
 32.37298256262466,
 array([583., 641., 584., 666., 636., 590., 639., 607., 633., 626., 618.,
        648., 584., 673., 666., 613., 590., 666., 579., 634.]),
 array([597., 584., 582., 659., 587., 619., 642., 604., 558., 620., 618.,
        589., 586., 661., 652., 615., 644., 617., 581., 575.]))

In [422]:
bench(Agent(text=gittins_bb_delta))

[(4.2854171320643786e-08, 54.1, 31.226431112120384),
 (2.0465870932658617e-24, 41.4, 12.839003076563227),
 (0.9525250069252739, 0.6, 31.86910729844813),
 (0.0009244556884093576, 37.0, 35.32138162643132),
 (0.9779150724415098, 0.2, 22.846443924602355),
 (0.13794316903592332, -11.3, 24.087548650703333),
 (0.23564468912492498, 12.4, 33.064180014027265),
 (1.1411492310898283e-22, 34.0, 10.972693379476162),
 (0.3037168800018688, -9.6, 29.516774891576485),
 (6.083455761527968e-52, 128.5, 26.796454989419775)]

In [428]:
import torch
from torch import nn

In [426]:
!pip install torch

Collecting torch
  Downloading torch-1.7.1-cp39-none-macosx_10_9_x86_64.whl (110.0 MB)
[K     |████████████████████████████████| 110.0 MB 5.3 MB/s eta 0:00:01    |████████████                    | 41.4 MB 3.2 MB/s eta 0:00:22
Collecting typing-extensions
  Downloading typing_extensions-3.7.4.3-py3-none-any.whl (22 kB)
Installing collected packages: typing-extensions, torch
Successfully installed torch-1.7.1 typing-extensions-3.7.4.3


In [690]:
# Use a dedicated handle name: binding the file to `f` would overwrite the
# hyperopt objective function `f` defined in an earlier cell.
with open(os.path.join(base_path, 'templates', 'neural.py'), 'r') as neural_file:
    neural = neural_file.read()

In [572]:
# neural_with_lr = neural.format("lr = {}".format(1e-3))
# hidden = 16
compare(Agent(text=neural), Agent(text=gittins_with_random), T=20)

(0.04817399799390361,
 -33.05,
 74.80606593051127,
 array([628., 570., 670., 548., 514., 589., 615., 616., 643., 499., 603.,
        449., 652., 576., 573., 673., 551., 636., 523., 672.]),
 array([655., 605., 651., 742., 648., 574., 597., 574., 600., 572., 629.,
        661., 674., 571., 613., 661., 616., 582., 616., 620.]))

In [575]:
# hidden=32
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.3143794305537898,
 -16.1,
 50.60523688315272,
 array([619., 631., 592., 612., 513., 606., 672., 666., 640., 595.]),
 array([622., 631., 583., 654., 590., 722., 639., 644., 582., 640.]))

In [583]:
# hidden=64
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.0006475889972293117,
 -41.4,
 38.382808651790974,
 array([594., 522., 606., 585., 577., 531., 588., 593., 586., 619.]),
 array([613., 612., 616., 553., 629., 606., 628., 675., 661., 622.]))

In [581]:
# hidden=128
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.0004958468418106363,
 -31.1,
 28.236324123369883,
 array([538., 601., 656., 545., 593., 587., 570., 539., 598., 601.]),
 array([582., 643., 641., 586., 611., 639., 627., 522., 615., 673.]))

In [585]:
# hidden=64, without layernorm
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.10396468619078034,
 -23.9,
 46.483222779837455,
 array([537., 566., 619., 623., 615., 661., 625., 635., 615., 580.]),
 array([649., 657., 660., 650., 596., 612., 622., 671., 619., 579.]))

In [591]:
# hidden=64, without layernorm with gittins baseline
compare(Agent(text=neural), Agent(text=gittins_with_random), T=20)

(0.000383703897760926,
 29.3,
 36.9,
 array([660., 655., 614., 553., 627., 626., 698., 666., 653., 713., 666.,
        619., 628., 668., 608., 603., 611., 656., 618., 633.]),
 array([643., 592., 563., 573., 572., 569., 679., 588., 604., 643., 620.,
        583., 678., 635., 588., 564., 677., 598., 603., 617.]))

In [593]:
# hidden=128, without layernorm with gittins baseline
compare(Agent(text=neural), Agent(text=gittins_with_random), T=20)

(5.259240510589902e-06,
 27.1,
 26.6118394704312,
 array([625., 587., 626., 607., 604., 724., 592., 650., 647., 605., 643.,
        602., 625., 580., 660., 609., 646., 623., 712., 586.]),
 array([590., 536., 573., 572., 553., 647., 571., 666., 636., 589., 561.,
        557., 599., 553., 652., 625., 628., 616., 691., 596.]))

In [597]:
# hidden=128, without layernorm with gittins baseline, 3 layers
compare(Agent(text=neural), Agent(text=gittins_with_random), T=20)

(0.0009306281273881362,
 23.85,
 32.216882220351486,
 array([567., 668., 647., 606., 626., 607., 635., 653., 612., 548., 673.,
        728., 636., 562., 565., 603., 689., 553., 559., 578.]),
 array([567., 634., 618., 589., 610., 630., 583., 568., 611., 560., 630.,
        662., 610., 509., 629., 564., 656., 518., 524., 566.]))

In [598]:
# hidden=128, without layernorm with gittins baseline, 3 layers, sigmoid
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.006920246178702476,
 18.5,
 21.66217902243447,
 array([658., 629., 578., 635., 577., 628., 650., 667., 619., 643.]),
 array([665., 596., 560., 604., 595., 601., 593., 646., 589., 650.]))

In [600]:
# hidden=128, without layernorm with gittins baseline, 2 layers, sigmoid
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.004622009254888536,
 31.0,
 34.61213659975356,
 array([738., 562., 621., 636., 679., 619., 682., 658., 685., 588.]),
 array([653., 556., 624., 595., 597., 647., 673., 631., 630., 552.]))

In [612]:
# hidden=128, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.009804144836376547,
 16.0,
 19.590814173994914,
 array([630., 694., 586., 618., 605., 634., 661., 689., 657., 666.]),
 array([589., 669., 615., 603., 607., 632., 639., 658., 635., 633.]))

In [608]:
# hidden=256, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.2261938354909282,
 13.1,
 34.22995763947131,
 array([588., 599., 711., 651., 601., 552., 591., 658., 577., 616.]),
 array([606., 576., 692., 605., 629., 574., 615., 615., 498., 603.]))

In [610]:
# hidden=128, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves, lr=1e-2
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(8.173946298618923e-06,
 -149.1,
 105.7028381832768,
 array([665., 473., 439., 643., 449., 555., 651., 499., 448., 547.]),
 array([686., 705., 647., 582., 701., 694., 726., 686., 749., 684.]))

In [616]:
# hidden=128, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves(my + rival)
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.07436803382292292,
 10.3,
 18.254040648579696,
 array([558., 592., 669., 614., 709., 609., 622., 609., 628., 648.]),
 array([553., 575., 668., 610., 691., 588., 654., 600., 584., 632.]))

In [620]:
# hidden=128, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves(my + rival) + rewards
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.0013064904372689834,
 31.0,
 30.495901363953813,
 array([634., 577., 663., 643., 631., 688., 656., 680., 645., 683.]),
 array([655., 567., 605., 584., 636., 665., 590., 647., 631., 610.]))

In [622]:
# hidden=128, without layernorm with gittins baseline, 3 layers, sigmoid, 5 last moves(my + rival) + rewards
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.02036659303085346,
 -23.9,
 32.58358482426389,
 array([652., 599., 618., 551., 609., 600., 642., 637., 570., 675.]),
 array([651., 661., 590., 620., 599., 611., 658., 651., 641., 710.]))

In [624]:
# hidden=128, without layernorm with gittins baseline, 3 layers, sigmoid, 5 last moves(my + rival) + rewards,lr = 1e-2
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.3570434791327334,
 -9.3,
 31.931332574761107,
 array([686., 651., 591., 652., 548., 634., 655., 680., 597., 562.]),
 array([683., 705., 610., 682., 566., 632., 684., 613., 581., 593.]))

In [626]:
# hidden=256, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves(my + rival) + rewards
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.006018923380864479,
 19.9,
 22.91047795223836,
 array([628., 630., 692., 607., 652., 637., 674., 696., 654., 622.]),
 array([600., 639., 661., 614., 623., 643., 678., 652., 617., 566.]))

In [628]:
# hidden=512, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves(my + rival) + rewards
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.7230291358409504,
 3.5,
 31.228992939254383,
 array([545., 609., 594., 651., 616., 564., 715., 640., 664., 637.]),
 array([527., 594., 595., 658., 639., 623., 693., 621., 599., 651.]))

In [630]:
# hidden=128, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves(my + rival) + rewards -softsign
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.8955202509678022,
 -1.9,
 45.752486271239945,
 array([604., 631., 575., 604., 559., 605., 589., 662., 572., 650.]),
 array([688., 588., 577., 619., 594., 556., 581., 628., 639., 600.]))

In [633]:
# hidden=128, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves(my + rival) + rewards, mse loss
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.4814618952066887,
 -7.0,
 31.445190411253673,
 array([581., 602., 568., 538., 566., 672., 619., 634., 595., 669.]),
 array([579., 575., 587., 567., 609., 641., 649., 666., 625., 616.]))

In [635]:
# hidden=128, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves(my + rival) + rewards, mse loss,lr=1e-2
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(1.3027992858897352e-09,
 -100.8,
 52.539128275981135,
 array([513., 508., 629., 538., 609., 560., 569., 473., 527., 545.]),
 array([649., 571., 691., 557., 670., 654., 730., 678., 613., 666.]))

In [637]:
# hidden=128, without layernorm with gittins baseline, 2 layers, sigmoid, 5 last moves(my + rival) + rewards (again)
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.07408167061938936,
 15.6,
 27.619558287561368,
 array([670., 618., 603., 668., 594., 609., 595., 602., 718., 635.]),
 array([627., 627., 598., 674., 568., 647., 592., 567., 667., 589.]))

In [645]:
a = Agent(text=neural)
init_agent(a)
a.file

'tmp/b_0.4204600914015785.py'

In [649]:
res = bench(Agent(file='tmp/b_0.4204600914015785.py'))

(0.3972262309976057, 9.7, 36.232720019341635)
(0.03945976742325765, 16.7, 25.64390765854533)
(0.008419059869329705, -28.4, 34.08577415873079)
(0.4787347617112808, 6.9, 30.804058174208148)
(0.0009292589432535868, -24.4, 23.303218661807215)
(3.6377097512975197e-07, -40.5, 25.17637781731121)
(0.0001163659494051545, -23.6, 19.365949499056327)
(0.6908953278170049, 3.7, 29.42464953062313)
(0.04314162037527695, -23.3, 36.43363830308469)
(5.835892518606307e-30, 136.3, 37.90527667752868)


In [651]:
compare(Agent(file='/Users/sergmiller/Documents/my/bandits/kernels/ucb_decay.py'), Agent(text=gittins_with_random))

(0.15744713612524386,
 -13.4,
 29.973988723558296,
 array([633., 550., 509., 646., 593., 638., 633., 588., 630., 577.]),
 array([627., 579., 597., 651., 621., 661., 610., 594., 614., 577.]))

In [652]:
compare(Agent(file='/Users/sergmiller/Documents/my/bandits/kernels/bayesian_ucb.py'), Agent(text=gittins_with_random))

(1.5001900765283778e-05,
 52.3,
 38.207460004559316,
 array([735., 668., 624., 660., 659., 674., 607., 565., 650., 650.]),
 array([623., 600., 584., 579., 698., 631., 582., 498., 577., 597.]))

In [653]:
compare(Agent(file='/Users/sergmiller/Documents/my/bandits/kernels/thompson.py'), Agent(text=gittins_with_random))

(0.015175617949015455,
 25.4,
 33.07929866245655,
 array([653., 614., 607., 643., 636., 643., 664., 571., 643., 657.]),
 array([629., 590., 656., 585., 571., 578., 641., 580., 619., 628.]))

In [666]:
# hidden=128, without layernorm with gittins delta_bb baseline, 2 layers, sigmoid, 5 last moves(my + rival) + rewards (again)
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.0045348001696681964,
 34.9,
 38.88302971734584,
 array([670., 647., 588., 611., 739., 669., 662., 636., 677., 634.]),
 array([621., 594., 578., 629., 624., 636., 657., 609., 596., 640.]))

In [667]:
compare(Agent(text=neural), Agent(text=gittins_bb_delta), T=20)

(0.4150601933334106,
 -5.05,
 27.710061349625338,
 array([618., 552., 648., 582., 655., 640., 595., 628., 644., 623., 567.,
        666., 631., 608., 621., 630., 650., 630., 575., 590.]),
 array([647., 555., 613., 565., 618., 645., 622., 604., 649., 614., 622.,
        658., 621., 666., 658., 641., 688., 599., 566., 603.]))

In [672]:
# gittins delta_bb baseline, 1 layer, sigmoid, 5 last moves(my + rival) + rewards (again)
compare(Agent(text=neural), Agent(text=gittins_bb_delta), T=20)

(0.9650311781006329,
 0.3,
 30.602450882241445,
 array([644., 587., 580., 579., 567., 580., 693., 613., 615., 540., 507.,
        696., 671., 671., 694., 575., 622., 658., 578., 664.]),
 array([655., 636., 602., 598., 582., 602., 700., 561., 589., 539., 496.,
        654., 614., 683., 661., 600., 675., 627., 570., 684.]))

In [670]:
# gittins delta_bb baseline, 1 layer, sigmoid, 5 last moves(my + rival) + rewards (again)
compare(Agent(text=neural), Agent(text=gittins_with_random), T=10)

(0.0005995478898804283,
 -39.1,
 36.02901608426186,
 array([630., 603., 557., 527., 579., 562., 562., 607., 564., 537.]),
 array([615., 602., 606., 630., 654., 644., 581., 628., 603., 556.]))

In [673]:
a = Agent(text=neural)
init_agent(a)
a.file

'tmp/b_0.31264246571420906.py'

In [674]:
res = bench(Agent(file='tmp/b_0.31264246571420906.py'))

['gittins', (2.1475984606729273e-06, 56.2, 37.50146663798631)]
['gittins_with_random', (1.3715233030447107e-06, 34.4, 22.526428922490133)]
['gittins_with_count_my', (0.0003151042021039252, 19.1, 16.76573887426379)]
['gittins_with_count_rival_drift', (1.6838757781366005e-10, 47.1, 23.317161062187655)]
['gittins_with_my_and_count_rival_drift', (0.11963707274256954, 18.5, 37.590557324945316)]
['gittins_bb', (0.773553859300014, 3.6, 39.565641660410364)]
['gittins_bb_delta', (0.4212070972968093, 7.4, 29.093641917092473)]
['softmax_ucb', (0.002660131961789012, 20.9, 21.9974998579384)]
['multiarmed_bandit_agent', (0.04146487952384929, 11.8, 18.301912468373352)]
['upper_confidence', (7.38117376486062e-30, 127.4, 35.494224882366424)]
['ucb_decay', (1.3001477544135825e-06, 38.7, 25.28655769376291)]
['bayesian_ucb', (0.6258482932879363, -5.0, 32.428382630035685)]
['thompson', (0.7459054527070903, 1.8, 17.565876010037186)]


In [675]:
compare(Agent(text=neural), Agent(file='/Users/sergmiller/Documents/my/bandits/kernels/bayesian_ucb.py'), T=20)

(0.17912212685919382,
 -11.0,
 36.616935972306585,
 array([624., 691., 663., 656., 519., 600., 593., 612., 714., 582., 628.,
        616., 609., 618., 576., 573., 698., 630., 563., 672.]),
 array([619., 632., 700., 619., 571., 639., 617., 617., 667., 637., 653.,
        672., 611., 653., 545., 612., 690., 678., 597., 628.]))

In [676]:
compare(Agent(text=gittins_bb_delta), Agent(file='/Users/sergmiller/Documents/my/bandits/kernels/bayesian_ucb.py'), T=20)

(0.09443938833273353,
 -10.45,
 27.943648652242967,
 array([636., 618., 515., 603., 634., 565., 588., 663., 689., 616., 563.,
        610., 586., 644., 596., 596., 634., 586., 595., 618.]),
 array([678., 633., 524., 613., 667., 572., 589., 628., 637., 620., 563.,
        643., 576., 654., 584., 670., 634., 629., 637., 613.]))

In [684]:
# compare(Agent(text=gittins_bb_delta), Agent(file='/Users/sergmiller/Documents/my/bandits/kernels/max_likelihood.py'), T=10)

In [682]:
compare(Agent(text=gittins_bb_delta), Agent(file='/Users/sergmiller/Documents/my/bandits/kernels/optimized_ucb.py'), T=10)



(1.4805707294350223e-25,
 80.8,
 24.453220646777797,
 array([633., 628., 656., 625., 675., 702., 666., 719., 664., 670.]),
 array([568., 550., 553., 595., 570., 614., 572., 624., 617., 567.]))