In [1]:
import numpy as np
import pandas as pd
import matplotlib
import dask
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply the 'ggplot' style sheet to the figures drawn in this notebook.
matplotlib.style.use('ggplot')
# Ask the inline backend to emit figures in the 'retina' (high-DPI) format.
%config InlineBackend.figure_format = 'retina'

In [2]:
import matplotlib.pyplot as plt

In [3]:
from bandits import BernoulliBandit
from solvers import Solver, EpsilonGreedy, UCB1, BayesianUCB, ThompsonSampling
from main import plot_results, experiment

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/opt/conda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/opt/conda/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/opt/conda/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 478, in start
    self.io_loop.start()
  File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()

In [None]:
# Time a single run with arguments (1000, 100000) and keep both returned values.
# NOTE(review): when the cells run top-to-bottom, `experiment` here is still the
# one imported from `main`; a three-argument `experiment` is defined further down
# in this notebook and shadows it from then on — confirm which one is intended.
%time b,s=experiment(1000,100000)

In [None]:
# Draw `s.regrets` as a line plot on a fresh, wide (25 x 5 inch) figure.
plt.figure(figsize=(25, 5)).gca().plot(s.regrets)

In [None]:
# Display the solver's `regret` attribute (a different attribute from the
# `regrets` sequence that the neighbouring cells plot).
# NOTE(review): presumably the total regret accumulated over the run — confirm
# against the Solver class in solvers.py.
s.regret

In [None]:
# Draw `s.counts` as a stem plot on a fresh, wide (25 x 5 inch) figure.
plt.figure(figsize=(25, 5)).gca().stem(s.counts)

In [4]:
def experiment(K, N, sol=None):
    """
    Run a small experiment on solving a Bernoulli bandit with K slot machines.

    A ``BernoulliBandit(K)`` is created, the index and probability of its best
    machine are printed, and the selected solver(s) are run for N steps each.

    Args:
        K (int): number of slot machines.
        N (int): number of time steps to try.
        sol: which solver(s) to run:
            0 -> ThompsonSampling(b, 1, 1)
            1 -> BayesianUCB(b, 3, 1, 1)
            2 -> UCB1(b)
            3 -> EpsilonGreedy(b, 0.01)
            anything else (including the default ``None``) -> all four, in the
            order EpsilonGreedy, UCB1, BayesianUCB, ThompsonSampling.
            The default keeps two-argument calls ``experiment(K, N)`` working.

    Returns:
        tuple: ``(b, s)`` where ``b`` is the bandit and ``s`` is the last solver
        that was run. When all four solvers run, only the final one
        (ThompsonSampling) is returned.
    """
    b = BernoulliBandit(K)

    best_index = max(range(K), key=lambda i: b.probas[i])
    print('The best machine has index: {} and proba: {}'.format(best_index, max(b.probas)))

    # Only the selected solver(s) are constructed, exactly one list per branch.
    if sol == 0:
        test_solvers = [ThompsonSampling(b, 1, 1)]
    elif sol == 1:
        test_solvers = [BayesianUCB(b, 3, 1, 1)]
    elif sol == 2:
        test_solvers = [UCB1(b)]
    elif sol == 3:
        test_solvers = [EpsilonGreedy(b, 0.01)]
    else:
        test_solvers = [
            EpsilonGreedy(b, 0.01),
            UCB1(b),
            BayesianUCB(b, 3, 1, 1),
            ThompsonSampling(b, 1, 1),
        ]

    for solver in test_solvers:
        solver.run(N)

    # Return the last solver explicitly instead of relying on the loop variable
    # leaking out of the for-loop.
    return b, test_solvers[-1]

In [7]:
%time b,s=dask.compute(experiment(1000,100000,sol=1))

The best machine has index: 335 and proba: 0.9975161749784565


KeyboardInterrupt: 

In [None]:
# Draw `s.regrets` as a line plot on a fresh, wide (25 x 5 inch) figure.
plt.figure(figsize=(25, 5)).gca().plot(s.regrets)

In [None]:
# Draw `s.counts` as a stem plot on a fresh, wide (25 x 5 inch) figure.
plt.figure(figsize=(25, 5)).gca().stem(s.counts)