In [63]:
from pymc import rbeta
import scipy.stats as stats
import numpy as np



class Bandits(object):

    """
    This class represents N bandit machines, each paying out a reward
    of 1 with its own fixed probability and 0 otherwise.

    parameters:
        p_array: a (n,) Numpy array of probabilities >0, <1.

    attributes:
        p: the payout probabilities, stored as given.
        optimal: index of the bandit with the highest payout probability.

    methods:
        pull( i ): return the result, 0 or 1, of pulling
                   the ith bandit.
    """

    def __init__(self, p_array):
        self.p = p_array
        self.optimal = np.argmax(p_array)

    def pull(self, i):
        """Pull arm i once and return the reward as an int, 0 or 1."""
        # A uniform draw on [0, 1) falls below p[i] with probability p[i].
        # The comparison yields a boolean; convert it so the return value
        # is the 0/1 integer the class docstring promises.
        return int(np.random.rand() < self.p[i])

    def __len__(self):
        """Number of bandit machines."""
        return len(self.p)


class BayesianStrategy(object):

    """
    Implements an online learning strategy to solve
    the Multi-Armed Bandit problem: every arm keeps a Beta posterior
    over its payout probability, one sample is drawn from each
    posterior, and the arm with the largest sample is pulled.

    parameters:
        bandits: a Bandit class with .pull method (it must also
                 support len(), giving the number of arms)

    methods:
        sample_bandits(n): sample and train on n pulls.

    attributes:
        N: the cumulative number of samples
        choices: the historical choices as a (N,) array
        bb_score: the historical score as a (N,) array
        wins: per-arm count of rewarded pulls, shape (n_bandits,)
        trials: per-arm count of pulls, shape (n_bandits,)
    """

    def __init__(self, bandits):

        self.bandits = bandits
        n_bandits = len(self.bandits)
        self.wins = np.zeros(n_bandits)
        self.trials = np.zeros(n_bandits)
        self.N = 0
        # Empty until the first sample_bandits call, which replaces
        # both histories with NumPy arrays.
        self.choices = []
        self.bb_score = []

    def sample_bandits(self, n=1):
        """
        Choose and pull an arm n times, updating the posteriors after
        every pull.

        parameters:
            n: number of pulls to perform (0 leaves the state unchanged).

        returns:
            (choices, wins, trials): the full choice history and the
            per-arm win and trial counts. These are the instance's own
            arrays, not copies.
        """

        bb_score = np.zeros(n)
        choices = np.zeros(n)

        for k in range(n):
            # Draw one sample per arm from its Beta posterior (a uniform
            # Beta(1, 1) prior updated with the observed wins and losses)
            # and play the arm with the largest draw.
            #
            # np.random.beta draws every arm independently. The rbeta
            # output recorded in this notebook for Beta(2, 1) and
            # Beta(3, 1), [0.96998417, 0.97988798], sits at one shared
            # quantile (0.96998417**2 ~= 0.97988798**3 ~= 0.9409), i.e.
            # the arms' draws were perfectly correlated, which defeats
            # the exploration this strategy relies on.
            posterior_draws = np.random.beta(1 + self.wins,
                                             1 + self.trials - self.wins)
            choice = np.argmax(posterior_draws)

            # sample the chosen bandit
            result = self.bandits.pull(choice)

            # update posteriors and score
            self.wins[choice] += result
            self.trials[choice] += 1
            bb_score[k] = result
            self.N += 1
            choices[k] = choice

        # Append this batch to the running histories.
        self.bb_score = np.r_[self.bb_score, bb_score]
        self.choices = np.r_[self.choices, choices]
        return self.choices, self.wins, self.trials

In [64]:
# Seed NumPy's global RNG (used by Bandits.pull) so that re-running this
# cell from a fresh kernel reproduces the same sequence of pulls.
np.random.seed(0)

# Two arms with identical hidden payout probabilities.
hidden_prob = np.array([0.5, 0.5])
bandits = Bandits(hidden_prob)
bayesian_strat = BayesianStrategy(bandits)
# Last expression of the cell: displays (choices, wins, trials) after 40 pulls.
bayesian_strat.sample_bandits(40)

(array([ 0.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
         1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  0.,  1.,
         1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  1.]),
 array([  3.,  17.]),
 array([  6.,  34.]))

In [9]:
# Numbers of pulls to use at successive stages.
draw_samples = [1, 1, 3, 10, 10, 25, 50, 100, 200, 600]

# Show each (position, value) pair. Printing an explicit tuple gives the
# same "(0, 1)" style output under both Python 2 and Python 3, whereas
# `print (j,i)` prints "0 1" on Python 3.
for j, i in enumerate(draw_samples):
    print((j, i))

(0, 1)
(1, 1)
(2, 3)
(3, 10)
(4, 10)
(5, 25)
(6, 50)
(7, 100)
(8, 200)
(9, 600)


In [39]:
# Scratch check of the posterior draw used for arm selection:
# arm 0 has 1 win in 1 trial, arm 1 has 2 wins in 2 trials.
wins = np.array([1, 2])
trials = np.array([1, 2])
# Call-form print works on both Python 2 and Python 3 (the bare
# `print x` statement is a SyntaxError on Python 3).
print(rbeta(1 + wins, 1 + trials - wins))
# NOTE: this is a second, separate draw, not the one printed above.
np.argmax(rbeta(1 + wins, 1 + trials - wins))

[ 0.96998417  0.97988798]


1

In [26]:
# Scratch check: build a two-element integer array and display it.
np.array([1,2])

array([1, 2])

In [32]:
# Short alias for scipy.stats' beta distribution object.
beta = stats.beta

In [36]:
# Scratch check: display 10 random floats drawn from NumPy's global RNG.
np.random.random(10)

array([ 0.09344673,  0.99404444,  0.62616652,  0.81179358,  0.90511153,
        0.517745  ,  0.18589692,  0.63750941,  0.52943421,  0.62672183])