In [5]:
#%%file bandits.py
from random import random, choice, uniform, betavariate
from math import log, exp, expm1

class Bandit(object):
    """The primary bandit interface.

    Don't use this directly unless you really want uniform random arm
    selection (which defeats the whole purpose, really).  Used as a control
    to test against and as the interface subclasses define methods against.

    State is held in parallel lists indexed by arm position: ``arms`` (ids),
    ``pulls`` (pull counts), ``reward`` (cumulative reward), ``values``
    (the payload handed back with the arm) and ``confidence`` (the running
    reward estimate maintained by ``_update``).
    """

    @classmethod
    def fromdict(cls, dict_spec):
        """Rebuild a bandit from a dict specification.

        :param dict_spec: mapping holding "bandit_type", "arms", "pulls",
                          "reward" and "values", optionally "confidence";
                          every other key is passed to the constructor as a
                          keyword argument
        :type dict_spec: dict
        :returns: a new instance of the class named by "bandit_type"
        :raises KeyError: if a required key is missing or "bandit_type" is
                          not a name defined in this module
        """
        state_keys = ("arms", "pulls", "reward", "values", "bandit_type", "confidence")
        extra_args = {key: value for key, value in dict_spec.items() if key not in state_keys}

        # NOTE(review): the class is resolved by name from module globals, so
        # dict_spec must come from a trusted source.
        bandit = globals()[dict_spec["bandit_type"]](**extra_args)
        bandit.arms = dict_spec["arms"]
        bandit.pulls = dict_spec["pulls"]
        bandit.reward = dict_spec["reward"]
        bandit.values = dict_spec["values"]
        # Specs without a stored confidence start every arm at 0.0.
        bandit.confidence = dict_spec.get("confidence", [0.0] * len(bandit.arms))
        return bandit

    def __init__(self):
        self.arms = []
        self.pulls = []
        self.reward = []
        self.values = []
        self.confidence = []

    def add_arm(self, arm_id, value=None):
        """Register a new arm with zeroed statistics.

        :param arm_id: identifier used to look the arm up later
        :param value: payload returned as the arm's "value"
        """
        self.arms.append(arm_id)
        self.pulls.append(0)
        self.reward.append(0.0)
        self.confidence.append(0.0)
        self.values.append(value)

    def pull_arm(self, arm_id):
        """Record one pull of an arm.

        :raises ValueError: if ``arm_id`` is not a registered arm
        """
        # list.index raises ValueError for unknown ids (it never returns -1),
        # so no further guard is needed.
        self.pulls[self.arms.index(arm_id)] += 1

    def reward_arm(self, arm_id, reward):
        """Add ``reward`` to an arm and refresh its confidence estimate.

        :raises ValueError: if ``arm_id`` is not a registered arm
        """
        ind = self.arms.index(arm_id)
        self.reward[ind] += reward
        self._update(ind, reward)

    def _update(self, arm_index, reward):
        """Blend ``reward`` into the arm's confidence.

        With ``n = max(1, pulls)`` the new confidence is
        ``(n - 1) / n * current + reward / n``.
        """
        n = max(1, self.pulls[arm_index])
        current = self.confidence[arm_index]
        self.confidence[arm_index] = ((n - 1) / float(n)) * current + (1 / float(n)) * reward

    def suggest_arm(self):
        """Uniform random for default bandit.

        Just uses ``choice`` to pick between arms.

        :returns: the arm dict produced by ``__getitem__``
        :raises IndexError: if no arms have been added
        """
        # ``random`` is imported as the function, not the module, so the
        # module-level ``choice`` import has to be used here.
        return self[choice(self.arms)]

    def __getitem__(self, key):
        """Return a snapshot dict (id, pulls, reward, value) for arm ``key``.

        :raises KeyError: if ``key`` is not a registered arm
        """
        try:
            ind = self.arms.index(key)
        except ValueError:
            raise KeyError("Arm is not found in this bandit")
        return {
            "id": self.arms[ind],
            "pulls": self.pulls[ind],
            "reward": self.reward[ind],
            "value": self.values[ind],
        }

    def __str__(self):
        output = '%s  ' % self.__class__.__name__
        output += '; '.join(['%s:%s' % (key, val) for key, val in self.__dict__.items()])
        return output

class EpsilonGreedyBandit(Bandit):
    """Epsilon Greedy Bandit implementation.  Aggressively favors the present winner.

    Will assign the winning arm 1.0 - epsilon of the time, uniform random
    between arms epsilon of the time.
    Will "exploit" the present winner more often with lower values of epsilon,
    "explore" other candidates more often with higher values of epsilon.

    :param epsilon: The fraction of the time to "explore" other arms, e.g. a
                    value of 0.1 will perform random assignment for 10% of
                    traffic
    :type epsilon: float
    """

    def __init__(self, epsilon=0.1):
        super(EpsilonGreedyBandit, self).__init__()
        self.epsilon = epsilon

    def suggest_arm(self):
        """Get an arm according to the EpsilonGreedy Strategy.

        :returns: the arm dict for the chosen arm
        """
        random_determination = random()
        if random_determination > self.epsilon:
            # Exploit: take the arm with the highest confidence.
            key = self._ind_max()
        else:
            # Explore: pick uniformly among all arms.
            key = choice(self.arms)

        return self[key]

    def _ind_max(self):
        """Return the id of the arm with the highest confidence.

        Ties resolve to the earliest arm, because ``list.index`` returns the
        first match.
        """
        return self.arms[self.confidence.index(max(self.confidence))]

    def __str__(self):
        return Bandit.__str__(self)

    def __repr__(self):
        # Previously spelled ``__repr`` (no trailing underscores), which is
        # name-mangled and never used by repr(); instances therefore showed
        # the default object repr.
        return Bandit.__str__(self)

def all_same(items):
    """Return True when every element of ``items`` equals its first element.

    An empty sequence yields True, since there is nothing to compare.
    """
    for candidate in items:
        if not candidate == items[0]:
            return False
    return True

class NaiveStochasticBandit(Bandit):
    """A naive weighted random Bandit.  Favors the winner by giving it greater
    weight in random selection.

    Winner will eventually flatten out the losers if margin is great enough.
    Subclasses customise the strategy by overriding ``_compute_weights``.
    """

    def __init__(self):
        super(NaiveStochasticBandit, self).__init__()

    def _compute_weights(self):
        """Return one selection weight per arm.

        A pulled arm is weighted by its mean reward per pull; an arm that has
        never been pulled gets ``1.0 / len(self.arms)``.
        """
        weights = []
        for pulls, reward in zip(self.pulls, self.reward):
            if pulls:
                weights.append(float(reward) / float(pulls))
            else:
                weights.append(1.0 / len(self.arms))
        return weights

    def suggest_arm(self):
        """Get an arm according to the Naive Stochastic Strategy.

        Each arm is selected with probability proportional to its weight.

        :returns: the arm dict for the chosen arm
        :raises IndexError: if no arms have been added
        """
        if not self.arms:
            raise IndexError("Bandit has no arms to suggest")

        weights = self._compute_weights()
        total_weight = sum(weights)
        if total_weight <= 0.0:
            # No arm has any positive weight: explore uniformly instead of
            # always handing out the first arm.
            return self[choice(self.arms)]

        # Draw against the actual total so the weights need not sum to 1 and
        # the outcome does not depend on the order the arms were added in.
        random_determination = uniform(0.0, total_weight)

        cum_weight = 0.0
        for arm_id, weight in zip(self.arms, weights):
            cum_weight += weight
            if cum_weight > random_determination:
                return self[arm_id]
        # Float rounding can leave the cumulative sum marginally below the
        # draw; that shortfall belongs to the top of the range.
        return self[self.arms[-1]]


class SoftmaxBandit(NaiveStochasticBandit):
    """Softmax bandit: arm weights are ``exp(confidence / tau)`` normalised to
    sum to 1.

    :param tau: divisor applied to each confidence before exponentiation
    :type tau: float
    """

    def __init__(self, tau=0.1):
        super(SoftmaxBandit, self).__init__()
        self.tau = tau

    def _compute_weights(self):
        """Return the softmax of the per-arm confidences at ``self.tau``.

        :returns: one weight per arm (empty list when there are no arms)
        """
        if not self.confidence:
            return []

        scaled = [x / self.tau for x in self.confidence]
        # Shifting by the maximum leaves the normalised weights unchanged but
        # keeps exp() from overflowing when confidence / tau is large.
        peak = max(scaled)
        exponentials = [exp(value - peak) for value in scaled]
        total = sum(exponentials)
        return [value / total for value in exponentials]


class AnnealingSoftmaxBandit(SoftmaxBandit):
    """Softmax bandit whose ``tau`` is recomputed from the total pull count
    every time weights are computed.

    :param tau: initial value of ``tau``; accepted so the class can be built
                with the same keyword arguments as ``SoftmaxBandit``, but it
                is overwritten on the first weight computation
    :type tau: float
    """

    def __init__(self, tau=1):
        super(AnnealingSoftmaxBandit, self).__init__(tau=tau)

    def _compute_weights(self):
        """Set ``tau`` to ``1 / log(total pulls + 1)`` and return the softmax
        weights at that value.
        """
        t = sum(self.pulls) + 1
        # The small offset keeps log() away from exactly 0 when nothing has
        # been pulled yet (t == 1), which would divide by zero.
        self.tau = 1 / log(t + 0.0000001)

        # The weight formula is the parent's; only tau differs.
        return super(AnnealingSoftmaxBandit, self)._compute_weights()

class ThompsonBandit(NaiveStochasticBandit):
    """Bandit that samples one value per arm from a Beta distribution and
    suggests the arm with the largest sample.

    Rewards are treated as binary: each rewarded event counts as one success.

    :param prior: (alpha, beta) added to each arm's success and failure
                  counts before sampling
    :type prior: tuple
    """

    def __init__(self, prior=(1.0, 1.0)):
        super(ThompsonBandit, self).__init__()
        self.prior = prior

    def _compute_weights(self):
        """Draw one Beta sample per arm.

        :returns: list of samples, in arm order
        """
        prior_alpha, prior_beta = self.prior[0], self.prior[1]
        sampled_theta = []
        for successes, pulls in zip(self.reward, self.pulls):
            # Rewards can outnumber pulls (e.g. an arm rewarded without a
            # registered pull); clamp so the beta parameter stays positive,
            # which betavariate requires.
            failures = max(0.0, pulls - successes)
            sampled_theta.append(betavariate(prior_alpha + successes, prior_beta + failures))
        return sampled_theta

    def suggest_arm(self):
        """Return the arm whose sampled value is largest."""
        weights = self._compute_weights()
        return self[self.arms[weights.index(max(weights))]]

    def reward_arm(self, arm_id, reward):
        """Record a binary reward for an arm.

        Any positive ``reward`` counts as a single success (1.0); zero or
        negative rewards are recorded as 0.0 instead of being turned into a
        success.
        """
        reward = 1.0 if reward > 0 else 0.0
        super(ThompsonBandit, self).reward_arm(arm_id, reward)

In [33]:
# Scratch check of list.index on a small list of arm ids.
arms=['df','d']
arm_id='d'
arms.index(arm_id)
yy=[1,2]
# Only this last expression is echoed as the cell output.
list(set(yy))

[1, 2]

In [2]:
class Mab(object):
    """Container for the state the mab extension keeps for one application.

    :param app: application object whose ``config`` mapping supplies the
                ``MAB_DEBUG_HEADERS`` and ``MAB_COOKIE_NAME`` settings
    """
    def __init__(self, app):
        settings = app.config
        # Configurable values, with their defaults.
        self.debug_headers = settings.get('MAB_DEBUG_HEADERS', True)
        self.cookie_name = settings.get('MAB_COOKIE_NAME', "MAB")
        # Nothing is registered or attached until later.
        self.bandit_storage = None
        self.bandits = {}
        self.reward_endpts = []
        self.pull_endpts = []

In [None]:
def choose_arm(bandit):
    """Route decorator for conveniently registering an impression.

    The decorated function receives one extra keyword argument per registered
    experiment, named after the experiment and carrying the suggested arm's
    value.

    :param bandit: The bandit/experiment to register for
    :type bandit: string
    """
    def decorator(func):
        # Runs once, when the route is defined: remember the experiment on
        # the function itself.
        if hasattr(func, 'bandits'):
            func.bandits.append(bandit)
        else:
            func.bandits = [bandit]

        @wraps(func)
        def wrapper(*args, **kwargs):
            # Runs on every hit of the endpoint.
            suggested = {}
            for experiment in func.bandits:
                #Fetch from request first here?
                _, arm_value = suggest_arm_for(experiment)
                suggested[experiment] = arm_value
            kwargs.update(suggested)
            return func(*args, **kwargs)
        return wrapper
    return decorator
def reward_endpt(bandit, reward_val=1):
    """Route decorator for rewards.

    When the endpoint is hit, every registered experiment that has an arm
    recorded on the current request gets a pending reward added to
    ``request.bandits_reward``.

    :param bandit: The bandit/experiment to register rewards
                   for using arm found in cookie.
    :type bandit: string
    :param reward_val: The amount of reward this endpoint should
                       give its winning arm
    :type reward_val: float
    """
    def decorator(func):
        # Runs once, when the route is defined.
        if hasattr(func, 'rewards'):
            func.rewards.append((bandit, reward_val))
        else:
            func.rewards = [(bandit, reward_val)]

        @wraps(func)
        def wrapper(*args, **kwargs):
            for experiment, reward_amt in func.rewards:
                # Experiments with no arm on this request earn nothing.
                if experiment not in request.bandits.keys():
                    continue
                pending = (experiment, request.bandits[experiment], reward_amt)
                request.bandits_reward.add(pending)
            return func(*args, **kwargs)
        return wrapper
    return decorator
def suggest_arm_for(key):
    """Get an experimental outcome by id.  The primary way the implementor
    interfaces with their experiments.

    Uses the arm recorded on the current request when there is a usable one;
    otherwise asks the bandit for a suggestion, records it on the request and
    marks the request's bandits for saving.

    :param key: The bandit/experiment to get a suggested arm for
    :type key: string
    :returns: ``(arm id, arm value)``
    :raises MABConfigException: in case requested experiment does not exist
    """
    app = current_app
    try:
        #Try to get the selected bandits from cookie
        arm = app.extensions['mab'].bandits[key][request.bandits[key]]
        return arm["id"], arm["value"]
    except (AttributeError, TypeError, KeyError, ValueError):
        # ValueError covers a recorded arm id the bandit no longer knows
        # (the arm lookup is a list.index call); treat that client as new
        # instead of failing the request.
        #Assign an arm for a new client
        try:
            # Only a missing extension/bandit is a configuration error, so
            # nothing else runs inside this try.
            bandit = app.extensions['mab'].bandits[key]
        except KeyError:
            raise MABConfigException("Bandit %s not found" % key)
        arm = bandit.suggest_arm()
        request.bandits[key] = arm["id"]
        request.bandits_save = True
        return arm["id"], arm["value"]

def add_bandit(app, name, bandit=None):
    """Attach a bandit for an experiment.

    A bandit already present under ``name`` in the loaded storage takes
    precedence over the ``bandit`` argument.

    :param app: application whose ``extensions['mab']`` state is updated
    :param name: The name of the experiment, will be used for lookups
    :param bandit: The bandit to use for this experiment
    :type bandit: Bandit
    """
    mab_state = app.extensions['mab']
    saved_bandits = mab_state.bandit_storage.load()
    chosen = saved_bandits[name] if name in saved_bandits.keys() else bandit
    mab_state.bandits[name] = chosen

class MABConfigException(Exception):
    """Signals that the MAB setup is in an invalid internal state."""

In [11]:
class MonteCarloTest(object):
    """Tests to ensure that over many iterations, a winner
    eventually converges.

    Each arm in ``true_arm_probs`` pays out 1.0 with the listed probability
    and 0.0 otherwise.
    """
    def __init__(self):
        self.true_arm_probs = dict(green=0.2, red=0.2, blue=0.93)

    def draw(self, arm_name):
        """Simulate one pull of ``arm_name``.

        Requires ``random`` to be bound to the ``random`` module.

        :returns: 1.0 on a payout, 0.0 otherwise
        :raises KeyError: if ``arm_name`` is not in ``true_arm_probs``
        """
        if random.random() > self.true_arm_probs[arm_name]:
            return 0.0
        return 1.0

    def run_algo(self, bandit, num_sims, horizon):
        """Run ``num_sims`` simulations of ``horizon`` pulls each.

        The same ``bandit`` instance is used for every simulation and is
        never reset here, so its state carries over from one simulation to
        the next; only the cumulative reward restarts per simulation.

        :param bandit: object providing suggest_arm / pull_arm / reward_arm
        :param num_sims: number of simulations
        :param horizon: number of pulls per simulation
        :returns: ``[sim_nums, times, chosen_arms, rewards,
                  cumulative_rewards]``, each a flat list of
                  ``num_sims * horizon`` entries; simulation numbers and
                  times are 1-based
        """
        total_steps = num_sims * horizon
        chosen_arms = [0.0] * total_steps
        rewards = [0.0] * total_steps
        cumulative_rewards = [0.0] * total_steps
        sim_nums = [0.0] * total_steps
        times = [0.0] * total_steps

        for sim in range(1, num_sims + 1):
            for t in range(1, horizon + 1):
                index = (sim - 1) * horizon + t - 1
                sim_nums[index] = sim
                times[index] = t

                chosen_arm = bandit.suggest_arm()
                arm_id = chosen_arm['id']
                chosen_arms[index] = arm_id
                bandit.pull_arm(arm_id)
                reward = self.draw(arm_id)
                rewards[index] = reward

                if t == 1:
                    # First pull of a simulation restarts the running total.
                    cumulative_rewards[index] = reward
                else:
                    cumulative_rewards[index] = cumulative_rewards[index - 1] + reward

                # Only payouts are reported back to the bandit.
                if reward:
                    bandit.reward_arm(arm_id, reward)

        return [sim_nums, times, chosen_arms, rewards, cumulative_rewards]

    def save_results(self, results, output_stream):
        """Write one line per step, columns separated by two spaces.

        :param results: the list of equally long series returned by
                        ``run_algo``
        :param output_stream: object with a ``write`` method
        """
        for row in zip(*results):
            output_stream.write("  ".join(str(cell) for cell in row) + "\n")
        # Flush the stream that was actually written to, once, rather than
        # sys.stdout after every line.
        flush = getattr(output_stream, "flush", None)
        if flush is not None:
            flush()
import flask_mab.bandits as bandits

def makeBandit(bandit_type,**kwargs):
    """Build a bandit of the named class with green, red and blue arms.

    :param bandit_type: name of a class in the ``bandits`` module
    :param kwargs: forwarded unchanged to that class's constructor
    :returns: the new bandit, with the three colour arms added in the order
              green, red, blue
    """
    colour_arms = (
        ("green", "#00FF00"),
        ("red", "#FF0000"),
        ("blue", "#0000FF"),
    )
    bandit = getattr(bandits, bandit_type)(**kwargs)
    for arm_id, hex_code in colour_arms:
        bandit.add_arm(arm_id, hex_code)
    return bandit
# Name of the bandit class handed to makeBandit in the next cell.
bandit_name = 'EpsilonGreedyBandit'

# NOTE(review): no module-level true_arm_probs is defined in the visible
# cells (only the MonteCarloTest attribute) — presumably left over from an
# earlier session; confirm before re-running.
true_arm_probs

{'blue': 0.93, 'green': 0.2, 'red': 0.2}

In [7]:
# Build the bandit named by bandit_name; epsilon is forwarded to its constructor.
make_exp = makeBandit(bandit_name, epsilon=0.3)

In [8]:
# Echo the bandit built above; the cell output is its repr.
make_exp

<flask_mab.bandits.EpsilonGreedyBandit at 0x1094bd810>

In [23]:
import random
import time
# Time 3000 simulations of 250 pulls each with an epsilon-greedy bandit.
# NOTE(review): `import random` and `exp = ...` rebind names that the first
# cell imports as functions (random.random, math.exp); harmless only while
# the bandit classes in use come from flask_mab.bandits — confirm.
t1=time.time()
exp=MonteCarloTest()
results = exp.run_algo(makeBandit('EpsilonGreedyBandit', epsilon=0.3), 3000, 250)
# Elapsed wall-clock seconds.
print time.time()-t1

3.69192814827


In [30]:
# results[4] is the cumulative_rewards series returned by run_algo; it holds
# one entry per simulated pull (num_sims * horizon).
len(results[4])#[100]  SoftmaxBandit,AnnealingSoftmaxBandit,ThompsonBandit

750000

In [22]:
# Time 3000 simulations of 250 pulls each with a softmax bandit (tau=0.3).
t1=time.time()
exp=MonteCarloTest()
results = exp.run_algo(makeBandit('SoftmaxBandit', tau=0.3), 3000, 250)
# Elapsed wall-clock seconds.
print time.time()-t1


5.00602483749


In [18]:
# Time 3000 simulations of 250 pulls each with an annealing softmax bandit;
# makeBandit forwards tau to the bandit constructor.
t1=time.time()
exp=MonteCarloTest()
results = exp.run_algo(makeBandit('AnnealingSoftmaxBandit', tau=0.3), 3000, 250)
# Elapsed wall-clock seconds.
print time.time()-t1

5.53866410255


In [20]:
# Time 3000 simulations of 250 pulls each with a Thompson bandit (default
# constructor arguments).
t1=time.time()
exp=MonteCarloTest()
results = exp.run_algo(makeBandit('ThompsonBandit'), 3000, 250)
# Elapsed wall-clock seconds.
print time.time()-t1

12.5467870235


In [15]:
def myhasattr(obj, name, _marker=object()):
    """Report whether ``obj`` has attribute ``name`` without hiding errors.

    The lookup uses three-argument getattr() with a private sentinel as the
    default: a missing attribute yields the sentinel, while any exception
    other than AttributeError raised during the lookup propagates to the
    caller instead of being masked.
    """
    looked_up = getattr(obj, name, _marker)
    return looked_up is not _marker
class OldStyle(object):
    """Demo class whose attribute fallback raises ValueError for ``error``
    and AttributeError for every other missing name.
    """
    bar = "bar"

    def __getattr__(self, name):
        # Only reached for names that normal lookup did not find.
        if name != "error":
            raise AttributeError(name)
        raise ValueError("whee!")
# Demonstration: the ValueError raised by OldStyle.__getattr__ propagates
# out of myhasattr rather than being reported as a missing attribute.
myhasattr(OldStyle(), "error")

ValueError: whee!