In [3]:
%%file default_settings.py
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

DEFAULT_SETTINGS = {
            # redis
            'expire': 3600 * 24 * 30,
            'redis': {
                'host': 'localhost',
                'port': 6379,
                'db': 0
            },
            # recommendation engine settings
            'recommendation_count': 10,
            'recommendation': {
                'update_interval_sec': 600,
                'search_depth': 100,
                'max_history': 1000,
            },
}

Writing default_settings.py


In [42]:
from redis import Redis
from default_settings import DEFAULT_SETTINGS

# redis key
PREFIX = 'MAB'
MAB_EXP_BASE = '%s:ALGO:ABEXPERIMENT:{}:{}' % PREFIX
HASH_FIELD_EXP_TAG="TAG"
class BanditStorage(object):
    """The main bandit redis storage.
    Sets up all the necessary tracking for the bandit experiments
    """
    _CACHE_MAB_EXP = {}  # class cache
    _CLI = None

    def __init__(self, settings=DEFAULT_SETTINGS):
        """Attach MAB logic to redis
        :param redis: An optional redis application
        """
        DEFAULT_SETTINGS.update(settings)
        self.settings = DEFAULT_SETTINGS
            
    @property
    def client(self):
        if BanditStorage._CLI is None:
            BanditStorage._CLI = Redis(host=self.settings.get('redis').get('host'),
                                    port=int(self.settings.get('redis').get('port')),
                                    db=int(self.settings.get('redis').get('db')), )
        return BanditStorage._CLI

    @classmethod
    def get_key_mab_exp_arms(cls, exp_id,arms="arms"):
        return MAB_EXP_BASE.format(str(exp_id),str(arms))
    @classmethod
    def get_key_mab_exp_pulls(cls, exp_id,pulls="pulls"):
        return MAB_EXP_BASE.format(str(exp_id),str(pulls))
    @classmethod
    def get_key_mab_exp_reward(cls, exp_id,reward="reward"):
        return MAB_EXP_BASE.format(str(exp_id),str(reward))
    @classmethod
    def get_key_mab_exp_values(cls, exp_id,values="values"):
        return MAB_EXP_BASE.format(str(exp_id),str(values))
    @classmethod
    def get_key_mab_exp_confidence(cls, exp_id,confidence="confidence"):
        return MAB_EXP_BASE.format(str(exp_id),str(confidence))
    @classmethod
    def get_key_mab_exp_mutil(cls, exp_id,tag="confidence"):
        return MAB_EXP_BASE.format(str(exp_id),str(tag))

    @property
    def expire(self):
        return self.settings.get('expire')

    def touch(self, key):
        self.client.expire(key, self.expire)

    def register_exp_tags(self, exp_id, tag,val):
        """
        register exp_id
        :param exp_id: str
        :param tag: str
        :rtype : None
        """
        key = BanditStorage.get_key_mab_exp_mutil(exp_id,tag)
        return self.client.hset(key, HASH_FIELD_EXP_TAG,val)#self.client.hset(key, HASH_FIELD_EXP_TAG, val)

    def register_exp(self, exp_id, tag,val):
        """
        register exp_id
        :param exp_id: str
        :param tag: str
        :rtype : None
        """
        key = BanditStorage.get_key_mab_exp_mutil(exp_id,tag)
        print key
        return self.client.sadd(key, val)
    def get_exp_tag(self, exp_id,tag):
        key = BanditStorage.get_key_mab_exp_mutil(exp_id,tag)
        
        return self.client.hget(key,HASH_FIELD_EXP_TAG)
    def remove_exp_tags(self, exp_id,tag,value):
        """
        Clear the contents of a set
        """
        key = BanditStorage.get_key_mab_exp_mutil(exp_id,tag)
        self.client.srem(key,value)
    def get_exp(self, exp_id,tag):
        key = BanditStorage.get_key_mab_exp_mutil(exp_id,tag)
        
        return self.client.smembers(key)
print BanditStorage().register_exp('testmab','arms','alt1')

MAB:ALGO:ABEXPERIMENT:testmab:arms
0


In [44]:
BanditStorage().remove_exp_tags('testmab','pulls',0)

In [32]:
if  BanditStorage().get_exp_tag('testmab','pulls') is not None:
    print 'f'
else:
    print 'p'

p


In [45]:
BanditStorage().register_exp_tags('testmab','pulls',0)

1L

In [48]:
int(BanditStorage().get_exp_tag('testmab','pulls'))+1

1

In [24]:
print BanditStorage().register_exp('testmab','arms','alt2')

MAB:ALGO:ABEXPERIMENT:testmab:arms
1


In [25]:
print BanditStorage().get_exp('testmab','arms')

set(['alt2', 'alt1'])


In [115]:
#%%file bandits.py
from random import random, choice, uniform, betavariate
from math import log, exp, expm1
from default_settings import DEFAULT_SETTINGS


class Bandit(object):
    """The primary bandit interface.  Don't use this unless you really
    want uniform random arm selection (which defeats the whole purpose, really)
    Used as a control to test against and as an interface to define methods against.
    """
    _r = None

    def __init__(self, settings,exp_id):
        DEFAULT_SETTINGS.update(settings)
        self.settings = DEFAULT_SETTINGS
        self.exp_id=exp_id

    @property
    def repository(self):
        if self._r is None:
            self._r = BanditStorage(self.settings)
        return self._r

    @classmethod
    def fromdict(cls, dict_spec):
        extra_args = dict([(key, value) for key, value in dict_spec.items() if key not in ["arms", "pulls", "reward", "values", "bandit_type", "confidence"]])

        bandit = globals()[dict_spec["bandit_type"]](**extra_args)
        bandit.arms = dict_spec["arms"]
        bandit.pulls = dict_spec["pulls"]
        bandit.reward = dict_spec["reward"]
        bandit.values = dict_spec["values"]
        bandit.confidence = dict_spec.get("confidence", [0.0] * len(bandit.arms))
        return bandit

    def __init__(self):
        self.arms = []
        self.pulls = []
        self.reward = []
        self.values = []
        self.confidence = []

    def add_arm(self, arm_id, value=None):
        self.repository.register_exp(self.exp_id,"arms",arm_id)
        self.repository.register_exp_tags(self.exp_id,"pulls",0)
        self.repository.register_exp_tags(self.exp_id,"reward",0.0)
        self.repository.register_exp_tags(self.exp_id,"confidence",0.0)
        self.repository.register_exp_tags(self.exp_id,"values",value)
        #self.arms.append(arm_id)
        #self.pulls.append(0)
        #self.reward.append(0.0)
        #self.confidence.append(0.0)
        #self.values.append(value)

    def pull_arm(self,arm_id):
        if  BanditStorage().get_exp_tag(self.exp_id,'pulls') is not None:
            pull_cnt = int(BanditStorage().get_exp_tag(exp_id,'pulls'))+1
            BanditStorage().register_exp_tags(self.exp_id,'pulls',pull_cnt)
        else:
            BanditStorage().register_exp_tags(self.exp_id,'pulls',0)
        #ind = self.arms.index(arm_id)
        #if ind > -1:
        #    self.pulls[ind] += 1

    def reward_arm(self,arm_id, reward):
        curr_reward=float(self.repository.get_exp_tag(self.exp_id,'reward'))
        curr_pulls=int(self.repository.get_exp_tag(self.exp_id,'pulls'))
        curr_confidence =float(self.repository.get_exp_tag(self.exp_id,'confidence')) 
        current=curr_confidence
        n = max(1, curr_pulls)
        new_confidence=((n - 1) / float(n)) * current + (1 / float(n)) * reward
        new_reward = curr_reward +reward
        #update reward
        self.repository.register_exp_tags(self.exp_id,'reward',new_reward)
        #update confidence
        self.repository.register_exp_tags(self.exp_id,'confidence',new_confidence)

        
        #ind = self.arms.index(arm_id)
        #if ind > -1:
        #   self.reward[ind] += reward
        #self._update(ind, reward)

    def _update(self, arm_index, reward):
        n = max(1, self.pulls[arm_index])
        current = self.confidence[arm_index]
        self.confidence[arm_index] = ((n - 1) / float(n)) * current + (1 / float(n)) * reward

    def suggest_arm(self):
        """Uniform random for default bandit.
        Just uses random.choice to choose between arms
        """
        arms=list(self.repository.get_exp(self.exp_id,'arms'))#self.arms
        return self[random.choice(arms)]

    def __getitem__(self, key):
        """
        ind = self.arms.index(key)
        if ind > -1:
            arm = {
                    "id":self.arms[ind],
                    "pulls":self.pulls[ind],
                    "reward":self.reward[ind],
                    "value":self.values[ind]
                    }        
        
        """
        if key:
            arm = {
                "id":key,
                "pulls":self.repository.get_exp_tag(self.exp_id,'pulls'),
                "reward":self.repository.get_exp_tag(self.exp_id,'reward'),
                "value":self.repository.get_exp_tag(self.exp_id,'values')
            }        
        
            return arm
        else:
            raise KeyError("Arm is not found in this bandit")

    def __str__(self):
        output = '%s  ' % self.__class__.__name__
        output += '; '.join(['%s:%s' % (key, val) for key, val in self.__dict__.items()])
        return output

class EpsilonGreedyBandit(Bandit):
    """Epsilon Greedy Bandit implementation.  Aggressively favors the present winner.
    Will assign winning arm 1.0 - epsilon of the time, uniform random between arms
    epsilon % of the time.
    Will "exploit" the present winner more often with lower values of epsilon, "explore"
    other candidates more often with higher values of epsilon.
    :param epsilon: The percentage of the time to "explore" other arms, E.G a value of
                    0.1 will perform random assignment for %10 of traffic
    :type epsilon: float
    """

    def __init__(self, epsilon=0.1):
        super(EpsilonGreedyBandit, self).__init__()
        self.epsilon = epsilon

    def suggest_arm(self):
        """Get an arm according to the EpsilonGreedy Strategy
        """
        random_determination = random()
        if random_determination > self.epsilon:
            key = self._ind_max()
        else:
            arms=list(self.repository.get_exp(self.exp_id,'arms'))
            key = choice(arms)

        return self[key]

    def _ind_max(self):
        arms=list(self.repository.get_exp(self.exp_id,'arms'))
        confidence=
        return self.arms[self.confidence.index(max(self.confidence))]

    def __str__(self):
        return Bandit.__str__(self)

    def __repr(self):
        return Bandit.__str__(self)

def all_same(items):
    return all(x == items[0] for x in items)

class NaiveStochasticBandit(Bandit):
    """A naive weighted random Bandit.  Favors the winner by giving it greater weight
    in random selection.
    Winner will eventually flatten out the losers if margin is great enough
    """

    def __init__(self):
        super(NaiveStochasticBandit, self).__init__()

    def _compute_weights(self):
        weights = []
        for ind, n in enumerate(self.pulls):
            reward = self.reward[ind]
            try:
                weights.append(1.0 * (float(reward)/float(n)))
            except ZeroDivisionError:
                weights.append(1.0/len(self.arms))
        return weights

    def suggest_arm(self):
        """Get an arm according to the Naive Stochastic Strategy
        """
        weights = self._compute_weights()
        random_determination = uniform(0.0, 1.0)

        cum_weight = 0.0
        for ind, weight in enumerate(weights):
            cum_weight += weight
            if cum_weight > random_determination:
                return self[self.arms[ind]]
        return self[self.arms[0]]


class SoftmaxBandit(NaiveStochasticBandit):

    def __init__(self, tau=0.1):
        super(SoftmaxBandit, self).__init__()
        self.tau = tau

    def _compute_weights(self):
        weights = []
        total_reward = sum([exp(x / self.tau) for x in self.confidence])
        for ind, n in enumerate(self.pulls):
            weights.append(exp(self.confidence[ind] / self.tau) / total_reward)
        return weights


class AnnealingSoftmaxBandit(SoftmaxBandit):

    def __init__(self):
        super(AnnealingSoftmaxBandit, self).__init__()
        self.tau = 1

    def _compute_weights(self):
        t = sum(self.pulls) + 1
        self.tau = 1 / log(t +  0.0000001)

        weights = []
        total_reward = sum([exp(x / self.tau) for x in self.confidence])
        for ind, n in enumerate(self.pulls):
            weights.append(exp(self.confidence[ind] / self.tau) / total_reward)
        return weights

class ThompsonBandit(NaiveStochasticBandit):

    def __init__(self, prior=(1.0,1.0)):
        super(ThompsonBandit, self).__init__()
        self.prior = prior

    def _compute_weights(self):
        sampled_theta = []
        for ind, n in enumerate(self.arms):
            dist = betavariate(self.prior[0] + self.reward[ind], self.prior[1]+self.pulls[ind]-self.reward[ind])
            sampled_theta += [dist]
        return sampled_theta

    def suggest_arm(self):
        weights = self._compute_weights()
        return self[self.arms[weights.index(max(weights))]]

    def reward_arm(self, arm_id, reward):
        if reward != 1.0:
            reward = 1.0
        super(ThompsonBandit, self).reward_arm(arm_id, reward)

SyntaxError: invalid syntax (<ipython-input-115-f06b716850df>, line 155)

In [315]:
#search_key = _key("{0}:{1}:{2}".format(stat_type, exp_key, stat_range))
import redis
RE=redis.StrictRedis(db=3)
keys = RE.smembers(_key("testmab"))

ResponseError: WRONGTYPE Operation against a key holding the wrong kind of value

In [355]:
%%file banditsRedis.py
from random import random, choice, uniform, betavariate
from math import log, exp, expm1
import json
import redis

DEFAULT_PREFIX="MAB_EXP"
def _key(k):
    return "{0}:{1}".format(DEFAULT_PREFIX, k)

class BanditEncoder(json.JSONEncoder):
    """Json serializer for Bandits"""
    def default(self, obj):
        if isinstance(obj, Bandit):
            dict_repr = obj.__dict__
            dict_repr['bandit_type'] = obj.__class__.__name__
            return dict_repr
        return json.JSONEncoder.default(self, obj)

class BanditDecoder(json.JSONDecoder):
    """Json Marshaller for Bandits"""
    def decode(self, obj):
        dict_repr = json.loads(obj)
        for key in dict_repr.keys():
            if 'bandit_type' not in dict_repr[key].keys():
                raise TypeError("Serialized object is not a valid bandit")
            dict_repr[key] = Bandit.fromdict(dict_repr[key])
        return dict_repr



class Bandit(object):
    """The primary bandit interface.  Don't use this unless you really
    want uniform random arm selection (which defeats the whole purpose, really)
    Used as a control to test against and as an interface to define methods against.
    """        
    @classmethod
    def fromdict(cls, dict_spec):
        extra_args = dict([(key, value) for key, value in dict_spec.items() if key not in ["arms", "pulls", "reward", "values", "bandit_type", "confidence"]])

        bandit = globals()[dict_spec["bandit_type"]](**extra_args)
        bandit.arms = dict_spec["arms"]
        bandit.pulls = dict_spec["pulls"]
        bandit.reward = dict_spec["reward"]
        bandit.values = dict_spec["values"]
        bandit.confidence = dict_spec.get("confidence", [0.0] * len(bandit.arms))
        return bandit

    def __init__(self,exp_name):
        self.redis=redis.StrictRedis(db=3)
        self.exp_name=exp_name
        #pipe = self.redis.pipeline()
        self.arms_key="p:{0}:{1}:mab".format(self.exp_name, "arms")
        self.pulls_key = "p:{0}:{1}:mab".format(self.exp_name, "pulls")
        self.reward_key = "p:{0}:{1}:mab".format(self.exp_name, "reward")
        self.confidence_key="p:{0}:{1}:mab".format(self.exp_name, "confidence")
        self.values_key = "p:{0}:{1}:mab".format(self.exp_name, "values")
        
        
        if self.redis.hget(self.arms_key,'mab_exp') is not None:
            self.arms= eval(self.redis.hget(self.arms_key,'mab_exp'))
        else:
            self.arms = []

        if  self.redis.hget(self.pulls_key,'mab_exp') is not None:
            self.pulls=eval(self.redis.hget(self.pulls_key,'mab_exp'))
        else:
            self.pulls = []
        if  self.redis.hget(self.reward_key,'mab_exp') is not None:
            self.reward=eval(self.redis.hget(self.reward_key,'mab_exp'))
        else:
            self.reward = []
        if  self.redis.hget(self.values_key,'mab_exp') is not None:
            self.values=eval(self.redis.hget(self.values_key,'mab_exp'))
        else:
            self.values = []
        
        if  self.redis.hget(self.confidence_key,'mab_exp') is not None:
            self.confidence=eval(self.redis.hget(self.confidence_key,'mab_exp'))
        else:
            self.confidence = []


        
    def BanditEncoder(self):
        dict_repr = self.__dict__
        dict_repr['bandit_type'] = self.__class__.__name__
        return dict_repr
    def BanditDecoder(exp_dict):
        dict_repr = Bandit.fromdict(exp_dict)
        return dict_repr
    def add_arm(self, arm_id, value=None):

        self.arms.append(arm_id)
        self.pulls.append(0)
        self.reward.append(0.0)
        self.confidence.append(0.0)
        self.values.append(value)
        
        self.redis.hset(self.confidence_key,'mab_exp',self.confidence)
        self.redis.hset(self.arms_key,'mab_exp',self.arms)
        self.redis.hset(self.pulls_key,'mab_exp',self.pulls)
        self.redis.hset(self.reward_key,'mab_exp',self.reward)
        self.redis.hset(self.values_key,'mab_exp',self.values)
        
    

    def pull_arm(self, arm_id):
        ind = self.arms.index(arm_id)
        if ind > -1:
            self.pulls[ind] += 1

        self.redis.hset(self.confidence_key,'mab_exp',self.confidence)
        self.redis.hset(self.arms_key,'mab_exp',self.arms)
        self.redis.hset(self.pulls_key,'mab_exp',self.pulls)
        self.redis.hset(self.reward_key,'mab_exp',self.reward)
        self.redis.hset(self.values_key,'mab_exp',self.values)
        
    def reward_arm(self, arm_id, reward):
        ind = self.arms.index(arm_id)
        if ind > -1:
            self.reward[ind] += reward
        self._update(ind, reward)
        
        self.redis.hset(self.arms_key,'mab_exp',self.arms)
        self.redis.hset(self.pulls_key,'mab_exp',self.pulls)
        self.redis.hset(self.reward_key,'mab_exp',self.reward)
        self.redis.hset(self.values_key,'mab_exp',self.values)
        


    def _update(self, arm_index, reward):
        n = max(1, self.pulls[arm_index])
        current = self.confidence[arm_index]
        self.confidence[arm_index] = ((n - 1) / float(n)) * current + (1 / float(n)) * reward
        self.redis.hset(self.confidence_key,'mab_exp',self.confidence)
        
    def suggest_arm(self):
        """Uniform random for default bandit.
        Just uses random.choice to choose between arms
        """
        return self[random.choice(self.arms)]
    def obj_by_name(self, obj_name, slim=False):
        key="p:{0}:{1}:mab".format(self.exp_name, obj_name)
        return self.redis.hget(key,'mab_exp')#objectified
    def __getitem__(self, key):
        ind = self.arms.index(key)
        if ind > -1:
            arm = {
                    "id":self.arms[ind],
                    "pulls":self.pulls[ind],
                    "reward":self.reward[ind],
                    "value":self.values[ind]
                    }
            return arm
        else:
            raise KeyError("Arm is not found in this bandit")

    def __str__(self):
        output = '%s  ' % self.__class__.__name__
        output += '; '.join(['%s:%s' % (key, val) for key, val in self.__dict__.items()])
        return output

class EpsilonGreedyBandit(Bandit):
    """Epsilon Greedy Bandit implementation.  Aggressively favors the present winner.
    Will assign winning arm 1.0 - epsilon of the time, uniform random between arms
    epsilon % of the time.
    Will "exploit" the present winner more often with lower values of epsilon, "explore"
    other candidates more often with higher values of epsilon.
    :param epsilon: The percentage of the time to "explore" other arms, E.G a value of
                    0.1 will perform random assignment for %10 of traffic
    :type epsilon: float
    """

    def __init__(self,exp_name, epsilon=0.1):
        super(EpsilonGreedyBandit, self).__init__(exp_name)
        self.epsilon = epsilon
        self.exp_name=exp_name

    def suggest_arm(self):
        """Get an arm according to the EpsilonGreedy Strategy
        """
        random_determination = random()
        if random_determination > self.epsilon:
            key = self._ind_max()
        else:
            key = choice(self.arms)

        return self[key]

    def _ind_max(self):
        return self.arms[self.confidence.index(max(self.confidence))]

    def __str__(self):
        return Bandit.__str__(self)

    def __repr(self):
        return Bandit.__str__(self)

def all_same(items):
    return all(x == items[0] for x in items)

class NaiveStochasticBandit(Bandit):
    """A naive weighted random Bandit.  Favors the winner by giving it greater weight
    in random selection.
    Winner will eventually flatten out the losers if margin is great enough
    """

    def __init__(self,exp_name):
        super(NaiveStochasticBandit, self).__init__(exp_name)
        self.exp_name=exp_name

    def _compute_weights(self):
        weights = []
        for ind, n in enumerate(self.pulls):
            reward = self.reward[ind]
            try:
                weights.append(1.0 * (float(reward)/float(n)))
            except ZeroDivisionError:
                weights.append(1.0/len(self.arms))
        return weights

    def suggest_arm(self):
        """Get an arm according to the Naive Stochastic Strategy
        """
        weights = self._compute_weights()
        random_determination = uniform(0.0, 1.0)

        cum_weight = 0.0
        for ind, weight in enumerate(weights):
            cum_weight += weight
            if cum_weight > random_determination:
                return self[self.arms[ind]]
        return self[self.arms[0]]


class SoftmaxBandit(NaiveStochasticBandit):

    def __init__(self, exp_name,tau=0.1):
        super(SoftmaxBandit, self).__init__(exp_name)
        self.tau = tau
        self.exp_name=exp_name

    def _compute_weights(self):
        weights = []
        total_reward = sum([exp(x / self.tau) for x in self.confidence])
        for ind, n in enumerate(self.pulls):
            weights.append(exp(self.confidence[ind] / self.tau) / total_reward)
        return weights


class AnnealingSoftmaxBandit(SoftmaxBandit):

    def __init__(self,exp_name):
        super(AnnealingSoftmaxBandit, self).__init__(exp_name)
        self.tau = 1
        self.exp_name=exp_name

    def _compute_weights(self):
        t = sum(self.pulls) + 1
        self.tau = 1 / log(t +  0.0000001)

        weights = []
        total_reward = sum([exp(x / self.tau) for x in self.confidence])
        for ind, n in enumerate(self.pulls):
            weights.append(exp(self.confidence[ind] / self.tau) / total_reward)
        return weights

class ThompsonBandit(NaiveStochasticBandit):

    def __init__(self,exp_name, prior=(1.0,1.0)):
        super(ThompsonBandit, self).__init__(exp_name)
        self.prior = prior
        self.exp_name=exp_name

    def _compute_weights(self):
        sampled_theta = []
        for ind, n in enumerate(self.arms):
            dist = betavariate(self.prior[0] + self.reward[ind], self.prior[1]+self.pulls[ind]-self.reward[ind])
            sampled_theta += [dist]
        return sampled_theta

    def suggest_arm(self):
        weights = self._compute_weights()
        return self[self.arms[weights.index(max(weights))]]

    def reward_arm(self, arm_id, reward):
        if reward != 1.0:
            reward = 1.0
        super(ThompsonBandit, self).reward_arm(arm_id, reward)
        


Overwriting banditsRedis.py


In [None]:
def makeBandit(bandit_type,**kwargs):
    bandit_cls = getattr(bandits, bandit_type)
    bandit = bandit_cls(**kwargs)
    return bandit
bandit_name = 'EpsilonGreedyBandit'
make_exp = makeBandit(bandit_name, epsilon=0.3)

In [268]:
REDIS=redis.StrictRedis(db=3)
pipe = REDIS.pipeline()
pipe.hget(_key('testmab'), 'mab_exp')

# reverse here and use lpush to keep consistent with using lrange
#for alternative in reversed(self.alternatives):
#    pipe.lpush("{0}:alternatives".format(self.key()), alternative.name)

pipe.execute()

["{'confidence': [1.0, 0.0, 0.0], 'exp_name': 'testmab', 'redis': StrictRedis<ConnectionPool<Connection<host=localhost,port=6379,db=3>>>, 'arms': ['green', 'red', 'blue'], 'bandit_type': 'EpsilonGreedyBandit', 'values': ['green', 'red', 'blue'], 'epsilon': 0.1, 'pulls': [1, 0, 0], 'reward': [1.0, 0.0, 0.0]}"]

In [345]:
t=color_bandit.suggest_arm()
color_bandit.pull_arm(t['id'])
re=color_bandit.reward_arm(t['id'],1.0)
print t['id']

REDIS=redis.StrictRedis(db=3)
#pipe = REDIS.pipeline()
#pipe.hget(_key('testmab'), 'mab_exp')

# reverse here and use lpush to keep consistent with using lrange
#for alternative in reversed(self.alternatives):
#    pipe.lpush("{0}:alternatives".format(self.key()), alternative.name)
print REDIS.hget("p:{0}:{1}:mab".format("leepand", "arms"),'mab_exp')
#pipe.execute()

blue
[1, 1, 0]


In [343]:
print REDIS.hget("p:{0}:{1}:mab".format("leepand3", "arms"),'mab_exp')


['green', 'red', 'blue', 'green333']


In [346]:
print REDIS.hget("p:{0}:{1}:mab".format("leepand3", "confidence"),'mab_exp')


[0.0, 0.0, 1.0, 0.0]


In [338]:
color_bandit2 = EpsilonGreedyBandit('leepand2',0.9)
color_bandit2.suggest_arm()

{'id': 0, 'pulls': 0, 'reward': 0.0, 'value': 'green'}

In [291]:
import json
REDIS=redis.StrictRedis(db=3)
pipe = REDIS.pipeline()
pipe.hget(_key('testmab'), 'mab_exp2')

xx=pipe.execute()
x=REDIS.hget(_key('testmab'), 'mab_exp')





In [295]:
type(eval(xx[0]))

list

In [287]:
from ast import literal_eval
my_dict = literal_eval(xx[0])

SyntaxError: invalid syntax (<unknown>, line 1)

In [322]:
t=color_bandit.suggest_arm()
color_bandit.pull_arm(t['id'])
re=color_bandit.reward_arm(t['id'],1.0)
print t['id']

REDIS=redis.StrictRedis(db=3)
pipe = REDIS.pipeline()
pipe.hget(_key('testmab'), 'mab_exp')

# reverse here and use lpush to keep consistent with using lrange
#for alternative in reversed(self.alternatives):
#    pipe.lpush("{0}:alternatives".format(self.key()), alternative.name)

pipe.execute()

green


["{'confidence': [1.0, 0.0, 0.0], 'exp_name': 'testmab', 'redis': StrictRedis<ConnectionPool<Connection<host=localhost,port=6379,db=3>>>, 'arms': ['green', 'red', 'blue'], 'bandit_type': 'EpsilonGreedyBandit', 'values': ['green', 'red', 'blue'], 'epsilon': 0.1, 'pulls': [2, 0, 0], 'reward': [2.0, 0.0, 0.0]}"]

In [340]:
color_bandit = EpsilonGreedyBandit('leepand3',0.9)
color_bandit.add_arm("green","green")
color_bandit.add_arm("red","red")
color_bandit.add_arm("blue","blue")
chosen_arms=[]

In [351]:
color_bandit = EpsilonGreedyBandit('leepand3',0.9)
color_bandit.obj_by_name('confidence')

'[0.0, 0.0, 1.0, 0.0]'

In [200]:
color_bandit.BanditEncoder()

{'arms': ['green', 'red', 'blue'],
 'bandit_type': 'EpsilonGreedyBandit',
 'confidence': [0.0, 0.0, 0.0],
 'epsilon': 0.1,
 'exp_name': 0.9,
 'pulls': [0, 0, 0],
 'redis': StrictRedis<ConnectionPool<Connection<host=localhost,port=6379,db=3>>>,
 'reward': [0.0, 0.0, 0.0],
 'values': ['green', 'red', 'blue']}

In [192]:
color_bandit.BanditEncoder()

{'arms': ['green', 'red', 'blue'],
 'bandit_type': 'EpsilonGreedyBandit',
 'confidence': [0.0, 0.0, 0.0],
 'epsilon': 0.1,
 'exp_name': 0.9,
 'pulls': [0, 0, 0],
 'reward': [0.0, 0.0, 0.0],
 'values': ['green', 'red', 'blue']}

In [193]:
color_bandit.BanditDecoder()

AttributeError: 'EpsilonGreedyBandit' object has no attribute 'items'

In [102]:
import bandits
import datetime
DEFAULT_PREFIX="MAB_EXP"
def _key(k):
    return "{0}:{1}".format(DEFAULT_PREFIX, k)

def makeBandit(bandit_type,**kwargs):
    bandit_cls = getattr(bandits, bandit_type)
    bandit = bandit_cls(**kwargs)
    return bandit

class Experiment(object):

    def __init__(self, name,
                 algorithm='EpsilonGreedyBandit',
                 epsilon=0.3,
                 tau=0.3,
                 redis=None):
        self.name=name
        self._algorithm =algorithm
        self.redis=redis
        self.make_exp = makeBandit(self._algorithm, epsilon=epsilon)
    def add_arm(self,arm_id,value=None):
        self.make_exp.add_arm(arm_id,value)
    def record_participation(self, exp_id, dt=None):
        """Record a user's participation in a test along with a given variation"""
        if dt is None:
            date = datetime.now()
        else:
            date = dt

        experiment_key = self.experiment.name

        pipe = self.redis.pipeline()

        pipe.sadd(_key("p:{0}:years".format(experiment_key)), date.strftime('%Y'))
        pipe.sadd(_key("p:{0}:months".format(experiment_key)), date.strftime('%Y-%m'))
        pipe.sadd(_key("p:{0}:days".format(experiment_key)), date.strftime('%Y-%m-%d'))

        pipe.execute()

        keys = [
            _key("p:{0}:_all:all".format(experiment_key)),
            _key("p:{0}:_all:{1}".format(experiment_key, date.strftime('%Y'))),
            _key("p:{0}:_all:{1}".format(experiment_key, date.strftime('%Y-%m'))),
            _key("p:{0}:_all:{1}".format(experiment_key, date.strftime('%Y-%m-%d'))),
            _key("p:{0}:{1}:all".format(experiment_key, self.name)),
            _key("p:{0}:{1}:{2}".format(experiment_key, self.name, date.strftime('%Y'))),
            _key("p:{0}:{1}:{2}".format(experiment_key, self.name, date.strftime('%Y-%m'))),
            _key("p:{0}:{1}:{2}".format(experiment_key, self.name, date.strftime('%Y-%m-%d'))),
        ]

    def particpate(self):
        return self.make_exp.suggest_arm()['id']
    def convert(self,arm_id):
        self.make_exp.pull_arm(arm_id)
        self.make_exp.reward_arm(arm_id)
        
bandit_name = 'EpsilonGreedyBandit'
make_exp = makeBandit(bandit_name, epsilon=0.3)

In [65]:
make_exp.add_arm("green","#00FF00")
make_exp.add_arm("red","#FF0000")
make_exp.add_arm("blue","#0000FF")

In [316]:
t=make_exp.suggest_arm()
make_exp.pull_arm(t['id'])
re=make_exp.reward_arm(t['id'],1.0)
print t['id']

IndexError: list index out of range

In [67]:
t

{'id': 'green', 'pulls': 0, 'reward': 0.0, 'value': '#00FF00'}

In [69]:
t2=make_exp.suggest_arm()
t2

{'id': 'green', 'pulls': 0, 'reward': 0.0, 'value': '#00FF00'}

In [70]:
t2['id']

'green'

In [116]:
make_exp.confidence

[]

In [81]:
make_exp.reward

[5.0, 1.0, 0.0, 0.0, 1.0]

In [84]:
dict_repr = make_exp.__dict__

In [85]:
dict_repr

{'arms': ['green', 'red', 'green', 'red', 'blue'],
 'confidence': [1.0, 1.0, 0.0, 0.0, 1.0],
 'epsilon': 0.3,
 'pulls': [5, 1, 0, 0, 1],
 'reward': [5.0, 1.0, 0.0, 0.0, 1.0],
 'values': ['#00FF00', '#FF0000', '#00FF00', '#FF0000', '#0000FF']}

In [86]:
dict_repr['bandit_type'] = make_exp.__class__.__name__

In [87]:
dict_repr

{'arms': ['green', 'red', 'green', 'red', 'blue'],
 'bandit_type': 'EpsilonGreedyBandit',
 'confidence': [1.0, 1.0, 0.0, 0.0, 1.0],
 'epsilon': 0.3,
 'pulls': [5, 1, 0, 0, 1],
 'reward': [5.0, 1.0, 0.0, 0.0, 1.0],
 'values': ['#00FF00', '#FF0000', '#00FF00', '#FF0000', '#0000FF']}

In [162]:
dict_repr2.confidence

[1.0, 1.0, 0.0, 0.0, 1.0]

In [167]:
dict_repr2.__dict__

{'arms': ['green', 'red', 'green', 'red', 'blue'],
 'confidence': [1.0, 1.0, 0.0, 0.0, 1.0],
 'epsilon': 0.3,
 'pulls': [9, 2, 0, 0, 2],
 'reward': [9.0, 2.0, 0.0, 0.0, 2.0],
 'values': ['#00FF00', '#FF0000', '#00FF00', '#FF0000', '#0000FF']}

In [159]:
dict_repr2.reward

[8.0, 2.0, 0.0, 0.0, 2.0]

In [109]:
dict_repr2={}
dict_repr2 = bandits.Bandit.fromdict(dict_repr)


In [164]:
t=dict_repr2.suggest_arm()
t

{'id': 'green', 'pulls': 8, 'reward': 8.0, 'value': '#00FF00'}

In [166]:
dict_repr2.pull_arm(t['id'])
re=dict_repr2.reward_arm(t['id'],1.0)

In [89]:
import redis
from datetime import datetime
REDIS=redis.StrictRedis(db=3)
pipe = REDIS.pipeline()
pipe.hset('self', 'algorithm','_algorithm')
pipe.hset('self', 'created_at', datetime.now().strftime("%Y-%m-%d %H:%M"))
pipe.hset('self', 'traffic_fraction', '_traffic_fraction')
# reverse here and use lpush to keep consistent with using lrange
#for alternative in reversed(self.alternatives):
#    pipe.lpush("{0}:alternatives".format(self.key()), alternative.name)

pipe.execute()

[1L, 1L, 1L]

In [99]:
REDIS.hget('self', 'created_at')

'2019-03-01 08:41'

In [95]:
DEFAULT_PREFIX = 'redis_prefix'
def _key(k):
    return "{0}:{1}".format(DEFAULT_PREFIX, k)


monotonic_zadd = REDIS.register_script("""
    local sequential_id = redis.call('zscore', KEYS[1], ARGV[1])
    if not sequential_id then
        sequential_id = redis.call('zcard', KEYS[1])
        redis.call('zadd', KEYS[1], sequential_id, ARGV[1])
    end
    return sequential_id
""")


def sequential_id(k, identifier):
    """Map an arbitrary string identifier to a set of sequential ids"""
    key = _key(k)
    return int(monotonic_zadd(keys=[key], args=[identifier]))

id_ = sequential_id("e:{0}:users".format('name'), '0012')

In [96]:
id_

0

In [97]:
REDIS.smembers("e:{0}:users".format('name'))

set()

In [98]:
_key('k')

'redis_prefix:k'

In [100]:
msetbit = REDIS.register_script("""
    for index, value in ipairs(KEYS) do
        redis.call('setbit', value, ARGV[(index - 1) * 2 + 1], ARGV[(index - 1) * 2 + 2])
    end
    return redis.status_reply('ok')
    """)