Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Most basic MAB functionality with epsilon-greedy.
- Loading branch information
0 parents
commit fbd0cc0
Showing
5 changed files
with
335 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
Copyright (C) 2014 Roy Keyes | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in | ||
all copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#slots | ||
###*A multi-armed bandit library for Python* | ||
|
||
Slots is intended to be a basic, very easy-to-use multi-armed bandit library for Python. | ||
|
||
See [slots-notes.md](https://github.com/roycoding/slots/blob/master/slots-notes.md) for design ideas. | ||
|
||
####Author | ||
[Roy Keyes](https://roycoding.github.io) -- roy.coding@gmail | ||
|
||
####License: MIT | ||
See [LICENSE.txt](https://github.com/roycoding/slots/blob/master/LICENSE.txt) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
#Multi-armed bandit library notes | ||
|
||
### What does the library need to do? | ||
1. Set up N bandits with probabilities, p_i, and payouts, pay_i. | ||
2. Implement several MAB strategies, with kwargs as parameters, and consistent API. | ||
3. Allow for T trials. | ||
4. Continue with more trials (i.e. save state after trials). | ||
5. Values to save: | ||
1. Current choice | ||
2. number of trials completed for each arm | ||
3. scores for each arm | ||
4. average payout per arm (payout*wins/trials?) | ||
6. Use sane defaults. | ||
7. Be obvious and clean. | ||
|
||
###Library API ideas: | ||
Creating a MAB test instance: | ||
|
||
```Python | ||
# Default: 3 bandits with random p_i and pay_i = 1 | ||
mab = slots.MAB() | ||
|
||
# Set up 4 bandits with random p_i and pay_i | ||
mab = slots.MAB(4) | ||
|
||
# 4 bandits with specified p_i | ||
mab = slots.MAB(probs = [0.2,0.1,0.4,0.1]) | ||
|
||
# 3 bandits with specified pay_i | ||
mab = slots.MAB(payouts = [1,10,15]) | ||
|
||
# Bandits with payouts specified by arrays (i.e. payout data with unknown probabilities) | ||
# payouts is an N * T array, with N bandits and T trials | ||
mab = slots.MAB(live = True, payouts = [[0,0,0,0,1.2,0,0],[0,0.1,0,0,0.1,0.1,0]]) | ||
``` | ||
|
||
Running tests with strategy, S | ||
|
||
```Python | ||
# Default: Epsilon-greedy, epsilon = 0.1, num_trials = 1000 | ||
mab.run() | ||
|
||
# Run chosen strategy with specified parameters and trials | ||
mab.eps_greedy(eps = 0.2, trials = 10000) | ||
mab.run(strategy = 'eps_greedy',params = {'eps':0.2}, trials = 10000) | ||
|
||
# Run strategy, updating old trial data | ||
mab.run(continue = True) | ||
``` | ||
|
||
Displaying / retrieving bandit properties | ||
|
||
```Python | ||
# Default: display number of bandits, probabilities and payouts | ||
mab.bandits.info() | ||
|
||
# Display info for bandit i | ||
mab.bandits[i] | ||
|
||
# Retrieve bandits' payouts, probabilities, etc | ||
mab.bandits.payouts | ||
mab.bandits.probs | ||
|
||
# Retrieve count of bandits | ||
mab.bandits.count | ||
``` | ||
|
||
Setting bandit properties | ||
|
||
```Python | ||
# Reset bandits to defaults | ||
mab.bandits.reset() | ||
|
||
# Set probabilities or payouts | ||
mab.bandits.probs_set([0.1,0.05,0.2,0.15]) | ||
mab.bandits.payouts_set([1,1.5,0.5,0.8]) | ||
``` | ||
|
||
Displaying / retrieving test info | ||
|
||
```Python | ||
# Retrieve current "best" bandit | ||
mab.best() | ||
|
||
# Retrieve bandit probability estimates | ||
mab.prob_est() | ||
|
||
# Retrieve bandit probability estimate of bandit i | ||
mab.prob_est(i) | ||
|
||
# Retrieve bandit payout estimates (p * payout) | ||
mab.payout_est() | ||
|
||
# Retrieve current bandit choice | ||
mab.current() | ||
|
||
# Retrieve sequence of choices | ||
mab.choices | ||
|
||
# Retrieve probability estimate history | ||
mab.prob_est_sequence | ||
|
||
# Retrieve test strategy info (current strategy) -- a dict | ||
mab.strategy_info() | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
''' | ||
slots | ||
A Python library to perform simple multi-armed bandit analyses. | ||
Scenarios: | ||
- Run MAB test on simulated data (N bandits), default epsilon-greedy test. | ||
mab = slots.MAB(probs = [0.1,0.15,0.05]) | ||
mab.run(trials = 10000) | ||
mab.best # Bandit with highest probability after T trials | ||
- Run MAB test on "real" payout data (probabilities unknown). | ||
mab = slots.MAB(payouts = [0,0,0,1,0,0,0,0,0,....]) | ||
mab.run(trials = 10000) # Max is length of payouts | ||
''' | ||
|
||
|
||
import numpy as np | ||
|
||
class MAB():
    '''
    Multi-armed bandit test class.

    Keeps, per bandit, the number of pulls and the accumulated payout
    ("wins"), together with the full sequence of choices made so far.
    '''

    def __init__(self, num_bandits=None,probs=False,payouts=False,live=False):
        '''
        Instantiate MAB class, determining
            - Number of bandits
            - Probabilities of bandit payouts
            - Bandit payouts

        Parameters (optional):
            - num_bandits: number of bandits (only used when neither probs
              nor payouts is given) - integer, defaults to 3
            - probs: probabilities of bandit payouts - array of floats
            - payouts: amount of bandit payouts
                - array of floats
                - If 'live' = True, a N*T array of floats indicating payout
                  amount per pull for N bandits and T trials
            - live: boolean indicating if data is live
        '''

        default_num_bandits = 3

        self.choices = []

        has_probs = self._given(probs)
        has_payouts = self._given(payouts)

        if not has_probs:
            if not has_payouts:
                if not num_bandits:
                    num_bandits = default_num_bandits
                self.bandits = Bandits(probs=[np.random.rand() for _ in
                                              range(num_bandits)],
                                       payouts=np.ones(num_bandits))
            else:
                # The payout data fixes the number of bandits.
                num_bandits = len(payouts)
                if live:
                    # Live data carries no known probabilities.
                    self.bandits = Bandits(probs=None, payouts=payouts,
                                           live=True)
                else:
                    # Payout amounts given, win probabilities randomised.
                    self.bandits = Bandits(probs=[np.random.rand() for _ in
                                                  range(num_bandits)],
                                           payouts=payouts)
        else:
            # The probabilities fix the number of bandits.
            num_bandits = len(probs)
            if has_payouts:
                self.bandits = Bandits(probs=probs, payouts=payouts)
            else:
                # Default payout of 1 per win for every bandit.
                self.bandits = Bandits(probs=probs,
                                       payouts=np.ones(num_bandits))

        # Per-bandit accumulators, sized from the bandits actually created.
        self.wins = np.zeros(num_bandits)
        self.pulls = np.zeros(num_bandits)

    @staticmethod
    def _given(values):
        '''
        Return True if an optional array argument was actually supplied.

        An explicit test is used instead of truthiness because
        truth-testing a multi-element NumPy array raises ValueError.
        '''

        if values is None or values is False:
            return False
        return len(values) > 0

    def run(self, trials=100, strategy=None, parameters=None):
        '''
        Run MAB test with T trials.

        Parameters:
            trials (integer) - number of trials to run.
            strategy (string) - name of selected strategy.
            parameters (dict) - parameters for selected strategy.

        Raises:
            Exception - if trials < 1 or the strategy name is unknown.

        Currently only epsilon greedy is implemented.
        '''

        strategies = {'eps_greedy':self.eps_greedy}

        if trials < 1:
            raise Exception('MAB.run: Number of trials cannot be less than 1!')
        if not strategy:
            strategy = 'eps_greedy'
        elif strategy not in strategies:
            raise Exception('MAB.run: Strategy name invalid. Choose from: '
                            + ', '.join(strategies))

        choose = strategies[strategy]

        # Run strategy
        for _ in range(trials):
            choice = choose(params=parameters)
            payout = self.bandits.pull(choice)
            if payout is None:
                # pull() returns None when a live bandit has no recorded
                # payouts left; stop rather than add None to the totals.
                break
            self.choices.append(choice)
            self.pulls[choice] += 1
            self.wins[choice] += payout

    def max_mean(self):
        '''
        Pick the bandit with the current best observed payout per pull.

        Input: self
        Output: integer index of the bandit
        '''

        # The +1 keeps the division defined for bandits never pulled.
        return np.argmax(self.wins / (self.pulls + 1))

    def eps_greedy(self,params):
        '''
        Choose a bandit with the epsilon-greedy MAB algorithm.

        With probability epsilon a bandit other than the current best is
        picked at random; otherwise the current best is picked.

        Input: dict of parameters; key 'epsilon' (or 'eps'), default 0.1
        Output: integer index of the chosen bandit
        '''

        eps = 0.1
        if params and isinstance(params, dict):
            eps = params.get('epsilon', params.get('eps', eps))

        r = np.random.rand()
        leader = self.max_mean()
        if r < eps:
            others = [i for i in range(len(self.wins)) if i != leader]
            # With a single bandit there is nothing else to explore.
            if others:
                return np.random.choice(others)
        return leader

    def best(self):
        '''
        Return current 'best' choice of bandit.

        This is the bandit with the highest observed payout per pull, not
        simply the last bandit pulled (which may have been an exploration
        pull).

        Input: self
        Output: integer, or None if no trials have been run
        '''

        if len(self.choices) < 1:
            print('slots: No trials run so far.')
            return None
        return self.max_mean()
|
||
|
||
class Bandits():
    '''
    Bandit class.

    Holds either simulated bandits (a win probability and a payout amount
    per bandit) or "live" bandits (a recorded sequence of payouts per
    bandit, replayed one pull at a time).
    '''

    def __init__(self, probs=None, payouts=None, live=False):
        '''
        Instantiate Bandit class, determining
            - Probabilities of bandit payouts
            - Bandit payouts

        Parameters:
            - probs: probabilities of bandit payouts - array of floats
              (not used when live = True)
            - payouts: amount of bandit payouts
                - array of floats
                - If 'live' = True, a N*T array of floats indicating payout
                  amount per pull for N bandits and T trials
            - live: boolean indicating if data is live

        Raises:
            Exception - if payouts is missing, or (simulated bandits only)
            probs is missing or differs in length from payouts.
        '''

        if payouts is None:
            raise Exception('Bandits.__init__: Payouts are required!')

        if not live:
            if probs is None:
                raise Exception('Bandits.__init__: Probabilities are required unless live!')
            # Only use arrays of equal length
            if len(probs) != len(payouts):
                raise Exception('Bandits.__init__: Probability and payouts arrays of different lengths!')
            self.probs = probs
            self.payouts = payouts
            self.live = False
            self._next_pull = []
        else:
            self.live = True
            self.probs = None
            self.payouts = payouts
            # Read position per bandit, so recorded payouts are replayed in
            # their original order without mutating the caller's data.
            self._next_pull = [0] * len(payouts)

    def pull(self,i):
        '''
        Return the payout from a single pull of the bandit i's arm.

        Live bandits return the next recorded payout, or None once the
        recorded payouts of bandit i are used up. Simulated bandits return
        their payout amount with probability probs[i], otherwise 0.0.
        '''

        if self.live:
            position = self._next_pull[i]
            if position < len(self.payouts[i]):
                self._next_pull[i] = position + 1
                return self.payouts[i][position]
            return None

        if np.random.rand() < self.probs[i]:
            return self.payouts[i]
        return 0.0

    def info(self):
        '''
        Placeholder for displaying bandit information; not implemented.
        '''

        pass