In [1]:
import numpy as np

In [2]:
class BernoulliBandit:
    """A single ad modelled as a bandit arm with a fixed success probability."""

    def __init__(self, p):
        # Probability that one impression of this ad produces a reward of 1.
        self.p = p

    def display_ad(self):
        """Show the ad once and return the reward: one Bernoulli(p) draw, 0 or 1."""
        reward = np.random.binomial(1, self.p)
        return reward

In [3]:
# One simulated bandit arm per ad; the success probabilities rise from A to E.
ads = [BernoulliBandit(p) for p in (0.004, 0.016, 0.02, 0.028, 0.031)]
# Keep the individual per-ad names available alongside the list.
adA, adB, adC, adD, adE = ads

## A/B/n testing

In [4]:
# Number of impressions spent in the test phase.
n_test = 10_000

In [5]:
# Number of impressions served in the production phase.
n_prod = 90_000

In [6]:
# Number of competing ads, i.e. the number of bandit arms.
n_ads = len(ads)

In [7]:
# Q: running mean reward per ad; N: impressions served per ad.
Q, N = np.zeros(n_ads), np.zeros(n_ads)
# Cumulative reward and the history of the running average reward.
total_reward, avg_rewards = 0, []

In [8]:
# Test phase: every impression goes to an ad picked uniformly at random.
for i in range(n_test):
    # Pick an ad, show it, and bank the observed reward.
    ad_chosen = np.random.randint(n_ads)
    R = ads[ad_chosen].display_ad()
    total_reward += R

    # Incremental sample-mean update of the chosen ad's estimate.
    N[ad_chosen] += 1
    Q[ad_chosen] += (1 / N[ad_chosen]) * (R - Q[ad_chosen])

    # Running average reward over the i + 1 impressions served so far.
    avg_reward_so_far = total_reward / (i + 1)
    avg_rewards.append(avg_reward_so_far)

In [9]:
# Index of the ad with the highest estimated mean reward.
# np.argmax returns the first index when several ads tie.
best_ad_index = np.argmax(Q)

In [10]:
# Map index 0, 1, 2, ... to the letters A, B, C, ... for display.
print("The best performing ad is {}".format(chr(ord('A') + best_ad_index)))

The best performing ad is E


In [11]:
# The production loop below serves only this ad: the one with the best estimate.
ad_chosen = best_ad_index

In [12]:
# Production phase: the same ad (ad_chosen) is shown for every impression.
for i in range(n_prod):
    R = ads[ad_chosen].display_ad()
    # Keep refining that ad's impression count and mean-reward estimate.
    N[ad_chosen] += 1
    Q[ad_chosen] += (1 / N[ad_chosen]) * (R - Q[ad_chosen])
    total_reward += R
    # total_reward still contains the rewards collected during the n_test test
    # impressions, so the running average must be taken over test + production
    # impressions. Dividing by (i + 1) alone inflates it right after the switch.
    avg_reward_so_far = total_reward / (n_test + i + 1)
    avg_rewards.append(avg_reward_so_far)

In [13]:
import pandas as pd

In [14]:
# One column per strategy holding its running average reward; A/B/n comes first.
df_reward_comparison = pd.DataFrame({'A/B/n': avg_rewards})

In [15]:
import cufflinks as cf
import plotly.offline

In [16]:
# Presumably switches cufflinks to offline plotting so figures render locally
# in the notebook -- confirm against the cufflinks documentation.
cf.go_offline()

In [None]:
cf.set_config_file(theme="white", world_readable=True)
# Plot the A/B/n running average; the title shows its final value.
df_reward_comparison['A/B/n'].iplot(
    title="A/B/n Test Avg. Reward {:.4f}".format(avg_reward_so_far),
    xTitle='Impressions',
    yTitle='Avg. Reward',
)

In [34]:
# Exploration rate: probability of serving a uniformly random ad next.
eps = 0.2
# Total number of impressions served by the policy.
n_prod = 100_000

In [35]:
# Start from scratch: zero estimates, zero impressions, no reward collected.
n_ads = len(ads)
Q, N = np.zeros(n_ads), np.zeros(n_ads)
total_reward, avg_rewards = 0, []

In [36]:
# The first impression goes to a uniformly random ad.
ad_chosen = np.random.randint(n_ads)

In [37]:
# The e-greedy comparison plot indexes one column per exploration rate
# (0.01, 0.05, 0.1, 0.2). Produce all of them here so the notebook works after
# "Restart & Run All" instead of relying on manual re-runs with different eps
# values. The configured eps runs last, so eps, Q, N, total_reward and
# avg_rewards end up describing that run, exactly as a single run would.
eps_values = [rate for rate in (0.01, 0.05, 0.1, 0.2) if rate != eps] + [eps]

for eps in eps_values:
    # Each run starts from scratch with a uniformly random first ad.
    Q = np.zeros(n_ads)
    N = np.zeros(n_ads)
    total_reward = 0
    avg_rewards = []
    ad_chosen = np.random.randint(n_ads)

    for i in range(n_prod):
        R = ads[ad_chosen].display_ad()
        # Incremental sample-mean update of the shown ad's estimate.
        N[ad_chosen] += 1
        Q[ad_chosen] += (1 / N[ad_chosen]) * (R - Q[ad_chosen])
        total_reward += R
        avg_rewards_so_far = total_reward / (i + 1)
        avg_rewards.append(avg_rewards_so_far)

        # Choose the ad for the next impression: explore with probability eps,
        # otherwise exploit the ad with the best current estimate.
        if np.random.uniform() <= eps:
            ad_chosen = np.random.randint(n_ads)
        else:
            ad_chosen = np.argmax(Q)

    # One column per exploration rate, e.g. 'e-greedy: 0.2'.
    df_reward_comparison['e-greedy: {}'.format(eps)] = avg_rewards

In [38]:
# Column labels of the e-greedy runs to compare, one per exploration rate.
greedy_list = ['e-greedy: {}'.format(rate) for rate in (0.01, 0.05, 0.1, 0.2)]

In [None]:
# Overlay the e-greedy runs, one line style per exploration rate.
df_reward_comparison[greedy_list].iplot(
    title="e-Greedy Actions",
    dash=['solid', 'dash', 'dashdot', 'dot'],
    xTitle='Impressions',
    yTitle='Avg Reward',
)

## Upper Confidence Bounds (UCB)

In [44]:
# c scales the uncertainty bonus added to each ad's estimate in the UCB rule.
c = 10
n_prod = 100_000
n_ads = len(ads)
# Index array used to select among ads through a boolean mask.
ad_indices = np.arange(n_ads)
# Fresh estimates, impression counts and reward tracking for this run.
Q, N = np.zeros(n_ads), np.zeros(n_ads)
total_reward, avg_rewards = 0, []

In [45]:
# The UCB comparison indexes one column per exploration weight (0.1, 1, 10).
# Produce all of them here so the notebook works after "Restart & Run All"
# instead of relying on manual re-runs with different c values. The configured
# c runs last, so c, Q, N, total_reward and avg_rewards end up describing that
# run, exactly as a single run would.
c_values = [value for value in (0.1, 1, 10) if value != c] + [c]

for c in c_values:
    # Each run starts from scratch.
    Q = np.zeros(n_ads)
    N = np.zeros(n_ads)
    total_reward = 0
    avg_rewards = []

    for t in range(1, n_prod + 1):
        untried = N == 0
        if untried.any():
            # An ad with zero impressions has an undefined bonus (division by
            # zero), so serve a random untried ad first.
            ad_chosen = np.random.choice(ad_indices[untried])
        else:
            # Estimate plus a bonus that shrinks as an ad accumulates impressions.
            uncertainty = np.sqrt(np.log(t) / N)
            ad_chosen = np.argmax(Q + c * uncertainty)
        R = ads[ad_chosen].display_ad()
        # Incremental sample-mean update of the shown ad's estimate.
        N[ad_chosen] += 1
        Q[ad_chosen] += (1 / N[ad_chosen]) * (R - Q[ad_chosen])
        total_reward += R
        avg_reward_so_far = total_reward / t
        avg_rewards.append(avg_reward_so_far)

    # One column per exploration weight, e.g. 'UCB, c=10'.
    df_reward_comparison['UCB, c={}'.format(c)] = avg_rewards

In [49]:
# Column labels of the UCB runs to compare, one per exploration weight.
ucb_list = ['UCB, c={}'.format(weight) for weight in (0.1, 1, 10)]

In [50]:
# The last row holds each run's final running average; keep the best of them.
# A positional lookup avoids depending on the loop variable `t` left behind by
# whichever cell happened to run last.
best_reward = df_reward_comparison[ucb_list].iloc[-1].max()

In [None]:
# Overlay the UCB runs, one line style per exploration weight.
df_reward_comparison[ucb_list].iplot(
    title='Action Selection using UCB. Best avg. reward: {:.4f}'.format(best_reward),
    dash=['solid', 'dash', 'dashdot'],
    xTitle='Impressions',
    yTitle='Avg. Reward',
)

## Thompson Sampling

In [None]:
n_prod = 100_000
n_ads = len(ads)
# Beta(1, 1) parameters for every ad to start with.
alphas, betas = np.ones(n_ads), np.ones(n_ads)
total_reward, avg_rewards = 0, []

for i in range(n_prod):
    # Draw one sample per ad from Beta(alpha, beta) and serve the ad whose
    # sample is the largest.
    theta_samples = [np.random.beta(a, b) for a, b in zip(alphas, betas)]
    ad_chosen = np.argmax(theta_samples)
    R = ads[ad_chosen].display_ad()

    # A reward of 1 increments alpha; a reward of 0 increments beta.
    alphas[ad_chosen] += R
    betas[ad_chosen] += 1 - R

    total_reward += R
    avg_reward_so_far = total_reward / (i + 1)
    avg_rewards.append(avg_reward_so_far)

df_reward_comparison['Thompson Sampling'] = avg_rewards

# Plot the running average; the title shows its final value.
df_reward_comparison['Thompson Sampling'].iplot(
    title="Thompson Sampling Avg. Reward: {:.4f}".format(avg_reward_so_far),
    xTitle='Impressions',
    yTitle='Avg. Reward',
)