In [2]:
!pip install plotly

Collecting plotly
  Downloading plotly-5.13.1-py2.py3-none-any.whl (15.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.2/15.2 MB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting tenacity>=6.2.0
  Downloading tenacity-8.2.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.13.1 tenacity-8.2.1
[0m

In [32]:
import numpy as np
import pandas as pd
from scipy.stats import beta, bernoulli
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
import plotly.subplots
from plotly import tools
import random
import math

RANDOM_SEED = 329
np.random.seed(RANDOM_SEED)
#init_notebook_mode(connected=True)

In [34]:
def algorithm_performance():
    """
    Function that will show the performance of each algorithm we will be using in this tutorial.
    """

    ## calculate how many time each Ad has been choosen
    count_series = pd.Series(index_list).value_counts(normalize=True)
    print('Ad #0 has been shown', count_series[0]*100, '% of the time.')
    print('Ad #1 has been shown', count_series[1]*100, '% of the time.')

    print('Total Reward (Number of Clicks):', total_reward) ## print total Reward

    x = np.arange (0, n, 1)

    ## plot the calculated award for Ad #0
    data1 = go.Scatter(x=x,
                       y=award[0],
                       name='Calculated award #0',
                       line=dict(color=('rgba(10, 108, 94, .7)'),
                                 width=2))

    ## plot the line with actual award for Ad #0
    data2 = go.Scatter(x=[0, n],
                       y=[true_award[0]] * 2,
                       name='Actual award #0 value',
                       line = dict(color = ('rgb(205, 12, 24)'),
                                   width = 1,
                                   dash = 'dash'))

    ## plot the calculated award for Ad #1
    data3 = go.Scatter(x=x,
                       y=award[1],
                       name='Calculated award #1',
                       line=dict(color=('rgba(187, 121, 24, .7)'),
                                 width=2))

    ## plot the line with actual award for Ad #0
    data4 = go.Scatter(x=[0, n],
                       y=[true_award[1]] * 2,
                       name='Actual award #1 value',
                       line = dict(color = ('rgb(205, 12, 24)'),
                                   width = 1,
                                   dash = 'dash'))

    ## plot the Regret values as a function of trial number
    data5 = go.Scatter(x=x,
                       y=regret_list,
                       name='Regret')

    layout = go.Layout(title='Simulated award Values and Algorithm Regret',
                       xaxis={'title': 'Trial Number'},
                       yaxis1={'title': 'award value'},
                       yaxis2={'title': 'Regret Value'}
                       )
    fig = plotly.subplots.make_subplots(rows=2, cols=1, print_grid=False, shared_yaxes=True, shared_xaxes=True)

    fig.append_trace(data1, 1, 1)
    fig.append_trace(data2, 1, 1)
    fig.append_trace(data3, 1, 1)
    fig.append_trace(data4, 1, 1)
    fig.append_trace(data5, 2, 1)

    fig['layout'].update(layout)
    iplot(fig, show_link=False)

In [24]:
true_award = [0.8, 0.3]
print('True award for Ad #0 is:', true_award[0])
print('True award for Ad #1 is:', true_award[1])

True award for Ad #0 is: 0.8
True award for Ad #1 is: 0.3


In [26]:
# Random selection
n = 1000

In [35]:
regret = 0
total_reward = 0
regret_list = []
award = {0: [], 1: []}
index_list = []

## set the initial values for impressions and clicks
impressions = [0,0]
clicks = [0,0]

for i in range(n):

    random_index = np.random.randint(0,2,1)[0] ## randomly choose the value between [0,1]
    index_list.append(random_index) ## add the value to list

    impressions[random_index] += 1 ## add 1 impression value for the choosen Ad
    did_click = bernoulli.rvs(true_award[random_index]) ## simulate if the person clicked on the ad usind Actual award value

    if did_click:
        clicks[random_index] += did_click ## if person clicked add 1 click value for the choosen Ad

    ## calculate the award values and add them to list
    if impressions[0] == 0:
        award_0 = 0
    else:
        award_0 = clicks[0]/impressions[0]

    if impressions[1] == 0:
        award_1 = 0
    else:
        award_1 = clicks[1]/impressions[1]

    award[0].append(award_0)
    award[1].append(award_1)

    ## calculate the regret and reward
    regret += max(true_award) - true_award[random_index]
    regret_list.append(regret)
    total_reward += did_click

In [36]:
algorithm_performance()

Ad #0 has been shown 50.9 % of the time.
Ad #1 has been shown 49.1 % of the time.
Total Reward (Number of Clicks): 591


In [37]:
## save the reward and regret values for future comparison
random_dict = {'reward':total_reward,
               'regret_list':regret_list,
               'ads_count':pd.Series(index_list).value_counts(normalize=True)}

#Not ideal. Ad 2 should have been chosen more.

In [41]:
## Epsilon Greedy
e = .05 ## set the Epsilon value
n_init = 100 ## number of impressions to choose the winning Ad
impressions = [0,0]
clicks = [0,0]

for i in range(n_init):
    random_index = np.random.randint(0,2,1)[0]

    impressions[random_index] += 1
    did_click = bernoulli.rvs(true_award[random_index])
    if did_click:
        clicks[random_index] += did_click

award_0 = clicks[0] / impressions[0]
award_1 = clicks[1] / impressions[1]
win_index = np.argmax([award_0, award_1]) ## select the Ad number with the highest CTR

print('After', n_init, 'initial trials Ad #', win_index, 'got the highest award =', round(np.max([award_0, award_1]), 2),
      '(Real award value is', true_award[win_index], ').'
      '\nIt will be shown', (1-e)*100, '% of the time.')

After 100 initial trials Ad # 0 got the highest award = 0.86 (Real award value is 0.8 ).
It will be shown 95.0 % of the time.


In [51]:
def MAB_algorithm(n, a0, a1, t_a, regret=None, total_reward=None):
    # a0 = award0
    # a1 = award1
    for i in range(n):
        random_index = np.random.randint(0, 2, 1)[0]  ## randomly choose the value between [0,1]
        index_list.append(random_index)  ## add the value to list

        impressions[random_index] += 1  ## add 1 impression value for the choosen Ad
        did_click = bernoulli.rvs(t_a[random_index])  ## simulate if the person clicked on the ad usind Actual award value

        if did_click:
            clicks[random_index] += did_click  ## if person clicked add 1 click value for the choosen Ad

        ## calculate the award values and add them to list
        if impressions[0] == 0:
            award0 = 0
        else:
            award0 = clicks[0] / impressions[0]

        if impressions[1] == 0:
            award1 = 0
        else:
            award1 = clicks[1] / impressions[1]

        award[0].append(award0)
        award[1].append(award1)

        ## calculate the regret and reward
        regret += max(true_award) - true_award[random_index]
        regret_list.append(regret)
        total_reward += did_click

In [52]:
regret = 0
total_reward = 0
regret_list = []
ctr = {0: [], 1: []}
index_list = []
impressions = [0,0]
clicks = [0,0]

#n = 1000

MAB_algorithm(n=1000, a0=award_0, a1=award_1, t_a = true_award)

TypeError: unsupported operand type(s) for +=: 'NoneType' and 'float'

In [53]:
regret = 0
total_reward = 0
regret_list = []
award = {0: [], 1: []}
index_list = []
impressions = [0,0]
clicks = [0,0]

for i in range(n):

    epsilon_index = random.choices([win_index, 1-win_index], [1-e, e])[0]
    index_list.append(epsilon_index)

    impressions[epsilon_index] += 1
    did_click = bernoulli.rvs(true_award[epsilon_index])
    if did_click:
        clicks[epsilon_index] += did_click

    if impressions[0] == 0:
        award_0 = 0
    else:
        award_0 = clicks[0]/impressions[0]

    if impressions[1] == 0:
        award_1 = 0
    else:
        award_1 = clicks[1]/impressions[1]

    ctr[0].append(award_0)
    ctr[1].append(award_1)

    regret += max(true_award) - true_award[epsilon_index]
    regret_list.append(regret)
    total_reward += did_click

In [54]:
algorithm_performance()

Ad #0 has been shown 95.5 % of the time.
Ad #1 has been shown 4.5 % of the time.
Total Reward (Number of Clicks): 772
