In [1]:
# Importing the libraries
import math
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Importing the dataset
# The path is relative to the working directory the notebook is run from.
# NOTE(review): presumably one row per round and one 0/1 reward column per ad -- confirm against the CSV.
dataset = pd.read_csv('./Ads_Optimisation.csv')


In [2]:
# Column-wise mean of the dataset, i.e. the average recorded value for each ad column.
res=dataset.mean()
print(res)

Ad 1     0.1703
Ad 2     0.1295
Ad 3     0.0728
Ad 4     0.1196
Ad 5     0.2695
Ad 6     0.0126
Ad 7     0.1112
Ad 8     0.2091
Ad 9     0.0952
Ad 10    0.0489
dtype: float64


In [4]:
# Implementing Random Selection
# Baseline strategy: every round shows one ad index drawn uniformly from
# range(d) and collects the reward stored at that (round, ad) position.
N = 10000  # number of rounds (dataset rows) to simulate
d = 10     # number of ads (dataset columns)
SEED = 42  # fixed seed so a fresh Restart & Run All repeats the same draws
           # (saved outputs from an earlier unseeded run will differ; re-run to refresh)

# Evaluate the array once instead of calling `dataset.values` on every round.
rewards = dataset.values
assert rewards.shape[0] >= N and rewards.shape[1] >= d, "dataset is smaller than N x d"

# Private generator: seeding it does not disturb the global `random` state.
rng = random.Random(SEED)

ads_selected = []   # ad index shown in each round, in order
total_reward = 0    # running sum of collected rewards
for n in range(N):
    ad = rng.randrange(d)
    ads_selected.append(ad)
    reward = rewards[n, ad]
    total_reward += reward

In [5]:
# Display the reward total accumulated by the most recently executed selection loop.
total_reward

1256

In [6]:
# Number of recorded selections (the loop appends one entry per round).
len(ads_selected)

10000

In [7]:
# How often each ad index was selected; normalize=False yields raw counts rather than fractions.
pd.Series(ads_selected).value_counts(normalize=False)

4    1021
0    1017
7    1009
8    1008
6    1006
3    1003
9     997
1     995
2     976
5     968
dtype: int64

In [8]:
# Implementing UCB
# Upper Confidence Bound: each round shows the ad whose
# (average reward so far + exploration bonus) is largest.
N = 10000  # number of rounds (dataset rows) to simulate
d = 10     # number of ads (dataset columns)
ads_selected = []                 # ad index shown in each round, in order
numbers_of_selections = [0] * d   # how many times each ad has been shown
sums_of_reward = [0] * d          # cumulative reward collected per ad
total_reward = 0

# Evaluate the array once instead of calling `dataset.values` on every round.
rewards = dataset.values

for n in range(N):
    ad = 0
    max_upper_bound = 0
    for i in range(d):
        if numbers_of_selections[i] > 0:
            average_reward = sums_of_reward[i] / numbers_of_selections[i]
            # Exploration bonus; note the 1.5 factor is applied outside the square root.
            delta_i = 1.5 * math.sqrt(math.log(n + 1) / numbers_of_selections[i])
            upper_bound = average_reward + delta_i
        else:
            # Ad not shown yet: an infinite bound forces it to be tried.
            upper_bound = math.inf
        # Strict '>' keeps the lowest index when several ads share the same bound.
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            ad = i
    ads_selected.append(ad)
    numbers_of_selections[ad] += 1
    reward = rewards[n, ad]
    sums_of_reward[ad] += reward
    total_reward += reward

print(pd.Series(ads_selected).value_counts(normalize=False))
print(total_reward)

4    5464
7    1268
0     786
1     537
6     458
3     440
8     338
2     294
9     213
5     202
dtype: int64
2098


In [9]:
# Display the reward total accumulated by the most recently executed selection loop.
total_reward

2098

In [10]:
def argmax(q_values):
    """
    Return the index of the largest value in q_values, breaking ties randomly.

    q_values: non-empty list (or other sized sequence) of comparable numbers.
    returns: int - the index of a maximal entry; when several entries share
        the maximum, one of their indices is drawn with np.random.choice.
    raises: ValueError - if q_values is empty.
    """
    if len(q_values) == 0:
        raise ValueError("argmax() requires a non-empty sequence")

    top = max(q_values)
    # Collect every position holding the maximum so ties can be broken fairly.
    ties = [index for index, value in enumerate(q_values) if value == top]

    # np.random.choice returns a NumPy integer; convert to the documented plain int.
    return int(np.random.choice(ties))



In [11]:
# Implementing UCB my way
# Same idea as UCB, but the per-ad value estimate Q is updated incrementally
# and ties between equal upper bounds are broken randomly by argmax().
N = 10000  # number of rounds (dataset rows) to simulate
d = 10     # number of ads (dataset columns)
SEED = 42  # fixed seed so a fresh Restart & Run All repeats the same tie-breaks
           # (saved outputs from an earlier unseeded run will differ; re-run to refresh)

# argmax() breaks ties with np.random.choice, so seed NumPy's global generator.
np.random.seed(SEED)

ads_selected = []                 # reset here but not populated by this cell
numbers_of_selections = [0] * d   # how many times each ad has been shown
Q = [0] * d                       # running average reward per ad
sums_of_reward = [0] * d          # cumulative reward collected per ad
total_reward = 0
arg_ads_selected = []             # ad index shown in each round, in order

# One array lookup up front instead of a `dataset.iloc[n, ad]` call every round.
rewards = dataset.values

for n in range(N):
    upper_bound = []
    for i in range(d):
        if numbers_of_selections[i] > 0:
            delta_i = math.sqrt(2 * math.log(n + 1) / numbers_of_selections[i])
            upper_bound.append(Q[i] + delta_i)
        else:
            # Ad not shown yet: an infinite bound forces it to be tried.
            upper_bound.append(math.inf)

    ad = argmax(upper_bound)

    arg_ads_selected.append(ad)
    numbers_of_selections[ad] += 1

    reward = rewards[n, ad]

    # Incremental mean: move Q toward the new reward by 1/count of the error.
    Q[ad] += 1 / numbers_of_selections[ad] * (reward - Q[ad])

    sums_of_reward[ad] += reward
    total_reward += reward

print(Q)
print(pd.Series(arg_ads_selected).value_counts(normalize=False))
print(total_reward)

[0.19093539054966244, 0.08955223880597013, 0.08064516129032259, 0.11325301204819264, 0.26853776853776756, 0.010638297872340425, 0.0911764705882353, 0.18659793814433004, 0.11111111111111115, 0.03947368421052634]
4    5772
0    1037
7     970
3     415
8     405
6     340
1     335
2     310
9     228
5     188
dtype: int64
2118


In [12]:
# Per-ad selection counts, indexed by 0-based ad position.
numbers_of_selections

[1037, 335, 310, 415, 5772, 188, 340, 970, 405, 228]