In [1]:
import numpy as np
import pandas as pd

from functools import partial
from scipy.stats import randint, uniform
from sim_lib import simulation

pd.options.mode.chained_assignment = None

In [2]:
def reset_seed(seed: int = 384758917):
    """Re-seed NumPy's global random generator so a run can be reproduced.

    Parameters
    ----------
    seed : int, default 384758917
        Value handed to ``np.random.seed``. The default is the seed this
        notebook has always used, so a bare ``reset_seed()`` call behaves
        exactly as before; pass another value to repeat an experiment under
        a different random stream.
    """
    np.random.seed(seed=seed)

In [3]:
def eps_greedy(history: pd.DataFrame, eps: float, smoothing: float = 10):
    """Pick a row of ``history`` with an epsilon-greedy rule.

    With probability ``eps`` a row is drawn uniformly at random (explore);
    otherwise the row with the largest smoothed click-through rate
    ``clicks / (impressions + smoothing)`` is chosen (exploit).

    Parameters
    ----------
    history : pd.DataFrame
        Must provide ``'clicks'`` and ``'impressions'`` columns; one row per
        candidate.
    eps : float
        Exploration probability. ``uniform.rvs()`` draws from [0, 1), so
        ``eps <= 0`` never explores and ``eps >= 1`` always explores.
    smoothing : float, default 10
        Constant added to the impressions in the denominator. It lowers the
        estimated rate of rows with few impressions. The default reproduces
        the value that used to be hard-coded.

    Returns
    -------
    The index label of the selected row.
    """
    # One uniform draw decides explore vs. exploit; the order of RNG calls is
    # kept as-is so seeded runs stay reproducible.
    if uniform.rvs() < eps:
        n = history.shape[0]
        # randint's upper bound is exclusive, so this is a valid position.
        return history.index[randint.rvs(0, n)]

    ctr = history['clicks'] / (history['impressions'] + smoothing)
    n = np.argmax(ctr)
    return history.index[n]

# Baseline policy: eps_greedy with the exploration probability fixed at 0.08.
policy = partial(eps_greedy, eps=0.08)

In [4]:
# Re-seed NumPy's global RNG before the run.
reset_seed()

# Run the simulation with whatever `policy` is currently bound to and time it.
# NOTE(review): n presumably is the number of simulated impressions - confirm in sim_lib.
%time output = simulation(policy, n=200_000)

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
CPU times: user 2min 3s, sys: 4.86 s, total: 2min 7s
Wall time: 2min 3s
baseline regret


(2792.237649427154, 0.01396118824713577, 174)

In [8]:
print("baseline regret")
# Displayed as a tuple: (regret, regret divided by rounds, total_banners).
(
    output['regret'],
    output['regret'] / output['rounds'],
    output['total_banners'],
)

baseline regret


(2792.237649427154, 0.01396118824713577, 174)

In [9]:
# Show the 'history' table stored in the baseline simulation output.
output['history']

Unnamed: 0,impressions,clicks,lifetime,p
132,19843.0,1332.0,10870.812904,0.064972
162,154.0,2.0,18594.827945,0.017514
163,86.0,0.0,5153.010937,0.000849
164,68.0,2.0,5092.571727,0.041281
166,48.0,0.0,5340.55207,0.007253
167,33.0,0.0,1474.181162,0.033849
168,23.0,0.0,4900.260295,0.027273
169,14.0,0.0,5007.022458,0.030857
170,18.0,0.0,8920.324215,0.034653
171,14.0,0.0,1080.025985,0.012549


In [10]:
def thompson(history: pd.DataFrame):
    """Choose a row of ``history`` by Thompson sampling.

    For every row one value is drawn from
    ``Beta(1 + clicks, 1 + impressions - clicks)`` using NumPy's global RNG,
    and the row with the largest draw is selected.

    Parameters
    ----------
    history : pd.DataFrame
        Must provide ``'clicks'`` and ``'impressions'`` columns.

    Returns
    -------
    The index label of the selected row.
    """
    wins = history['clicks']
    draws = np.random.beta(
        1 + wins,
        1 + history['impressions'] - wins,
    )
    return history.index[np.argmax(draws)]

In [11]:
# Re-seed NumPy's global RNG before the run.
reset_seed()

# Run the simulation with the `thompson` policy and time it.
%time thompson_output = simulation(thompson, n=200_000)

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
CPU times: user 2min 5s, sys: 3.89 s, total: 2min 9s
Wall time: 2min 6s


In [12]:
# Plain string literal: the message has no placeholders, so no f-prefix is needed.
print("Thompson sampling regret")
# Displayed as a tuple: (regret, regret divided by rounds, total_banners).
(
    thompson_output['regret'],
    thompson_output['regret'] / thompson_output['rounds'],
    thompson_output['total_banners'],
)

Thompson sampling regret


(1471.230717361168, 0.00735615358680584, 179)

In [13]:
# Show the 'history' table stored in the Thompson-sampling simulation output.
thompson_output['history']

Unnamed: 0,impressions,clicks,lifetime,p
154,156.0,0.0,16595.715595,0.000442
169,3736.0,144.0,11415.767481,0.039232
175,4496.0,596.0,11381.152627,0.137817
178,42.0,1.0,4959.290645,0.030834


In [14]:
def thompson_(history: pd.DataFrame, data_persuasiveness: float = 1):
    """Thompson sampling with the observed counts scaled by a multiplier.

    For every row one value is drawn from
    ``Beta(1 + m * clicks, 1 + m * (impressions - clicks))`` where ``m`` is
    ``data_persuasiveness``, using NumPy's global RNG; the row with the
    largest draw is selected.

    Parameters
    ----------
    history : pd.DataFrame
        Must provide ``'clicks'`` and ``'impressions'`` columns.
    data_persuasiveness : float, default 1
        Multiplier applied to both the click count and the non-click count
        before they are added to 1.

    Returns
    -------
    The index label of the selected row.
    """
    clicked = history['clicks']
    not_clicked = history['impressions'] - clicked
    draws = np.random.beta(
        1 + data_persuasiveness * clicked,
        1 + data_persuasiveness * not_clicked,
    )
    return history.index[np.argmax(draws)]

# Sweep the alpha/beta multiplier over 7 evenly spaced values in [0.5, 2].
# The seed is reset before each run so every run starts from the same RNG state.
# NOTE(review): the loop rebinds the globals `policy` (previously the
# eps_greedy partial) and `thompson_output` (previously the plain `thompson`
# run). Re-running earlier cells after this one will pick up the values from
# the last sweep iteration instead.
for c in np.linspace(0.5, 2, num=7):
    reset_seed()
    policy = partial(thompson_, data_persuasiveness=c)
    %time thompson_output = simulation(policy, n=200_000)
    
    print()
    print(f"Thompson sampling regret with alpha/beta multiplier {c:.4f}")
    # Same three quantities as the earlier regret cells: regret, regret divided by rounds, total_banners.
    print(thompson_output['regret'], thompson_output['regret']/thompson_output['rounds'], thompson_output['total_banners'])
    print()
    print()

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
CPU times: user 2min 18s, sys: 3.17 s, total: 2min 21s
Wall time: 2min 19s

Thompson sampling regret with alpha/beta multiplier 0.5000
2188.987256738951 0.010944936283694756 188


1 impressions have been simulated
10001 impressions ha