In [1]:
import numpy as np
import pandas as pd
import time

from functools import partial
from scipy.stats import randint, uniform, beta
from sim_lib import simulation

pd.options.mode.chained_assignment = None

In [2]:
def eps_greedy(history: pd.DataFrame, eps: float):
    if uniform.rvs() < eps:
        n = history.shape[0]
        return history.index[randint.rvs(0, n)]

    ctr = history['clicks'] / (history['impressions'] + 10)
    n = np.argmax(ctr)
    return history.index[n]

def ucb(history: pd.DataFrame, C: float):
    total = history['impressions']
    positive = history['clicks']
    ctr = positive / (total + 10)
    ctr += C * np.sqrt(2 * (np.log(positive + 1) + 1) / (total + 1e-6))
    n = np.argmax(ctr)
    return history.index[n]

def thompson_sampler(history: pd.DataFrame):
    total = history['impressions'].values
    positive = history['clicks'].values
    n = beta(np.where(positive > 0, positive, 1), np.where(total > 0, total, 1)).rvs().argmax()
    return history.index[n]

def test_policy(policy, n=200000):
    # seed for homework
    np.random.seed(seed=384758917)

    start = time.time()
    output = simulation(policy, n=n)
    end = time.time()
    
    # display results
    print(f'run takes {end - start:.3f}s')
    print('regret = {:.4f}, E_regret = {:.4f}, total_banners = {}'.format(
        output['regret'], output['regret'] / output['rounds'], output['total_banners']))
    display(output['history'])

In [3]:
# перебор параметров на чуть меньшем n
for C in [1e-3, 5e-2, 1e-2, 5e-1, 1e-1]:
    print(f'ucb regret, C = {C}')
    test_policy(partial(ucb, C=C), n=10000)

ucb regret, C = 0.001
1 impressions have been simulated
run takes 37.757s
regret = 545.4960, E_regret = 0.0545, total_banners = 18


Unnamed: 0,impressions,clicks,lifetime,p
0,1.0,0.0,7599.571491,0.059342
3,1.0,0.0,730.054398,0.123087
5,9641.0,1106.0,9826.394049,0.111589
8,1.0,0.0,2658.518663,0.000177
11,1.0,0.0,1840.920703,0.006756
12,1.0,0.0,3399.839917,0.003828
13,1.0,0.0,11307.15983,0.030773
17,1.0,0.0,918.927311,0.007524


ucb regret, C = 0.05
1 impressions have been simulated
run takes 37.513s
regret = 546.7976, E_regret = 0.0547, total_banners = 18


Unnamed: 0,impressions,clicks,lifetime,p
0,1.0,0.0,7599.571491,0.059342
3,1.0,0.0,730.054398,0.123087
5,9944.0,1147.0,9826.394049,0.111589
8,1.0,0.0,2658.518663,0.000177
11,1.0,0.0,1840.920703,0.006756
12,1.0,0.0,3399.839917,0.003828
13,1.0,0.0,11307.15983,0.030773
17,1.0,0.0,918.927311,0.007524


ucb regret, C = 0.5
1 impressions have been simulated
run takes 38.669s
regret = 56.6752, E_regret = 0.0057, total_banners = 18


Unnamed: 0,impressions,clicks,lifetime,p
0,19.0,0.0,7599.571491,0.059342
3,19.0,0.0,730.054398,0.123087
5,1381.0,165.0,9826.394049,0.111589
8,19.0,0.0,2658.518663,0.000177
11,19.0,0.0,1840.920703,0.006756
12,19.0,0.0,3399.839917,0.003828
13,19.0,0.0,11307.15983,0.030773
17,42.0,1.0,918.927311,0.007524


ucb regret, C = 0.1
1 impressions have been simulated
run takes 38.686s
regret = 24.5710, E_regret = 0.0025, total_banners = 18


Unnamed: 0,impressions,clicks,lifetime,p
0,2.0,0.0,7599.571491,0.059342
3,2.0,0.0,730.054398,0.123087
5,1799.0,212.0,9826.394049,0.111589
8,2.0,0.0,2658.518663,0.000177
11,2.0,0.0,1840.920703,0.006756
12,2.0,0.0,3399.839917,0.003828
13,2.0,0.0,11307.15983,0.030773
17,8.0,1.0,918.927311,0.007524


In [4]:
print('baseline regret')
test_policy(partial(eps_greedy, eps=0.08))

print('-'*50)

print('ucb regret')
test_policy(partial(ucb, C=1e-1)) # используем лучший параметр

print('-'*50)

print('thompson sampler regret')
test_policy(thompson_sampler)

baseline regret
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
run takes 531.489s
regret = 2792.2376, E_regret = 0.0140, total_banners = 174


Unnamed: 0,impressions,clicks,lifetime,p
132,19843.0,1332.0,10870.812904,0.064972
162,154.0,2.0,18594.827945,0.017514
163,86.0,0.0,5153.010937,0.000849
164,68.0,2.0,5092.571727,0.041281
166,48.0,0.0,5340.55207,0.007253
167,33.0,0.0,1474.181162,0.033849
168,23.0,0.0,4900.260295,0.027273
169,14.0,0.0,5007.022458,0.030857
170,18.0,0.0,8920.324215,0.034653
171,14.0,0.0,1080.025985,0.012549


--------------------------------------------------
ucb regret
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
run takes 749.459s
regret = 992.5676, E_regret = 0.0050, total_banners = 185


Unnamed: 0,impressions,clicks,lifetime,p
169,3.0,0.0,12570.660923,0.014629
173,51.0,4.0,2590.948318,0.07785
174,3.0,0.0,1630.594595,0.05994
177,3573.0,387.0,1959.841208,0.109108
178,3.0,0.0,18537.925424,0.055495
181,3.0,0.0,660.380946,0.005876
182,3.0,0.0,3253.772281,0.032968
184,2.0,0.0,3025.997858,0.00822


--------------------------------------------------
thompson sampler regret
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
run takes 607.007s
regret = 1570.4884, E_regret = 0.0079, total_banners = 205


Unnamed: 0,impressions,clicks,lifetime,p
170,378.0,14.0,10386.924814,0.035065
174,520.0,28.0,11181.688405,0.051696
201,83.0,0.0,6381.578383,0.003504
202,747.0,46.0,10181.202625,0.07313
203,78.0,0.0,14687.62125,0.00516
204,83.0,1.0,5459.373402,0.001561
