In [1]:
import numpy as np
import pandas as pd
import time

from functools import partial
from scipy.stats import randint, uniform
from sim_lib import simulation

pd.options.mode.chained_assignment = None


На всякий случай я решил работать в том же ноутбуке, в котором было выдано задание, поэтому просто скопировал его.

### Первая стратегия — выданный нам epsilon-жадный алгоритм, который служит для нас baseline

In [2]:
def eps_greedy(history: pd.DataFrame, eps: float):
    """Epsilon-greedy selection over the rows of ``history``.

    With probability ``eps`` a uniformly random row label is returned;
    otherwise the label of the row with the highest smoothed click-through
    rate ``clicks / (impressions + 10)`` is returned.

    Args:
        history: frame with ``clicks`` and ``impressions`` columns.
        eps: exploration probability.

    Returns:
        A label taken from ``history.index``.
    """
    explore = uniform.rvs() < eps
    if not explore:
        # Exploit: the +10 in the denominator damps the CTR of rarely shown rows.
        smoothed_ctr = history['clicks'] / (history['impressions'] + 10)
        return history.index[np.argmax(smoothed_ctr)]

    # Explore: draw a row position uniformly from [0, number of rows).
    random_position = randint.rvs(0, history.shape[0])
    return history.index[random_position]


# Baseline policy: epsilon-greedy with eps fixed at 0.08.
policy = partial(eps_greedy, eps=0.08)

### Вначале я решил использовать бернуллиевского бандита (сэмплирование Томпсона из бета-распределения), завышая клики на 1, а показы без клика — на 5. Скажу сразу, что этим алгоритмом у меня сходу получилось пробить baseline, поэтому я ещё реализовал алгоритм UCB

In [3]:
def bernouli_strategy(history: pd.DataFrame, k: int = 1, n: int = 5):
    """Pick a row label by sampling from per-row Beta distributions.

    One value is drawn per row from
    ``Beta(clicks + k, impressions - clicks + n)`` and the label of the row
    with the largest draw is returned.

    Args:
        history: frame with ``clicks`` and ``impressions`` columns.
        k: amount added to the click count (first Beta parameter).
        n: amount added to the non-clicked impression count (second Beta parameter).

    Returns:
        A label taken from ``history.index``.
    """
    clicked = history['clicks']
    not_clicked = history['impressions'] - clicked
    samples = np.random.beta(clicked + k, not_clicked + n)
    winner = np.argmax(samples)
    return history.index[winner]

### Реализация UCB. Чтобы вести подсчёт time_stamp, я решил реализовать эту стратегию через класс, а не через функцию, как два предыдущих алгоритма

In [5]:
class UCB_strategy:
    """Upper-confidence-bound (UCB) selection over the rows of ``history``.

    Each call scores every row as

        (clicks + 1) / (impressions + 1)
            + sqrt(exploration * ln(t) / (impressions + 1))

    where ``t`` is the number of times this object has been called, and
    returns the label of the highest-scoring row.

    Args:
        exploration: multiplier of ``ln(t)`` inside the confidence bonus.
            The default ``2.0`` reproduces the original hard-coded formula;
            smaller values explore less and ``0`` disables the bonus.

    Raises:
        ValueError: if ``exploration`` is negative (the bonus would be NaN).
    """

    def __init__(self, exploration: float = 2.0):
        if exploration < 0:
            raise ValueError("exploration must be non-negative")
        self.exploration = exploration
        # Number of selections made so far; used as ``t`` in the bonus term.
        self.time_stamp = 0

    def __call__(self, history: pd.DataFrame):
        """Return the label from ``history.index`` with the highest UCB score."""
        self.time_stamp += 1
        # +1 keeps the denominator non-zero for rows with no impressions yet.
        shown = history['impressions'] + 1
        bonus = np.sqrt(self.exploration * np.log(self.time_stamp) / shown)
        bets = (history['clicks'] + 1) / shown + bonus
        return history.index[np.argmax(bets)]


### Вначале считается baseline

In [6]:
# seed for homework: seeds NumPy's global RNG before the run
np.random.seed(seed=384758917)

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments while the simulation is running.
start = time.perf_counter()
output = simulation(policy, n=200000)
end = time.perf_counter()
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


375.7543902397156

In [7]:
# Baseline (eps-greedy) results: regret, regret per round, total_banners
(
    output['regret'],
    output['regret'] / output['rounds'],
    output['total_banners'],
)

(2792.237649427154, 0.01396118824713577, 174)

### Посчитаем бандита с распределением Бернулли

In [8]:
# seed for homework: same seed as the baseline run
np.random.seed(seed=384758917)

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments while the simulation is running.
start = time.perf_counter()
output_bernoulli = simulation(bernouli_strategy, n=200000)
end = time.perf_counter()
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


375.90051555633545

In [9]:
# bernouli_strategy results: regret, regret per round, total_banners
(
    output_bernoulli['regret'],
    output_bernoulli['regret'] / output_bernoulli['rounds'],
    output_bernoulli['total_banners'],
)

(1662.978266216189, 0.008314891331080945, 216)

### Так как бандит со стратегией Бернулли сходу пробил baseline, я решил посчитать ещё и UCB, но он оказался хуже, чем epsilon-жадный алгоритм

In [11]:
# seed for homework: same seed as the baseline run
np.random.seed(seed=384758917)

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments while the simulation is running.
start = time.perf_counter()
output_ucb = simulation(UCB_strategy(), n=200000)
end = time.perf_counter()
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


455.7166349887848

In [12]:
# UCB_strategy results: regret, regret per round, total_banners
(
    output_ucb['regret'],
    output_ucb['regret'] / output_ucb['rounds'],
    output_ucb['total_banners'],
)

(9342.99688344694, 0.0467149844172347, 185)