# Imports

In [1]:
# extensions
%load_ext autoreload
%autoreload 2

# paths
import sys
from os.path import dirname, abspath, realpath
# NOTE: '__file__' is a string literal here, not the module attribute, so
# realpath() resolves it against the current working directory and the two
# dirname() calls then yield the parent of that directory.
# NOTE(review): add_path is not used (e.g. appended to sys.path) anywhere in
# the visible part of this notebook — confirm whether that step is missing.
add_path = dirname(dirname(realpath('__file__')))

# standard imports
import logging
import math
import random
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from pprint import pprint
from IPython.display import display
import pickle
import time
import itertools

# special imports
import matplotlib.animation as animation
import matplotlib.colors as colors

# initialize
import importlib
# Re-executes the logging module; presumably to drop logging configuration
# left over from an earlier run of this cell — TODO confirm intent.
importlib.reload(logging)
# Select matplotlib's interactive notebook backend for all following figures.
%matplotlib notebook
# Truncate displayed DataFrames/Series to at most 7 rows.
pd.options.display.max_rows = 7
# Default figure size as a (width, height) tuple, in inches.
matplotlib.rcParams['figure.figsize'] = 9,3

from datetime import timedelta

class YouTubeVideo(object):
    """Notebook-displayable object that embeds a YouTube video in an iframe.

    Parameters
    ----------
    id : str
        YouTube video identifier, inserted after ``embed/`` in the player URL.
    width, height : int
        Size of the embedded player in pixels.
    start : datetime.timedelta
        Offset into the video at which playback should begin.
    """

    def __init__(self, id, width=400, height=300, start=timedelta()):
        self.id = id
        self.width = width
        self.height = height
        # Kept as (float) seconds; rendered as a whole number in the URL.
        self.start = start.total_seconds()

    def _repr_html_(self):
        """Return the iframe markup; IPython uses this for rich display."""
        # The embed URL uses https: a notebook served over HTTPS would have
        # an http:// iframe blocked by the browser as mixed content.
        return """
            <iframe
                width="%i"
                height="%i"
                src="https://www.youtube.com/embed/%s?start=%i"
                frameborder="0"
                allowfullscreen
            ></iframe>
        """%(self.width, self.height, self.id, self.start)



# Printed last, so seeing it confirms the whole setup cell ran without error.
print("init ok")

init ok


# Bandits

<img  src="one_arm_bandit.jpg"/>

Exploration vs exploitation

## epsilon-greedy

<img  src="epsilongreedy-768x466.png"/>

## UCB1

<img  src="LTSDJ96BDFOU5XODPN6DJ9R7GGYA0OFB.png"/>

More info at https://jeremykun.com/2013/10/28/optimism-in-the-face-of-uncertainty-the-ucb1-algorithm/

## MCTS

<img  src="MCTS_(English).svg.png"/>

# Example: Bayes bandit

The Beta distribution is defined as

$Beta(x|\alpha, \beta) = \dfrac{\Gamma(\alpha + \beta)}{\Gamma(\alpha)\Gamma(\beta)}x^{\alpha-1}(1 - x)^{\beta-1}. $

It is a probability distribution over probabilities: here, it describes our uncertainty about a banner's click-through rate (CTR).

The mean is

$E[x] = \dfrac{\alpha}{\alpha + \beta}.$

A/B testing is pure exploration. Compared with A/B testing, a bandit gives less statistical significance but a higher average reward.

A/B testing is a special case of the multi-armed bandit (MAB) problem.

## Sample plots

In [2]:
# from scipy.special import gamma
from scipy.stats import beta

def pdf(a, b, x):
    """Evaluate the density of the Beta(a, b) distribution at ``x``.

    ``x`` is passed straight through to scipy, and the density it returns
    is handed back unchanged.
    """
    return beta(a, b).pdf(x)

def beta_plot(a, b):
    """Plot the Beta(a, b) density over [0, 1] via ``plt.plot``.

    The curve is added to whatever figure pyplot currently has active;
    creating the figure and showing it are left to the caller.
    """
    # linspace pins both endpoints and the sample count (101 points, step
    # 0.01); a float-stepped arange can gain or lose its last sample through
    # rounding of the stop/step ratio.
    x = np.linspace(0.0, 1.0, 101)
    y = pdf(a, b, x)
    plt.plot(x, y)

# Overlay two example Beta densities on figure 1: Beta(50, 3) has its mass
# close to 1, Beta(3, 8) towards the lower end of [0, 1].
# Other shapes worth trying: (1, 1), (2, 2), (3, 3), (1, 6).
plt.figure(1)
for shape_a, shape_b in [(50, 3), (3, 8)]:
    beta_plot(shape_a, shape_b)
plt.show()

<IPython.core.display.Javascript object>

## Code for banners/bandits

In [3]:
from scipy.stats import beta
from random import random
 
class Banner():
    """One banner (bandit arm) holding a Beta(alpha, beta) belief about its CTR.

    Parameters
    ----------
    CTR : float
        Click-through rate associated with this banner; stored as given and
        returned by ``getCTR``.
    alpha, beta : int
        Initial parameters of the Beta distribution. The defaults (1, 1)
        correspond to a uniform distribution on [0, 1].
    """

    def __init__(self, CTR, alpha=1, beta=1):
        self.CTR = CTR
        self.alpha = alpha
        self.beta = beta

    def update(self, click):
        """Record one impression: a click increments alpha, otherwise beta."""
        # Plain truthiness rather than `click == True`, so any truthy flag
        # counts as a click.
        if click:
            self.alpha += 1
        else:
            self.beta += 1

    def sample(self):
        """Draw one random value from the current Beta(alpha, beta)."""
        # Call rvs on the scipy distribution directly: constructing a frozen
        # distribution object for every single draw is avoidable overhead.
        return beta.rvs(self.alpha, self.beta)

    def getCTR(self):
        """Return the CTR this banner was constructed with."""
        return self.CTR
 
class BayesBandit:
    """Bandit over a set of banners that selects by sampling each banner's Beta.

    Parameters
    ----------
    CTRs : iterable of float
        One click-through rate per banner; a ``Banner`` is created for each,
        in order, and stored in ``self.banners``.
    """

    def __init__(self, CTRs):
        self.banners = [Banner(CTR) for CTR in CTRs]

    def selectBanner(self):
        """Sample every banner once and return the index of the largest draw.

        If several draws tie for the maximum, the lowest index is returned.
        """
        sampledCTRs = [banner.sample() for banner in self.banners]
        return sampledCTRs.index(max(sampledCTRs))

    def simulateUser(self, banner):
        """Simulate one impression of the banner at index ``banner``.

        A click is drawn with probability equal to that banner's CTR, and
        the outcome is fed back into the banner through ``update``.
        """
        CTR = self.banners[banner].getCTR()
        # random() is uniform on [0.0, 1.0), so the strict `<` makes the click
        # probability exactly CTR. The previous `random() > CTR` test counted
        # random() == CTR as a click, so a banner with CTR == 0 could still
        # be clicked when random() returned 0.0.
        self.banners[banner].update(random() < CTR)

## Simulation

In [5]:
# Simulate 100 page views: for each one the bandit picks a banner and a
# simulated user's click (or non-click) is recorded against that banner.
bandit = BayesBandit([0.25, 0.4, 0.67])
for _step in range(100):
    banner = bandit.selectBanner()
    bandit.simulateUser(banner)

In [6]:
plt.figure(2)

# For every banner: print its learned Beta parameters next to the real CTR
# and the Beta mean a / (a + b), then add its density curve to figure 2.
num_banners = len(bandit.banners)
for idx, shown in enumerate(bandit.banners):
    a, b = shown.alpha, shown.beta
    posterior_mean = a / (a + b)
    print("Banner #%d, a=%3d, b=%3d, real_CTR=%1.3f, mean=%1.3f"
          % (idx, a, b, shown.CTR, posterior_mean))
    beta_plot(a, b)

# Curves were added in banner order, so the labels line up by position.
legend_labels = ["Banner #%d" % idx for idx in range(num_banners)]
plt.legend(legend_labels)
plt.show()


<IPython.core.display.Javascript object>

Banner #0, a=  1, b=  5, real_CTR=0.250, mean=0.167
Banner #1, a=  5, b=  8, real_CTR=0.400, mean=0.385
Banner #2, a= 61, b= 26, real_CTR=0.670, mean=0.701


# Relation to RL

In [7]:
# Embed the video as the cell's output, with playback starting 24 seconds in.
YouTubeVideo("ZtP4eEYjJ2I", start=timedelta(seconds=24))

In [8]:
# Embed the video as the cell's output, with playback starting 25 seconds in.
YouTubeVideo("V1eYniJ0Rnk", start=timedelta(seconds=25))