# Thompson Samplingで広告配信を最適化してみた

In [1]:
import math
import numpy as np
import random
import pandas as pd
import plotly.plotly as py

from pandas import DataFrame, Series

from plotly.offline import iplot, plot, init_notebook_mode
from plotly.graph_objs import Scatter

In [2]:
from mab_tools.algorithms import EpsilonGreedy, SoftMax, UCB1, ThompsonSampling
from mab_tools.test import BernoulliArm, average_rewards, cumulative_rewards, success_rate, sim_mabs_bern

### Arms

In [3]:
# Sample the expected rewards of 10 arms from a normal distribution with
# mean 0.2 and standard deviation 0.15 (`scale` is a standard deviation, not
# a variance). Each draw is clipped to [0.01, 0.5] and rounded to 3 decimals.
arms1 = [
    round(np.clip(np.random.normal(loc=0.2, scale=0.15), 0.01, 0.5), 3)
    for _ in range(10)
]

In [4]:
# Display the sampled expected rewards used in simulation 1.
arms1

[0.176, 0.228, 0.186, 0.431, 0.081, 0.5, 0.19, 0.315, 0.222, 0.246]

In [21]:
# Sample the expected rewards of 10 arms from a normal distribution with
# mean 0.1 and standard deviation 0.10 (`scale` is a standard deviation, not
# a variance). Each draw is clipped to [0.01, 0.5] and rounded to 3 decimals.
arms2 = [
    round(np.clip(np.random.normal(loc=0.1, scale=0.10), 0.01, 0.5), 3)
    for _ in range(10)
]

In [22]:
# Display the sampled expected rewards used in simulation 2.
arms2

[0.056, 0.138, 0.126, 0.121, 0.198, 0.216, 0.01, 0.257, 0.163, 0.065]

### Algorithms

In [6]:
# Algorithms compared in the simulations. Each display label is declared next
# to the policy it describes so the two lists cannot drift out of order; the
# (label, policy) pairs are then transposed into the parallel lists
# `name_list` and `algos`. Every policy is sized with the number of arms.
name_list, algos = map(list, zip(
    # epsilon-greedy with epsilon = 0.1
    ("Epsilon Greedy(ε=0.1)", EpsilonGreedy(0.1, len(arms1))),
    # epsilon-greedy with epsilon = 0.4
    ("Epsilon Greedy(ε=0.4)", EpsilonGreedy(0.4, len(arms1))),
    # Softmax with tau = 0.1
    ("SoftMax(τ=0.1)", SoftMax(0.1, len(arms1))),
    # Softmax with tau = 0.4
    ("SoftMax(τ=0.4)", SoftMax(0.4, len(arms1))),
    # Upper Confidence Bound 1
    ("UCB1", UCB1(len(arms1))),
    # Thompson Sampling
    ("ThompsonSampling", ThompsonSampling(len(arms1))),
))

## Simulation 1

### Run Bandit Algorithms

In [7]:
# Simulation 1: run every algorithm in `algos` against the `arms1` setup,
# with num_sims=100 and horizon=3000.
result_list1 = sim_mabs_bern(
    algo_list=algos,
    arms=arms1,
    algo_name=name_list,
    num_sims=100,
    horizon=3000,
)

Avg Elapsed Time(3000 iter) Epsilon Greedy(ε=0.1) : 0.137s
Avg Elapsed Time(3000 iter) Epsilon Greedy(ε=0.4) : 0.13s
Avg Elapsed Time(3000 iter) SoftMax(τ=0.1) : 0.437s
Avg Elapsed Time(3000 iter) SoftMax(τ=0.4) : 0.437s
Avg Elapsed Time(3000 iter) UCB1 : 0.267s
Avg Elapsed Time(3000 iter) ThompsonSampling : 0.287s


### Visualize Results

In [8]:
# Visualize the average reward per algorithm for simulation 1
# (presumably a plot; average_rewards comes from mab_tools.test, not shown here).
average_rewards(result_list1, name_list)

In [9]:
# Visualize the cumulative reward per algorithm for simulation 1
# (presumably a plot; cumulative_rewards comes from mab_tools.test, not shown here).
cumulative_rewards(result_list1, name_list)

In [10]:
# Visualize the success rate per algorithm for simulation 1
# (exact metric is defined in mab_tools.test, not shown here; confirm there).
success_rate(result_list1, name_list)

In [11]:
# Build a table of how often each arm was selected in simulation 1.
# For every algorithm's result, take the relative frequency of each value in
# `chosen_arm`, order it by arm index and round to 3 decimals. The
# per-algorithm columns are then joined in a single concat, instead of growing
# a DataFrame inside a loop (which re-copies the frame on every iteration).
select_df1 = pd.concat(
    [
        result.chosen_arm.value_counts(normalize=True).sort_index().round(3)
        for result in result_list1
    ],
    axis=1,
)
# One column per algorithm, labelled in the same order as `result_list1`.
select_df1.columns = name_list
# Put each arm's expected reward in front as the "Arms" column; rows align on
# the arm index.
select_df1 = pd.concat([DataFrame(arms1, columns=["Arms"]), select_df1], axis=1)

In [12]:
# Display, for simulation 1, each arm's expected reward next to the share of
# selections it received from every algorithm.
select_df1

Unnamed: 0,Arms,Epsilon Greedy(ε=0.1),Epsilon Greedy(ε=0.4),SoftMax(τ=0.1),SoftMax(τ=0.4),UCB1,ThompsonSampling
0,0.176,0.026,0.043,0.092,0.091,0.029,0.007
1,0.228,0.014,0.041,0.096,0.096,0.039,0.01
2,0.186,0.013,0.041,0.093,0.093,0.031,0.008
3,0.431,0.232,0.103,0.118,0.118,0.191,0.081
4,0.081,0.01,0.04,0.083,0.083,0.02,0.005
5,0.5,0.636,0.567,0.127,0.126,0.512,0.843
6,0.19,0.011,0.04,0.093,0.093,0.032,0.008
7,0.315,0.032,0.043,0.105,0.105,0.067,0.018
8,0.222,0.013,0.042,0.095,0.096,0.037,0.009
9,0.246,0.013,0.041,0.097,0.098,0.042,0.011


## Simulation 2

### Run Bandit Algorithms

In [23]:
# Simulation 2: run every algorithm in `algos` against the `arms2` setup,
# with num_sims=100, horizon=5000, batch=True and batch_size=500
# (presumably feedback is applied in batches of 500; confirm in mab_tools.test).
# NOTE(review): `algos` was sized with len(arms1); this works because arms2
# also has 10 arms. The same algorithm instances as in simulation 1 are
# reused, so confirm that sim_mabs_bern resets their state before each run.
result_list2 = sim_mabs_bern(
    algo_list=algos,
    arms=arms2,
    algo_name=name_list,
    num_sims=100,
    horizon=5000,
    batch=True,
    batch_size=500,
)

Avg Elapsed Time(5000 iter) Epsilon Greedy(ε=0.1) : 0.23s
Avg Elapsed Time(5000 iter) Epsilon Greedy(ε=0.4) : 0.224s
Avg Elapsed Time(5000 iter) SoftMax(τ=0.1) : 0.755s
Avg Elapsed Time(5000 iter) SoftMax(τ=0.4) : 0.759s
Avg Elapsed Time(5000 iter) UCB1 : 0.466s
Avg Elapsed Time(5000 iter) ThompsonSampling : 0.499s


### Visualize Results

In [24]:
# Visualize the average reward per algorithm for simulation 2
# (presumably a plot; average_rewards comes from mab_tools.test, not shown here).
average_rewards(result_list2, name_list)

In [25]:
# Visualize the cumulative reward per algorithm for simulation 2
# (presumably a plot; cumulative_rewards comes from mab_tools.test, not shown here).
cumulative_rewards(result_list2, name_list)

In [26]:
# Visualize the success rate per algorithm for simulation 2
# (exact metric is defined in mab_tools.test, not shown here; confirm there).
success_rate(result_list2, name_list)

In [27]:
# Build a table of how often each arm was selected in simulation 2.
# For every algorithm's result, take the relative frequency of each value in
# `chosen_arm`, order it by arm index and round to 3 decimals. The
# per-algorithm columns are then joined in a single concat, instead of growing
# a DataFrame inside a loop (which re-copies the frame on every iteration).
select_df2 = pd.concat(
    [
        result.chosen_arm.value_counts(normalize=True).sort_index().round(3)
        for result in result_list2
    ],
    axis=1,
)
# One column per algorithm, labelled in the same order as `result_list2`.
select_df2.columns = name_list
# Put each arm's expected reward in front as the "Arms" column; rows align on
# the arm index.
select_df2 = pd.concat([DataFrame(arms2, columns=["Arms"]), select_df2], axis=1)

In [28]:
# Display, for simulation 2, each arm's expected reward next to the share of
# selections it received from every algorithm.
select_df2

Unnamed: 0,Arms,Epsilon Greedy(ε=0.1),Epsilon Greedy(ε=0.4),SoftMax(τ=0.1),SoftMax(τ=0.4),UCB1,ThompsonSampling
0,0.056,0.1,0.1,0.094,0.093,0.037,0.011
1,0.138,0.014,0.041,0.101,0.1,0.069,0.02
2,0.126,0.012,0.04,0.099,0.098,0.063,0.017
3,0.121,0.012,0.04,0.098,0.099,0.061,0.017
4,0.198,0.104,0.06,0.106,0.106,0.127,0.067
5,0.216,0.166,0.081,0.107,0.108,0.169,0.109
6,0.01,0.01,0.04,0.089,0.09,0.028,0.01
7,0.257,0.529,0.515,0.112,0.11,0.32,0.713
8,0.163,0.041,0.042,0.102,0.102,0.086,0.027
9,0.065,0.011,0.04,0.094,0.094,0.04,0.011
