## Thompson sampling and the Binary Bernoulli Bandit Problem

### Introduction

### Example setup

In [1]:
# activate the project in the current directory, then resolve and install its dependencies -
import Pkg
Pkg.activate(".");
Pkg.resolve();
Pkg.instantiate();

[32m[1m  Activating[22m[39m project at `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Multiarm-Bandit-TS`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Multiarm-Bandit-TS/Project.toml`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Multiarm-Bandit-TS/Manifest.toml`


In [2]:
# load reqd packages -
using Distributions
using Plots
using Colors
using StatsPlots
using PrettyTables
using DataFrames

# setup paths -
# _ROOT is whatever directory the session is in when this cell runs (pwd()),
# and _PATH_TO_FIGS is the "figs" folder directly beneath it. The folder is not
# created here; it is only referenced by the commented-out savefig calls below.
const _ROOT = pwd();
const _PATH_TO_FIGS = joinpath(_ROOT, "figs");

In [3]:
include("CHEME-5660-Example-CodeLib.jl");

In [4]:
# initialize the world -
# world_d_dict maps an action index (1..K) to the Bernoulli distribution that
# generates the reward for that action.
world_d_dict = Dict{Int64, Bernoulli}();
θ_array = [0.90, 0.60, 0.80]; # values taken from Russo et al Fig. 3.1
K = length(θ_array); # one action per success probability, so K cannot drift out of sync with θ_array
number_of_steps = 5000;

# hard code the parameters for the Bernoulli distributions, these are the
# ground truth, but the agent doesn't know them (they are NOT directly visible)
for (k, θ) ∈ enumerate(θ_array)
    world_d_dict[k] = Bernoulli(θ);
end

In [5]:
# world function: play one action and report the binary outcome -
#
# Arguments:
#   action - key of the action to play; must be present in `dict`
#   dict   - maps each action key to the Bernoulli distribution that is sampled for it
#
# Returns 1 when the draw equals true, and 0 otherwise.
function world(action::Int64, dict::Dict{Int64, Bernoulli})::Int64

    # look up the distribution for this action and draw from it once -
    outcome = rand(dict[action]);

    # encode the draw as an integer -
    return (outcome == true) ? 1 : 0;
end

world (generic function with 2 methods)

In [6]:
# wrap the world so that callers only have to supply the action -
world_function(a) = world(a, world_d_dict);

# Thompson sampling model: K actions, with every α and β entry starting at one
# (the uniform starting values) -
model = ThompsonSamplingModel()
model.K = K;
model.α = fill(1.0, K);
model.β = fill(1.0, K);

# TS: hand the model, the step count 𝒯 and the world function to the sampler -
time_sample_results_dict_Ts = sample(model; 𝒯 = number_of_steps, world = world_function);

In [7]:
# epsilon sampling model: same K actions and the same all-ones starting α and β
# as the Thompson sampling model -
epsilon_model = EpsilonSamplingModel();
epsilon_model.K = K;
epsilon_model.ϵ = 0.20; # some fraction of the iterations, do a random step
epsilon_model.α = fill(1.0, K);
epsilon_model.β = fill(1.0, K);

# EPS-TS: same step count and world function as the Thompson sampling run -
time_sample_results_dict_eps = sample(epsilon_model; 𝒯 = number_of_steps, world = world_function);

In [8]:
# show the raw epsilon sampling results: a Dict keyed by an integer index whose
# values are matrices with one row per action
# (presumably the columns are the α and β parameters - TODO confirm against the code library)
time_sample_results_dict_eps

Dict{Int64, Matrix{Float64}} with 5000 entries:
  4986 => [3827.0 425.0; 197.0 129.0; 326.0 87.0]
  4700 => [3612.0 400.0; 183.0 122.0; 305.0 83.0]
  4576 => [3516.0 389.0; 179.0 120.0; 296.0 81.0]
  2288 => [1731.0 175.0; 99.0 65.0; 174.0 49.0]
  1703 => [1275.0 131.0; 71.0 51.0; 138.0 42.0]
  1956 => [1469.0 153.0; 86.0 55.0; 154.0 44.0]
  2350 => [1776.0 184.0; 104.0 65.0; 176.0 50.0]
  3406 => [2594.0 285.0; 143.0 99.0; 226.0 64.0]
  2841 => [2155.0 235.0; 121.0 81.0; 198.0 56.0]
  2876 => [2185.0 237.0; 123.0 81.0; 199.0 56.0]
  687  => [476.0 47.0; 28.0 26.0; 91.0 24.0]
  185  => [88.0 11.0; 7.0 8.0; 59.0 17.0]
  1090 => [790.0 82.0; 46.0 36.0; 110.0 31.0]
  2015 => [1517.0 157.0; 87.0 56.0; 158.0 45.0]
  3293 => [2508.0 275.0; 138.0 94.0; 220.0 63.0]
  1704 => [1276.0 131.0; 71.0 51.0; 138.0 42.0]
  3220 => [2448.0 269.0; 136.0 92.0; 217.0 63.0]
  4888 => [3762.0 411.0; 192.0 125.0; 317.0 86.0]
  422  => [266.0 25.0; 19.0 18.0; 76.0 23.0]
  1266 => [929.0 96.0; 52.0 40.0; 118.0 

In [9]:
# build a pretty table comparing the true θᵢ with the estimate from each sampler -
# pick a time step -
T_index = 5000

# beta distributions for every action at the chosen time step, one array per sampler -
beta_array_Ts = build_beta_array(time_sample_results_dict_Ts[T_index]);
beta_array_eps = build_beta_array(time_sample_results_dict_eps[T_index]);

# one row per action: index | true θ | Thompson sampling estimate | eps-TS estimate
action_probability_array = Array{Any,2}(undef, K, 4);
for k ∈ 1:K

    # parameters of the k-th distribution under each sampler -
    α_Ts, β_Ts = params(beta_array_Ts[k]);
    α_eps, β_eps = params(beta_array_eps[k]);

    # the estimate is α/(α + β), i.e. the mean of a Beta(α, β) distribution -
    action_probability_array[k,1] = k;
    action_probability_array[k,2] = θ_array[k];
    action_probability_array[k,3] = α_Ts/(α_Ts + β_Ts);
    action_probability_array[k,4] = α_eps/(α_eps + β_eps);
end

# column titles, then display the table -
action_table_header = (["Action aᵢ", "θᵢ", "θ̂ᵢ Ts", "θ̂ᵢ eps-Ts"]);
pretty_table(action_probability_array; header = action_table_header)

┌───────────┬─────┬──────────┬───────────┐
│[1m Action aᵢ [0m│[1m  θᵢ [0m│[1m    θ̂ᵢ Ts [0m│[1m θ̂ᵢ eps-Ts [0m│
├───────────┼─────┼──────────┼───────────┤
│         1 │ 0.9 │ 0.898583 │  0.899625 │
│         2 │ 0.6 │ 0.611111 │  0.604294 │
│         3 │ 0.8 │ 0.765957 │  0.790361 │
└───────────┴─────┴──────────┴───────────┘


In [10]:
 # get the distributions for this time point -
# the first action opens the figure, so it also carries the figure-level styling -
plot(beta_array_Ts[1], legend=:topleft, label="a₁", lw=2, bg=colorant"#F2F2F2", 
    background_color_outside="white", framestyle = :box, fg_legend = :transparent, minorticks=0.05, 
    c=colorant"#EF4035")

# the remaining actions are drawn on top, each with its own label and color -
for (arm, arm_label, arm_color) ∈ ((2, "a₂", colorant"#6EB43F"), (3, "a₃", colorant"#0068AC"))
    plot!(beta_array_Ts[arm], label=arm_label, lw=2, c=arm_color)
end

# setup labels -
# NOTE(review): `fontsize` may not be a recognized Plots attribute for axis labels
# (`guidefontsize` looks like the intended one) - confirm before relying on the 18 pt size
xlabel!("Value (AU)", fontsize=18)
ylabel!("Probability density (AU)", fontsize=18)

# uncomment me to save -
#filename = "Fig-BBBP-Ts-T$(T_index).pdf"
#savefig(joinpath(_PATH_TO_FIGS, filename));

In [11]:
 # get the distributions for this time point -
# the first action opens the figure, so it also carries the figure-level styling -
plot(beta_array_eps[1], legend=:topleft, label="a₁", lw=2, bg=colorant"#F2F2F2", 
    background_color_outside="white", framestyle = :box, fg_legend = :transparent, minorticks=0.05, 
    c=colorant"#EF4035")

# the remaining actions are drawn on top, each with its own label and color -
for (arm, arm_label, arm_color) ∈ ((2, "a₂", colorant"#6EB43F"), (3, "a₃", colorant"#0068AC"))
    plot!(beta_array_eps[arm], label=arm_label, lw=2, c=arm_color)
end

# setup labels -
# NOTE(review): `fontsize` may not be a recognized Plots attribute for axis labels
# (`guidefontsize` looks like the intended one) - confirm before relying on the 18 pt size
xlabel!("Value (AU)", fontsize=18)
ylabel!("Probability density (AU)", fontsize=18)

# uncomment me to save -
#filename = "Fig-BBBP-eps-20-T$(T_index).pdf"
#savefig(joinpath(_PATH_TO_FIGS, filename));

### Disclaimer and Risks
__This content is offered solely for training and informational purposes__. No offer or solicitation to buy or sell securities or derivative products, or any investment or trading advice or strategy, is made, given, or endorsed by the teaching team.

__Trading involves risk__. Carefully review your financial situation before investing in securities, futures contracts, options, or commodity interests. Past performance, whether actual or indicated by historical tests of strategies, is no guarantee of future performance or success. Trading is generally inappropriate for someone with limited resources, limited investment or trading experience, or a low risk tolerance. Risk only capital that is not required for living expenses.

__You are fully responsible for any investment or trading decisions you make__. Such decisions should be based solely on your evaluation of your financial circumstances, investment or trading objectives, risk tolerance, and liquidity needs.