In [None]:
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing

from __future__ import print_function, division
from builtins import range

# Note: you may need to update your version of future
# sudo pip install -U future

# Thompson Sampling Categorical

import matplotlib.pyplot as plt
import numpy as np



In [None]:
# Seed NumPy's global random generator so the simulation draws are repeatable.
np.random.seed(2)

# ---------------- Customer input meta data ----------------

# Name of the experiment (kept as a one-element list).
experiment_title = ['To Find The Best Set Among 3 Sets of 6 Product Photos in the Web Page']

# Number of trials to run in the experiment.
NUM_TRIALS = 10000

# "Real" probabilities used by the simulation: one row per test option,
# one entry per possible user selection within that option.
BANDIT_PROBABILITIES = [
    [0.2, 0.1, 0.1, 0.2, 0.2, 0.2],
    [0.5, 0.1, 0.1, 0.1, 0.1, 0.1],
    [0.3, 0.1, 0.1, 0.3, 0.1, 0.1],
]

# Decision criterion for the experiment (kept as a one-element list).
decision_criterion = ['reward generated']

# 1-based labels of the user selections available in every test option.
reward_index = np.array([1, 2, 3, 4, 5, 6])
# Money earned per click for the selection at the same position in reward_index.
reward_value = np.array([0, 399, 489, 599, 428, 528])

# Expected total reward of each test option if it alone were shown for all
# NUM_TRIALS trials: probability-weighted mean reward times the number of trials.
dice_expected_reward_list = [
    np.average(reward_value, weights=option_probs) * NUM_TRIALS
    for option_probs in BANDIT_PROBABILITIES
]

# Best, worst and mean of the per-option expected totals.
reward_optimal = np.max(dice_expected_reward_list)
reward_lowest = np.min(dice_expected_reward_list)
reward_avg = np.mean(dice_expected_reward_list)

In [None]:
class Bandit:
  """One test option: a categorical arm with a Dirichlet belief over its outcomes.

  Attributes:
    p: true outcome probabilities, used only by ``pull`` to simulate a user.
    alpha: Dirichlet concentration parameters (prior plus observed counts),
      stored as a float NumPy array owned by this instance.
    N: number of times ``update`` has been called (for information only).
  """

  def __init__(self, p, alpha=None):
    """Create an arm.

    Args:
      p: sequence of true outcome probabilities for this arm.
      alpha: optional Dirichlet prior, one entry per outcome. Defaults to a
        uniform prior of ones with the same length as ``p``.
    """
    self.p = p
    # Always keep a private copy: ``update`` adds to ``alpha`` in place, so
    # storing the caller's object (or a shared default list) would let one
    # bandit's observations leak into another bandit or into the caller's data.
    if alpha is None:
      self.alpha = np.ones(len(p), dtype=float)
    else:
      self.alpha = np.array(alpha, dtype=float)
    self.N = 0 # for information only

  def pull(self):
    """Simulate one user selection.

    Returns:
      A length-1 NumPy array holding one label drawn from the module-level
      ``reward_index`` with probabilities ``self.p``.
    """
    return np.random.choice(reward_index, size=1, p=np.array(self.p))

  def sample(self):
    """Draw outcome probabilities from the Dirichlet belief and score them.

    Returns:
      The expected reward under the sampled probabilities, i.e. the
      module-level ``reward_value`` averaged with the sample as weights.
    """
    probs = np.random.dirichlet(self.alpha, 1)[0]
    # The rewards are the values and the sampled probabilities are the weights.
    # With the arguments the other way round the result is divided by
    # sum(reward_value), which fails when rewards sum to zero and reverses the
    # ranking of arms when they sum to a negative number.
    return np.average(reward_value, weights=probs)

  def update(self, x):
    """Add an observation to the belief.

    Args:
      x: per-outcome counts to add to ``alpha`` (e.g. a one-hot vector for the
        outcome that was just observed); must have one entry per outcome.
    """
    # Element-wise addition regardless of whether ``x`` is a list or an array.
    self.alpha += np.asarray(x, dtype=float)
    self.N += 1


In [None]:
def experiment():
  """Run the Thompson-sampling simulation and print a report.

  For each of NUM_TRIALS trials: every bandit draws a score with ``sample()``,
  the highest-scoring bandit is chosen, one user selection is simulated with
  ``pull()``, its reward is recorded and the chosen bandit is updated with a
  one-hot vector of the observed selection. Afterwards the customer input and
  the experiment results are printed. Returns nothing.

  Reads the module-level configuration (BANDIT_PROBABILITIES, NUM_TRIALS,
  reward_value, experiment_title, decision_criterion and the precomputed
  expected rewards).
  """
  bandits = [Bandit(p) for p in BANDIT_PROBABILITIES]

  # 1-based trial numbers at which a progress line is printed.
  sample_points = {1, 2, 3, 4, 5, 1000, 5000, 10000}
  rewards = np.zeros(NUM_TRIALS)

  for i in range(NUM_TRIALS):
    j = np.argmax([b.sample() for b in bandits]) # Take samples from Dirichlet

    x = bandits[j].pull() # Select an arm and check the reward
    outcome = x[0] - 1 # pull() returns a 1-based label; convert to a 0-based position
    rewards[i] = reward_value[outcome]

    # Fresh one-hot observation per trial, so no shared array has to be reset.
    one_hot = np.zeros(len(reward_value), dtype=int)
    one_hot[outcome] = 1
    bandits[j].update(one_hot)

    # Compare against the 1-based trial number: the loop index runs from 0 to
    # NUM_TRIALS - 1, so testing ``i`` directly could never match NUM_TRIALS
    # and labelled every logged trial one too low.
    trial = i + 1
    if trial in sample_points:
      print(f'{trial}th iteration, select test option {j+1}, received award {rewards[i]}')

  total_reward = rewards.sum()

  # Print the customer input meta data

  print('\n**************Customer Inpurt Meta Data***************\n')

  print(f'Experiment Description: {experiment_title}\n')
  print(f'The number of trials for the experiment is: {NUM_TRIALS}\n')
  print(f'The metric used in the experiment is: {decision_criterion}\n')
  print(f'There are {len(BANDIT_PROBABILITIES)} test options and based on the simulation probabilities:')

  print(f"The highest expected total rewards is from test option {np.argmax(dice_expected_reward_list)+1}: ${round(reward_optimal,2)}")
  print(f"The lowest expected total rewards is from test option {np.argmin(dice_expected_reward_list)+1}: ${round(reward_lowest,2)}\n")
  print(f"The average expected total rewards from the traditinal statistical method is: ${int(reward_avg)}\n")

  print('**************Experiment Results**********************\n')

  print(f"total reward earned from the experiment with Bayesian Machine Learning: ${int(total_reward)}\n")
  # This value is total reward divided by the number of trials, not a win rate.
  print(f"average reward per trial from the experiment: {total_reward / NUM_TRIALS}\n")
  print(f"num times selected each test option: {[b.N for b in bandits]}\n")

  print('**************Experiment Result Analysis***************\n')
  print(f"difference between the Bayesian and traditional statistical approches is: ${int(total_reward)-int(reward_avg)}\n")
  print(f"difference between the Bayesian and highest expected rewards is: ${int(reward_optimal)-int(total_reward)}\n")


# Run the simulation only when this code executes as the main program
# (it is skipped when the code is imported as a module).
if __name__ == "__main__":
  experiment()

[<__main__.Bandit object at 0x7f8084e54b10>, <__main__.Bandit object at 0x7f8084e54ed0>, <__main__.Bandit object at 0x7f8084e54210>]
1th iteration, select test option 1, received award 428.0
2th iteration, select test option 2, received award 0.0
3th iteration, select test option 1, received award 599.0
4th iteration, select test option 1, received award 599.0
5th iteration, select test option 1, received award 599.0
1000th iteration, select test option 1, received award 0.0
5000th iteration, select test option 1, received award 489.0

**************Customer Inpurt Meta Data***************

Experiment Description: ['To Find The Best Set Among 3 Sets of 6 Product Photos in the Web Page']

The number of trials for the experiment is: 10000

The metric used in the experiment is: ['reward generated']

There are 3 test options and based on the simulation probabilities:
The highest expected total rewards is from test option 1: $3998000.0
The lowest expected total rewards is from test option 2