In [1]:
import numpy as np
import pandas as pd

## A/B Test Simulation

Three product variants with known true conversion rates (CVR):

* A: CVR 30%
* B: CVR 31%
* C: CVR 32%

In [2]:
# True conversion rate of each product variant used by the simulations below.
A, B, C = 0.30, 0.31, 0.32

# Parallel lists: products_name[k] is the label of products_cvr[k].
products_name = ["A", "B", "C"]
products_cvr = [A, B, C]

## 1. Greedy Algorithm

In [3]:
def greedy(n_iter, products_cvr, products_name, init_popul_per_iter=1000, increment_size=200):
    """Simulate a greedy traffic-reallocation test and print per-round progress.

    In every round each remaining product is shown to its current population
    and its conversions are drawn from a binomial distribution. Afterwards up
    to ``increment_size`` visitors are moved from the product with the lowest
    observed conversion rate to the one with the highest. A product whose
    population reaches zero is dropped from all later rounds.

    Args:
        n_iter: Number of rounds to simulate.
        products_cvr: True conversion rate of each product. The caller's
            sequence is not modified.
        products_name: Display name of each product, parallel to
            ``products_cvr``.
        init_popul_per_iter: Population every product starts with.
        increment_size: Maximum number of visitors moved per round.

    Returns:
        None. One line is printed per round with the cumulative conversion
        rate and the populations that will be used in the next round.
    """
    # Work on private copies so dropping a product never touches caller data.
    products_cvr = list(products_cvr)
    products_name = list(products_name)

    popul_total = 0
    converted_total = 0
    popul_size_per_iter = [init_popul_per_iter] * len(products_cvr)
    for i in range(n_iter):
        results = []
        for popul, cvr in zip(popul_size_per_iter, products_cvr):
            converted = np.random.binomial(popul, cvr)

            results.append(converted / popul)
            popul_total += popul
            converted_total += converted

        # Move traffic from this round's worst performer to its best one.
        max_ix = int(np.argmax(results))
        min_ix = int(np.argmin(results))
        if max_ix != min_ix:
            # Never take more than the loser still has: otherwise a population
            # that is not a multiple of increment_size would skip zero, turn
            # negative, and make the next binomial draw fail.
            moved = min(increment_size, popul_size_per_iter[min_ix])
            popul_size_per_iter[max_ix] += moved
            popul_size_per_iter[min_ix] -= moved

        # Retire a product that has just run out of traffic.
        if 0 in popul_size_per_iter:
            zero_ix = popul_size_per_iter.index(0)
            del popul_size_per_iter[zero_ix]
            del products_cvr[zero_ix]
            del products_name[zero_ix]

        print_str = f"iteration: {i+1:3} | CVR: {100 * converted_total / popul_total:.2f}% | population: "
        for ix, popul_size in enumerate(popul_size_per_iter):
            print_str += f"{products_name[ix]}={popul_size} "

        print(print_str)
        

In [4]:
# Ten rounds of the greedy strategy on the three known-CVR products.
greedy(n_iter=10, products_cvr=products_cvr, products_name=products_name)

iteration:   1 | CVR: 30.53% | population: A=800 B=1200 C=1000 
iteration:   2 | CVR: 30.88% | population: A=1000 B=1200 C=800 
iteration:   3 | CVR: 30.36% | population: A=1000 B=1000 C=1000 
iteration:   4 | CVR: 30.46% | population: A=800 B=1000 C=1200 
iteration:   5 | CVR: 30.73% | population: A=600 B=1000 C=1400 
iteration:   6 | CVR: 30.87% | population: A=400 B=1000 C=1600 
iteration:   7 | CVR: 30.81% | population: A=200 B=1000 C=1800 
iteration:   8 | CVR: 30.88% | population: A=400 B=1000 C=1600 
iteration:   9 | CVR: 31.07% | population: A=200 B=1000 C=1800 
iteration:  10 | CVR: 31.03% | population: B=1000 C=2000 


## 2. Epsilon-Greedy Algorithm

In [5]:
def e_greedy(n_iter, products_cvr, products_name, eps=0.3, init_popul_per_iter=1000, increment_size=200):
    """Simulate an epsilon-greedy traffic-reallocation test and print progress.

    Each round every remaining product is shown to its current population and
    conversions are drawn from a binomial distribution. A uniform random draw
    then decides how traffic is moved: if the draw is greater than ``eps`` the
    best/worst products by observed conversion rate are used (exploit),
    otherwise two distinct products are picked at random (explore). Up to
    ``increment_size`` visitors move from the "loser" to the "winner". A
    product whose population reaches zero is dropped from later rounds.

    Args:
        n_iter: Number of rounds to simulate.
        products_cvr: True conversion rate of each product. The caller's
            sequence is not modified.
        products_name: Display name of each product, parallel to
            ``products_cvr``.
        eps: Exploration threshold compared against ``np.random.rand()``.
        init_popul_per_iter: Population every product starts with.
        increment_size: Maximum number of visitors moved per round.

    Returns:
        None. One line is printed per round with the cumulative conversion
        rate and the populations that will be used in the next round.
    """
    # Work on private copies so dropping a product never touches caller data.
    products_cvr = list(products_cvr)
    products_name = list(products_name)

    popul_total = 0
    converted_total = 0
    popul_size_per_iter = [init_popul_per_iter] * len(products_cvr)
    for i in range(n_iter):
        results = []
        for popul, cvr in zip(popul_size_per_iter, products_cvr):
            converted = np.random.binomial(popul, cvr)

            results.append(converted / popul)
            popul_total += popul
            converted_total += converted

        # Decide between exploiting the observed rates and exploring at random.
        exploit = np.random.rand() > eps
        n_products = len(popul_size_per_iter)
        # With a single product left there is nothing to reallocate, and
        # sampling two distinct indices would raise ValueError.
        if n_products >= 2:
            if exploit:
                max_ix = int(np.argmax(results))
                min_ix = int(np.argmin(results))
            else:
                max_ix, min_ix = np.random.choice(range(n_products), size=2, replace=False)

            if max_ix != min_ix:
                # Never take more than the loser still has, so a population
                # can hit zero exactly but never become negative.
                moved = min(increment_size, popul_size_per_iter[min_ix])
                popul_size_per_iter[max_ix] += moved
                popul_size_per_iter[min_ix] -= moved

        # Retire a product that has just run out of traffic.
        if 0 in popul_size_per_iter:
            zero_ix = popul_size_per_iter.index(0)
            del popul_size_per_iter[zero_ix]
            del products_cvr[zero_ix]
            del products_name[zero_ix]

        print_str = f"iteration: {i+1:3} | CVR: {100 * converted_total / popul_total:.2f}% | population: "
        for ix, popul_size in enumerate(popul_size_per_iter):
            print_str += f"{products_name[ix]}={popul_size} "

        print(print_str)
        

In [6]:
# Ten rounds of the epsilon-greedy strategy (default eps) on the same products.
e_greedy(n_iter=10, products_cvr=products_cvr, products_name=products_name)

iteration:   1 | CVR: 30.20% | population: A=1000 B=800 C=1200 
iteration:   2 | CVR: 30.25% | population: A=1200 B=600 C=1200 
iteration:   3 | CVR: 30.34% | population: A=1000 B=800 C=1200 
iteration:   4 | CVR: 30.92% | population: A=800 B=1000 C=1200 
iteration:   5 | CVR: 31.13% | population: A=800 B=800 C=1400 
iteration:   6 | CVR: 31.31% | population: A=800 B=600 C=1600 
iteration:   7 | CVR: 31.32% | population: A=600 B=600 C=1800 
iteration:   8 | CVR: 31.48% | population: A=800 B=600 C=1600 
iteration:   9 | CVR: 31.46% | population: A=600 B=600 C=1800 
iteration:  10 | CVR: 31.45% | population: A=600 B=400 C=2000 


## 3. UCB (Upper-Confidence-Bound) Algorithm

In [7]:
def ucb(n_iter, products_cvr, products_name, c_val=5, init_popul_per_iter=1000, increment_size=200):
    """Simulate a UCB-style traffic-reallocation test and print progress.

    Each round every remaining product is shown to its current population and
    conversions are drawn from a binomial distribution. Products are then
    scored as ``observed_rate + c_val * sqrt(log(round) / n)``, where ``n``
    starts at 1 and is incremented each time the product has the top score.
    Up to ``increment_size`` visitors move from the lowest-scoring product to
    the highest-scoring one. A product whose population reaches zero is
    dropped from later rounds.

    Args:
        n_iter: Number of rounds to simulate.
        products_cvr: True conversion rate of each product. The caller's
            sequence is not modified.
        products_name: Display name of each product, parallel to
            ``products_cvr``.
        c_val: Weight of the exploration bonus in the score.
        init_popul_per_iter: Population every product starts with.
        increment_size: Maximum number of visitors moved per round.

    Returns:
        None. One line is printed per round with the cumulative conversion
        rate and the populations that will be used in the next round.
    """
    # Work on private copies so dropping a product never touches caller data.
    products_cvr = list(products_cvr)
    products_name = list(products_name)

    popul_total = 0
    converted_total = 0
    popul_size_per_iter = [init_popul_per_iter] * len(products_cvr)

    # Number of times each product has been the top-scoring one (starts at 1).
    n_list = [1] * len(products_cvr)
    for i in range(n_iter):
        results = []
        for popul, cvr in zip(popul_size_per_iter, products_cvr):
            converted = np.random.binomial(popul, cvr)

            results.append(converted / popul)
            popul_total += popul
            converted_total += converted

        # Score once; the bonus is zero in the first round because log(1) == 0.
        scores = np.array(results) + c_val * np.sqrt(np.log(i + 1) / np.array(n_list))
        max_ix = int(np.argmax(scores))
        min_ix = int(np.argmin(scores))
        if max_ix != min_ix:
            # Never take more than the loser still has, so a population can
            # hit zero exactly but never become negative.
            moved = min(increment_size, popul_size_per_iter[min_ix])
            popul_size_per_iter[max_ix] += moved
            popul_size_per_iter[min_ix] -= moved

        n_list[max_ix] += 1

        # Retire a product that has just run out of traffic.
        if 0 in popul_size_per_iter:
            zero_ix = popul_size_per_iter.index(0)
            del popul_size_per_iter[zero_ix]
            del products_cvr[zero_ix]
            del products_name[zero_ix]
            del n_list[zero_ix]

        print_str = f"iteration: {i+1:3} | CVR: {100 * converted_total / popul_total:.2f}% | population: "
        for ix, popul_size in enumerate(popul_size_per_iter):
            print_str += f"{products_name[ix]}={popul_size} "

        print(print_str)
        

In [8]:
# Ten rounds of the UCB strategy (default c_val) on the same products.
ucb(n_iter=10, products_cvr=products_cvr, products_name=products_name)

iteration:   1 | CVR: 31.97% | population: A=800 B=1200 C=1000 
iteration:   2 | CVR: 31.43% | population: A=800 B=1000 C=1200 
iteration:   3 | CVR: 31.51% | population: A=1000 B=800 C=1200 
iteration:   4 | CVR: 31.51% | population: A=800 B=800 C=1400 
iteration:   5 | CVR: 31.50% | population: A=800 B=1000 C=1200 
iteration:   6 | CVR: 31.49% | population: A=1000 B=800 C=1200 
iteration:   7 | CVR: 31.50% | population: A=1000 B=600 C=1400 
iteration:   8 | CVR: 31.55% | population: A=1200 B=600 C=1200 
iteration:   9 | CVR: 31.46% | population: A=1000 B=800 C=1200 
iteration:  10 | CVR: 31.35% | population: A=800 B=800 C=1400 


## 4. Thompson Sampling

In [9]:
def thompson(n_iter, products_cvr, products_name, init_popul_per_iter=1000):
    """Simulate Thompson sampling over the products and print per-round progress.

    Every round serves ``init_popul_per_iter * len(products_cvr)`` visitors one
    at a time. For each visitor a value is sampled from every product's Beta
    distribution, the product with the largest sample is shown, and a
    Bernoulli draw with that product's true rate decides whether the visitor
    converts. A conversion increments the product's first Beta parameter, a
    miss increments the second.

    Args:
        n_iter: Number of rounds to simulate.
        products_cvr: True conversion rate of each product.
        products_name: Display name of each product, parallel to
            ``products_cvr``.
        init_popul_per_iter: Visitors per product per round; the round total
            is this value times the number of products.

    Returns:
        The final Beta parameters as a list of ``[alpha, beta]`` pairs, one
        per product. One line is also printed per round with the cumulative
        conversion rate and how many visitors each product received.
    """
    n_products = len(products_cvr)
    visitors_per_round = init_popul_per_iter * n_products

    # One [alpha, beta] row per product, each starting at [1, 1].
    posterior = np.array([[1, 1] for _ in range(n_products)])
    shown_overall = 0
    converted_overall = 0

    for round_no in range(1, n_iter + 1):
        shown = [0] * n_products
        converted = [0] * n_products

        for _ in range(visitors_per_round):
            # Sample one candidate rate per product and serve the largest.
            sampled_rates = [np.random.beta(alpha, beta) for alpha, beta in posterior]
            chosen = np.argmax(sampled_rates)
            shown[chosen] += 1

            did_convert = np.random.binomial(1, products_cvr[chosen]) == 1
            if did_convert:
                posterior[chosen][0] += 1
                converted[chosen] += 1
            else:
                posterior[chosen][1] += 1

        converted_overall += sum(converted)
        shown_overall += sum(shown)

        header = f"iteration: {round_no:3} | CVR: {100 * converted_overall / shown_overall:.2f}% | population: "
        allocation = "".join(
            f"{products_name[ix]}={count} " for ix, count in enumerate(shown)
        )
        print(header + allocation)

    return posterior.tolist()
    

In [10]:
# Ten rounds of Thompson sampling; keep the learned Beta parameters for inspection.
beta_params = thompson(n_iter=10, products_cvr=products_cvr, products_name=products_name)

iteration:   1 | CVR: 30.80% | population: A=1705 B=1028 C=267 
iteration:   2 | CVR: 30.75% | population: A=1849 B=848 C=303 
iteration:   3 | CVR: 31.22% | population: A=1439 B=1190 C=371 
iteration:   4 | CVR: 30.88% | population: A=1092 B=786 C=1122 
iteration:   5 | CVR: 30.94% | population: A=1220 B=741 C=1039 
iteration:   6 | CVR: 30.88% | population: A=1545 B=626 C=829 
iteration:   7 | CVR: 30.87% | population: A=893 B=353 C=1754 
iteration:   8 | CVR: 31.07% | population: A=485 B=299 C=2216 
iteration:   9 | CVR: 31.18% | population: A=132 B=127 C=2741 
iteration:  10 | CVR: 31.31% | population: A=119 B=126 C=2755 


In [11]:
# Final [alpha, beta] pair per product as returned by thompson(): each starts
# at [1, 1]; alpha grows by one per conversion and beta by one per miss.
beta_params

[[3228, 7253], [1878, 4248], [4289, 9110]]

# Simulation with unknown CVRs

In [12]:
BASE_CVR = 0.3

# A stays at the baseline; B, C and D each get an independent uniform offset
# of (random() - 0.5) * 0.099, i.e. at most about +/-0.0495 around BASE_CVR.
A = BASE_CVR
B, C, D = (BASE_CVR + (np.random.random() - 0.5) * 0.099 for _ in range(3))

# Parallel lists: products_name[k] is the label of products_cvr[k].
products_name = ["A", "B", "C", "D"]
products_cvr = [A, B, C, D]

In [14]:
# Run every strategy for ten rounds on the same randomly drawn products.
strategies = {
    "greedy": greedy,
    "e_greedy": e_greedy,
    "ucb": ucb,
    "thompson": thompson,
}
for name, func in strategies.items():
    print(f"{'-'*30} {name} {'-'*30}")
    func(10, products_cvr, products_name)

# Reveal the true conversion rates so the allocations above can be judged.
print("\nAnswer!")

for name, cvr in zip(products_name, products_cvr):
    print(f"{name}: {100 * cvr:.2f}")

------------------------------ greedy ------------------------------
iteration:   1 | CVR: 29.65% | population: A=1000 B=1000 C=800 D=1200 
iteration:   2 | CVR: 29.54% | population: A=1000 B=1000 C=600 D=1400 
iteration:   3 | CVR: 29.93% | population: A=1000 B=1000 C=400 D=1600 
iteration:   4 | CVR: 30.18% | population: A=1000 B=1000 C=200 D=1800 
iteration:   5 | CVR: 30.08% | population: A=1000 B=1000 D=2000 
iteration:   6 | CVR: 30.38% | population: A=800 B=1000 D=2200 
iteration:   7 | CVR: 30.49% | population: A=800 B=800 D=2400 
iteration:   8 | CVR: 30.70% | population: A=800 B=600 D=2600 
iteration:   9 | CVR: 30.85% | population: A=800 B=400 D=2800 
iteration:  10 | CVR: 31.11% | population: A=600 B=400 D=3000 
------------------------------ e_greedy ------------------------------
iteration:   1 | CVR: 29.62% | population: A=1200 B=1000 C=800 D=1000 
iteration:   2 | CVR: 29.11% | population: A=1200 B=1000 C=600 D=1200 
iteration:   3 | CVR: 29.65% | population: A=1200 B=1