In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.stats.api as sms
from math import ceil
from sklearn.utils import resample

In [2]:
df = pd.read_csv("AB_test_data.csv")

In [3]:
df["purchase_TF"] = df["purchase_TF"].astype(int)
df

Unnamed: 0,purchase_TF,Variant,date,id
0,0,A,2019-11-08,0x25b44a
1,0,B,2020-08-27,0x46271e
2,0,A,2020-06-11,0x80b8f1
3,0,B,2020-08-22,0x8d736d
4,0,A,2020-08-05,0x96c9c8
...,...,...,...,...
129995,0,A,2020-07-23,0x4089c2
129996,0,A,2020-06-24,0x6a5e3a
129997,0,A,2019-10-12,0x95e302
129998,0,A,2020-03-18,0x7c4afa


In [4]:
df_A = df[df["Variant"] == "A"]
df_A

Unnamed: 0,purchase_TF,Variant,date,id
0,0,A,2019-11-08,0x25b44a
2,0,A,2020-06-11,0x80b8f1
4,0,A,2020-08-05,0x96c9c8
5,0,A,2020-03-10,0x751c24
6,0,A,2019-11-05,0x60d2bd
...,...,...,...,...
129995,0,A,2020-07-23,0x4089c2
129996,0,A,2020-06-24,0x6a5e3a
129997,0,A,2019-10-12,0x95e302
129998,0,A,2020-03-18,0x7c4afa


In [5]:
df_B = df[df["Variant"] == "B"]
df_B

Unnamed: 0,purchase_TF,Variant,date,id
1,0,B,2020-08-27,0x46271e
3,0,B,2020-08-22,0x8d736d
59,0,B,2020-08-19,0x3ff83f
74,0,B,2020-08-10,0x138d19
103,0,B,2020-08-04,0x966e6a
...,...,...,...,...
129805,0,B,2020-08-27,0x7d95d0
129827,0,B,2020-08-10,0x38a6e8
129879,0,B,2020-08-30,0x6a711a
129910,0,B,2020-08-13,0x13175a


In [6]:
mean_a = df_A.mean()
mean_b = df_B.mean()
print(mean_a)
print(mean_b)

purchase_TF    0.149616
dtype: float64
purchase_TF    0.1766
dtype: float64


# Q1

Ho: mean_b - mean_a = 0\
Ha: mean_b - mean_a > 0\
one-tail test

In [7]:
sample_size = 2500

In [8]:
np.random.seed(seed=3)
# create a sample of 50 sample_A & sample_B
sample_A = resample(df_A, n_samples=sample_size, replace=False)
sample_B = resample(df_B, n_samples=sample_size, replace=False)

In [9]:
mean_difference = sample_B.mean()-sample_A.mean()
mean_difference

purchase_TF    0.0128
dtype: float64

In [10]:
sample_A.mean()

purchase_TF    0.1564
dtype: float64

In [11]:
sample_B.mean()

purchase_TF    0.1692
dtype: float64

In [12]:
se_A = sample_A.std()/(sample_size**0.5)
se_A

purchase_TF    0.007266
dtype: float64

In [13]:
z_a2=stats.norm.ppf(q= 0.95,  #q=0.975/0.995 => 90%/ 95%/ 99% confidence interval
               loc = 0,               
               scale= 1)  
z_a2

1.6448536269514722

In [14]:
z = mean_difference/se_A
z

purchase_TF    1.761596
dtype: float64

Since z > z_a2, we are able to reject the null hypothesis with 95% statistical significance that Alternative B improved conversion rates over Alternative A.

# Q2

In [15]:
se = ((sample_A.std()/df_A.shape[0]) + (sample_B.std()/df_B.shape[0]))**0.5
se


purchase_TF    0.008827
dtype: float64

In [16]:
mean_a = df[df.Variant == 'A'].purchase_TF.mean()
mean_b = df[df.Variant == 'B'].purchase_TF.mean()

In [17]:
print(mean_a)
print(mean_b)

0.149616
0.1766


In [18]:
import scipy.stats as scs

def min_sample_size(bcr, mde, power=0.8, sig_level=0.05):
    standard_norm = scs.norm(0, 1)

    # find Z_beta from desired power
    Z_beta = standard_norm.ppf(power)

    # find Z_alpha
    Z_alpha = standard_norm.ppf(1-sig_level/2)

    # average of probabilities from both groups
    pooled_prob = (bcr + bcr + mde) / 2

    min_N = (2 * pooled_prob * (1 - pooled_prob) * (Z_beta + Z_alpha)**2
             / mde**2)

    return min_N

In [19]:
optimal_n = min_sample_size(mean_a, mean_a-mean_b, power=0.8, sig_level=0.05)
optimal_n = ceil(optimal_n)
print(optimal_n)

2536


## find optimal size by sms for validation

In [20]:
effect_size = sms.proportion_effectsize(mean_a, mean_b)    # Calculating effect size based on our expected rates
required_n = sms.NormalIndPower().solve_power(
    effect_size, 
    power=0.8, 
    alpha=0.05, 
    ratio=1
    )                                                  # Calculating sample size needed

required_n = ceil(required_n)                          # Rounding up to next whole number                          

print(required_n)


2939


## checking for 10 samples

In [21]:
i=0
mean_a = df[df["Variant"]=="A"].mean()
mean_a

purchase_TF    0.149616
dtype: float64

In [22]:
for i in range(10):
    np.random.seed(seed=i+1)
    sample_size = optimal_n
    sample = resample(df, n_samples=sample_size, replace=False)
    mean_a = sample[sample["Variant"]=="A"].mean()
    mean_b = sample[sample["Variant"]=="B"].mean()
    mean_difference = mean_b - mean_a
    correct = (mean_difference/se_A) > z_a2
    print("sample",i+1,":", correct)
    i=i+1

sample 1 : purchase_TF    False
dtype: bool
sample 2 : purchase_TF    True
dtype: bool
sample 3 : purchase_TF    True
dtype: bool
sample 4 : purchase_TF    True
dtype: bool
sample 5 : purchase_TF    True
dtype: bool
sample 6 : purchase_TF    True
dtype: bool
sample 7 : purchase_TF    True
dtype: bool
sample 8 : purchase_TF    True
dtype: bool
sample 9 : purchase_TF    False
dtype: bool
sample 10 : purchase_TF    False
dtype: bool


# Q3

In [23]:
alpha = 0.05
beta = 0.2
boundary_A = np.log(1/alpha)
boundary_B = np.log(beta)

In [24]:
ll=[0]
S=[0]
avg = []
for i in range(10):
    np.random.seed(seed=i+1)
    sample_size = optimal_n
    sample = resample(df, n_samples=optimal_n, replace=False)
    data = sample.purchase_TF.to_list()
    mean_a = 0.146869
    mean_b = 0.1766
    #mean_a = sample[sample["Variant"]=="A"].mean()
    #mean_b = sample[sample["Variant"]=="B"].mean()
    #for i in mean_a:
    #    a = i
    #for j in mean_b:
    #    b = j
    for i in range(len(data)):
        if data[i] == 1:
            log_ratio = np.log(mean_b) - np.log(mean_a)
        else:
            log_ratio = np.log(1-mean_b) - np.log(1-mean_a)
        ll.append(log_ratio)
        S=np.sum(ll)
        if S>boundary_A:
            print('upward decision: %d steps (S=%f)'%(i,S))
            break
        elif S<boundary_B:
            print('downward decision: %d steps (S=%f)'%(i,S))
            break
    avg.append(i)
    ll=[0]
    S=[0]
print(avg)
mean_step = sum(avg)/len(avg)
mean_step

downward decision: 441 steps (S=-1.609878)
downward decision: 212 steps (S=-1.620257)
downward decision: 355 steps (S=-1.636814)
downward decision: 615 steps (S=-1.626950)
downward decision: 194 steps (S=-1.641231)
downward decision: 1947 steps (S=-1.613614)
downward decision: 751 steps (S=-1.615026)
downward decision: 212 steps (S=-1.620257)
downward decision: 441 steps (S=-1.609878)
downward decision: 522 steps (S=-1.625405)
[441, 212, 355, 615, 194, 1947, 751, 212, 441, 522]


569.0

In [25]:
step_list = [441,212,355,615,194,1947,751,212,441,522]
from numpy import *
mean_step = mean(step_list)
mean_step

569.0