In [1]:
import pandas as pd
import numpy as np
import math

In [2]:
df = pd.read_csv('AB_test_data.csv')

In [3]:
df.head()

Unnamed: 0,Variant,purchase_TF,date,id
0,A,False,2019-12-26,0x6f9421
1,A,False,2019-08-16,0x59d442
2,A,True,2019-03-18,0x6db8f8
3,A,False,2019-02-13,0x68245d
4,A,False,2019-09-28,0x28566e


# Part 1 - Hypo Test

In [4]:
df_A = df[df['Variant'] == 'A']
df_B = df[df['Variant'] == 'B']

A_conv_rate = len(df_A[df_A['purchase_TF'] == True]) / len(df_A)
B_conv_rate = len(df_B[df_B['purchase_TF'] == True]) / len(df_B)
n = len(df_B)

Null Hypothesis: B_conv_rate = A_conv_rate

Alternative Hypothesis: B_conv_rate >= A_conv_rate

In [5]:
#Calculate Z-score
z = (B_conv_rate - A_conv_rate) / math.sqrt((A_conv_rate * (1- A_conv_rate))/n)
print(z)

8.692151285198767


Z (alpha) = 1.64

Hence we reject the null hypothesis and conclude that B conversion rate does indeed have significantly higher conversion rate than A.

# Part 2 - Optimal Sample Size

In [6]:
t_alpha = 1.96
p_bar = (A_conv_rate + B_conv_rate) / 2
p0 = A_conv_rate
p1 = B_conv_rate
delta = (B_conv_rate - A_conv_rate)
t_beta = 0.842

optimal = (t_alpha * math.sqrt((2*p_bar*(1-p_bar))) + t_beta * math.sqrt(p0*(1-p0) + p1*(1-p1)))**2 * (1/(delta**2))
print(optimal)

1157.8288770933054


## Sampling Optimal Sizes from Data

Conduct the test 10 times using samples of the optimal size. Report results.

In [7]:
list_of_z_scores = []

for i in range(10):
    sample = df_B.sample(n=1157,axis=0)
    B_conv_rate = len(sample[sample['purchase_TF'] == True]) / len(sample)
    
    z = (B_conv_rate - A_conv_rate) / math.sqrt((A_conv_rate * (1- A_conv_rate))/len(sample))
    list_of_z_scores.append(z)

list_of_z_scores

[5.900341910385552,
 5.490974495532124,
 4.99973359770801,
 4.835986631766639,
 2.871023040470184,
 3.3622639382942987,
 4.917860114737323,
 4.590366182854582,
 4.99973359770801,
 4.26287225097184]

In [8]:
list_of_success = []

for i in list_of_z_scores:
    if i > 1.96:
        list_of_success.append(True)
    else:
        list_of_success.append(False)

In [9]:
sum(list_of_success)/len(list_of_success)

1.0

# Part 3-  Conduct Sequential Test

Conduct a sequential test for the 10 samples. For any of the samples, were you able to stop the test prior to using the full sample? What was the average number of iterations required to stop the test?

Under H0: P(x=1) = 0.15206

Under H1: P(x=1) = 0.1962

Type I error: 5%

Type II error: 20%

In [10]:
upper = np.log(1/0.05)
lower = np.log(0.2)

p0 = 0.15206
p1 = 0.1962

In [11]:
def feed_criteria(total_criteria,sample,number):
    global len_log
    global success_log
    
    if (total_criteria <= lower):
        print("test stopped and accept H0")
        print(len(log))
        len_log.append(len(log))
        success_log.append(0)
    elif total_criteria >= upper:
        print('test stopped and accept H1')
        print(len(log))
        len_log.append(len(log))
        success_log.append(1)

    else:
        #print("keep going")
        if sample.purchase_TF.iloc[number] == True:
            criteria = np.log(p1/p0)
        else:
            criteria = np.log((1-p1)/(1-p0))
        log.append(criteria)
        end_criteria = sum(log)
        number = number +1
        #print("current number:",end_criteria)
        feed_criteria(end_criteria,sample,number)

In [12]:
len_log = []
success_log = []
for i in range(10):
    sample = df_B.sample(n=1157,axis=0)
    number = 0
    log = []
    feed_criteria(0,sample,number)


test stopped and accept H0
106
test stopped and accept H1
543
test stopped and accept H1
582
test stopped and accept H1
248
test stopped and accept H1
341
test stopped and accept H1
363
test stopped and accept H0
244
test stopped and accept H1
243
test stopped and accept H1
142
test stopped and accept H1
275


In [13]:
#Avg number of iterations
sum(len_log)/len(len_log)

308.7

In [14]:
#Avg number of successes
sum(success_log)/len(success_log)

0.8