***
This notebook is a walkthrough of Tammy Rotem's AB Testing With Python post on Kaggle.
***

In [111]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.special import factorial

In [41]:
metric_tup = np.array([('unique cookies to view page per day:', 40000),
('unique cookies to click "Start free trial" per day:', 3200),
('enrollments per day:', 660),
('click-through-probability on "Start free trial":', 0.08),
('probability of enrolling, given click:', 0.20625),
('probability of payment, given enroll:', 0.53),
('probability of payment, given click:', 0.1093125)])

In [60]:
df = pd.DataFrame(metric_tup, columns = ['', 'Baseline Value'])
df = df.style.set_properties(**{'text-align': 'left'})
df

Unnamed: 0,Unnamed: 1,Baseline Value
0,unique cookies to view page per day:,40000.0
1,"unique cookies to click ""Start free trial"" per day:",3200.0
2,enrollments per day:,660.0
3,"click-through-probability on ""Start free trial"":",0.08
4,"probability of enrolling, given click:",0.20625
5,"probability of payment, given enroll:",0.53
6,"probability of payment, given click:",0.1093125


In [5]:
baseline = {"Cookies":40000,"Clicks":3200,"Enrollments":660,"CTP":0.08,"GConversion":0.20625,
           "Retention":0.53,"NConversion":0.109313}

In [6]:
# scaling sample size to 5000 for variance estimation
baseline['Cookies'] = 5000
baseline['Clicks'] = baseline['Clicks']*5000/40000
baseline['Enrollments'] = baseline['Enrollments']*5000/40000
baseline

{'Cookies': 5000,
 'Clicks': 400.0,
 'Enrollments': 82.5,
 'CTP': 0.08,
 'GConversion': 0.20625,
 'Retention': 0.53,
 'NConversion': 0.109313}

#### Gross Conversion

In [7]:
gross_conversion = {}
gross_conversion["d_min"] = 0.01
gross_conversion["p"] = baseline["GConversion"]
gross_conversion["n"] = baseline["Clicks"]
gross_conversion["sd"] = round(np.sqrt(
    (gross_conversion["p"]*(1-gross_conversion["p"]))/gross_conversion["n"]), 4)
gross_conversion['sd']

0.0202

#### Retention

In [8]:
retention = {}
retention["d_min"] = 0.01
retention["p"] = baseline["Retention"]
retention["n"] = baseline["Enrollments"]
retention["sd"] = round(np.sqrt((retention["p"]*(1-retention["p"]))/retention["n"]), 4)
retention["sd"]

0.0549

#### Net Conversion 

In [99]:
net_conversion = {}
net_conversion["d_min"] = 0.01
net_conversion["p"] = baseline["NConversion"]
net_conversion["n"] = baseline["Clicks"]
net_conversion["sd"] = round(np.sqrt(
    (net_conversion["p"]*(1-net_conversion["p"]))/net_conversion["n"]), 4)
net_conversion['sd']

0.0156

In [143]:
def calculate_std(p, d):
    std1 = np.sqrt(2*p*(1-p))
    std2 = np.sqrt(p*(1-p)+(p+d)*(1-(p+d)))
    std = [std1, std2]
    return std

def calculate_z_score(alpha):
    return norm.ppf(alpha)

def calculate_sample_size(std, alpha, beta, d):
    n = pow(calculate_z_score(1-alpha/2)*std[0]+calculate_z_score(1-beta)*std[1], 2)/pow(d, 2)
    return n

def calculate_probability(x, n):
    return round((factorial(n)/(factorial(x)*factorial(n-x)))*(0.5**x)*(0.5**(n-x)), 4)

def calculate_two_side_p(x, n):
    p = 0 
    for i in range(0, x+1):
        p = p + calculate_probability(i, n)
    return 2*p

In [101]:
gross_conversion['d'] = 0.01
retention['d'] = 0.01
net_conversion['d'] = 0.0075

In [25]:
gross_conversion['sample_size'] = round(calculate_sample_size(calculate_std(
    gross_conversion['p'], gross_conversion['d']), 0.05, 0.2, gross_conversion['d']))
gross_conversion['sample_size'] = round(
    ((gross_conversion['sample_size']/0.08)*2))
gross_conversion['sample_size']

645875.0

In [70]:
retention['sample_size'] = round(calculate_sample_size(
    calculate_std(retention['p'], retention['d']), 0.05, 0.2, retention['d']))
retention['sample_size'] = round(((retention['sample_size']/0.20625/0.08*2)))
retention['sample_size']

4737818.0

In [71]:
net_conversion['sample_size'] = round(calculate_sample_size(calculate_std(
    net_conversion['p'], net_conversion['d']), 0.05, 0.2, net_conversion['d']))
net_conversion['sample_size'] = round(
    ((net_conversion['sample_size']/0.08)*2))
net_conversion['sample_size']

685325.0

### Analyze Collected Data

In [72]:
control = pd.read_csv("control_data.csv")
experiment = pd.read_csv("experiment_data.csv")
control.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7723,687,134.0,70.0
1,"Sun, Oct 12",9102,779,147.0,70.0
2,"Mon, Oct 13",10511,909,167.0,95.0
3,"Tue, Oct 14",9871,836,156.0,105.0
4,"Wed, Oct 15",10014,837,163.0,64.0


#### Sanity Check

In [74]:
pageview_control = control['Pageviews'].sum()
pageview_experiment = experiment['Pageviews'].sum()
pageview_total = pageview_control + pageview_experiment 
print("number of pageviews in control:", pageview_control)
print("number of pageviews in experiment:", pageview_experiment)

number of pageviews in control: 345543
number of pageviews in experiment: 344660


In [88]:
p = 0.5
alpha = 0.05
p_hat = round(pageview_control/pageview_total, 4)
std = np.sqrt(p*(1-p)/pageview_total)
margin_of_error = round(calculate_z_score((1-(alpha/2)))*std, 4)
print ("The confidence interval is between",p-margin_of_error,"and",p+margin_of_error)
print("According to our calculation, ", p_hat, "is in this range")

The confidence interval is between 0.4988 and 0.5012
According to our calculation,  0.5006 is in this range


In [87]:
click_control = control['Clicks'].sum()
click_experiment = experiment['Clicks'].sum()
click_total = click_control + click_experiment

p_hat = round((click_control/click_total), 4)
std = np.sqrt(p*(1-p)/click_total)
margin_of_error = round(calculate_z_score((1-(alpha/2)))*std, 4)
print ("The confidence interval is between",p-margin_of_error,"and",p+margin_of_error)
print("According to our calculation, ", p_hat, "is in this range")

The confidence interval is between 0.4959 and 0.5041
According to our calculation,  0.5005 is in this range


In [91]:
ctp_control = click_control/pageview_control
ctp_experiment = click_experiment/pageview_experiment
d_hat = round(ctp_experiment-ctp_control, 4)
p_pooled = click_total/pageview_total
std_pooled = np.sqrt(p_pooled*(1-p_pooled) *
                     (1/pageview_control+1/pageview_experiment))
margin_of_error = round(calculate_z_score(1-(alpha/2))*std_pooled, 4)
print ("The confidence interval is between",0-margin_of_error,"and",0+margin_of_error)
print("According to our calculation, ", d_hat, "is in this range")

The confidence interval is between -0.0013 and 0.0013
According to our calculation,  0.0001 is in this range


#### Examine Effect Size 

In [92]:
click_control = control['Clicks'].loc[control['Enrollments'].notnull()].sum()
click_experiment = experiment['Clicks'].loc[experiment['Enrollments'].notnull(
)].sum()
enroll_control = control['Enrollments'].sum()
enroll_experiment = experiment['Enrollments'].sum()

gross_control = enroll_control/click_control
gross_experiment = enroll_experiment/click_experiment
gross_pooled = (enroll_control + enroll_experiment) / \
    (click_control + click_experiment)
gross_std_pooled = np.sqrt(
    gross_pooled*(1-gross_pooled)*(1/click_control + 1/click_experiment))
gross_me = round(calculate_z_score(1-(alpha/2))*gross_std_pooled, 4)
gross_diff = round(gross_experiment-gross_control, 4)
print("The change due to the experiment is", gross_diff*100, "%")
print("Confidence Interval: [",gross_diff-gross_me, ",", gross_diff+gross_me, "]")
print("The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if", -
      gross_conversion["d_min"], "is not in the CI as well.")

The change due to the experiment is -2.06 %
Confidence Interval: [ -0.0292 , -0.012 ]
The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if -0.01 is not in the CI as well.


In [102]:
payment_control = control['Payments'].sum()
payment_experiment = experiment['Payments'].sum()

net_control = payment_control/click_control
net_experiment = payment_experiment/click_experiment
net_pooled = (payment_control + payment_experiment) / \
    (click_control + click_experiment)
net_std_pooled = np.sqrt(
    net_pooled*(1-net_pooled)*(1/click_control + 1/click_experiment))
net_me = round(calculate_z_score(1-(alpha/2))*net_std_pooled, 4)
net_diff = round(net_experiment-net_control, 4)
print("The change due to the experiment is", net_diff*100, "%")
print("Confidence Interval: [",net_diff-net_me, ",", net_diff+net_me, "]")
print("The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if", -
      net_conversion["d"], "is not in the CI as well.")

The change due to the experiment is -0.49 %
Confidence Interval: [ -0.0116 , 0.0018000000000000004 ]
The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if -0.0075 is not in the CI as well.


#### Sign Test

In [103]:
full = control.join(other=experiment, how='inner', lsuffix='_cont', rsuffix='_exp')
full.count()

Date_cont           37
Pageviews_cont      37
Clicks_cont         37
Enrollments_cont    23
Payments_cont       23
Date_exp            37
Pageviews_exp       37
Clicks_exp          37
Enrollments_exp     23
Payments_exp        23
dtype: int64

In [106]:
full = full.loc[full['Enrollments_cont'].notnull()]
full.count()

Date_cont           23
Pageviews_cont      23
Clicks_cont         23
Enrollments_cont    23
Payments_cont       23
Date_exp            23
Pageviews_exp       23
Clicks_exp          23
Enrollments_exp     23
Payments_exp        23
dtype: int64

In [144]:
x = full['Enrollments_cont']/full['Clicks_cont']
y = full['Enrollments_exp']/full['Clicks_exp']
full['gross_conversion'] = np.where(x<y,1,0)

a = full['Payments_cont']/full['Clicks_cont']
b = full['Payments_exp']/full['Clicks_exp']
full['net_conversion'] = np.where(a<b,1,0)
full.head()

Unnamed: 0,Date_cont,Pageviews_cont,Clicks_cont,Enrollments_cont,Payments_cont,Date_exp,Pageviews_exp,Clicks_exp,Enrollments_exp,Payments_exp,gross_conversion,net_conversion
0,"Sat, Oct 11",7723,687,134.0,70.0,"Sat, Oct 11",7716,686,105.0,34.0,0,0
1,"Sun, Oct 12",9102,779,147.0,70.0,"Sun, Oct 12",9288,785,116.0,91.0,0,1
2,"Mon, Oct 13",10511,909,167.0,95.0,"Mon, Oct 13",10480,884,145.0,79.0,0,0
3,"Tue, Oct 14",9871,836,156.0,105.0,"Tue, Oct 14",9867,827,138.0,92.0,0,0
4,"Wed, Oct 15",10014,837,163.0,64.0,"Wed, Oct 15",9793,832,140.0,94.0,0,1


In [145]:
gross_conversion_count = full['gross_conversion'][full['gross_conversion']==1].count()
net_conversion_count = full['net_conversion'][full['net_conversion']==1].count()
n = full['net_conversion'].count()
print("No. of cases for gross conversion:",gross_conversion_count)
print("No. of cases for net conversion:",net_conversion_count)
print("No. of total cases", n)

No. of cases for gross conversion: 4
No. of cases for net conversion: 10
No. of total cases 23


***
Step 1. Set up hypotheses and determine level of significance.

H0: The median difference is zero versus
H1: The median difference is not zero α = 0.05

Step 2. Select the appropriate test statistic.
The test statistic for the Sign Test is the smaller of the number of positive or negative signs.

Step 3. Set up the decision rule.
The appropriate critical value for the Sign Test can be found in the table of critical values for the Sign Test. To determine the appropriate critical value we need the sample size, and our two-sided level of significance α = 0.05.

Step 4. Compute the test statistic.
***

In [148]:
print("Gross conversion Change is significant if", round(
    calculate_two_side_p(gross_conversion_count, n), 4), "is smaller than 0.05")
print("Net conversion Change is insignificant because", round(
    calculate_two_side_p(net_conversion_count, n), 4), "is larger than 0.05")

Gross conversion Change is significant if 0.0026 is smaller than 0.05
Net conversion Change is insignificant because 0.6774 is larger than 0.05


The change in gross conversion is significant, while the change in net conversion is not. We recommend not to initiate the change since it is only significant for the gross conversion.


#### Reference
https://www.kaggle.com/tammyrotem/ab-tests-with-python/data
http://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_nonparametric/BS704_Nonparametric5.html