## AB tests snippets - frequentist calcs


In [1]:
import math as mt
import numpy as np
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
import scipy.stats as scs
import matplotlib.ticker as mtick
import seaborn as sns

### Example of Significance Power Calculator

In [2]:
#based on pooled is the best calculation
def binomial_sample_size(metric, mde, alpha, beta):
    # standard normal distribution to determine z-values
    snd = norm(0, 1)

    Z_beta = snd.ppf(1-beta)
    print(Z_beta)

    Z_alpha = snd.ppf(1-alpha/2)
    print(Z_alpha)

    # average of probabilities from both groups. This is the pooled probability
    p_pooled = (metric + metric + mde) / 2
    print(p_pooled)

    N = (2 * p_pooled * 
             (1 - p_pooled) * 
             (Z_beta + Z_alpha)**2
             / mde**2)

    print((Z_beta + Z_alpha)**2 / (mde**2))
    return int(N)

In [3]:
binomial_sample_size(metric=0.33, mde=0.02, alpha=0.05, beta=0.2)

0.8416212335729143
1.959963984540054
0.34
19622.19933587272


8806

### From Udacity course

In [4]:
def get_sds(p,d):
    sd1=mt.sqrt(2*p*(1-p))
    sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))
    x=[sd1,sd2]
    return x

#Inputs: required alpha value (alpha should already fit the required test)
#Returns: z-score for given alpha
def get_z_score(alpha):
    return norm.ppf(alpha)

# Inputs p-baseline conversion rate which is our estimated p and d-minimum detectable change
# Returns
def get_sds(p,d):
    sd1=mt.sqrt(2*p*(1-p))
    sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))
    sds=[sd1,sd2]
    return sds

# Inputs:sd1-sd for the baseline,sd2-sd for the expected change,alpha,beta,d-d_min,p-baseline estimate p
# Returns: the minimum sample size required per group according to metric denominator
def get_sampSize(sds,alpha,beta,d):
    n=pow((get_z_score(1-alpha/2)*sds[0] + get_z_score(1-beta)*sds[1]),2)/pow(d,2)
    return n

#### Calculating the sample size

In [5]:
# prob or conversion rate
p = 0.33
#Expected uplift 
d = 0.02

get_sampSize(get_sds(p,d),0.05,0.2,d)


8714.567712731887

#### Sanity Checks (sample mismatch)

* **Sanity Checks for differences between counts** 

In [6]:
p=0.5
alpha=0.05
pageviews_cont=4000
pageviews_total=8000

p_hat=round(pageviews_cont/(pageviews_total),4)
sd=mt.sqrt(p*(1-p)/(pageviews_total))
ME=round(get_z_score(1-(alpha/2))*sd,4)
print ("The confidence interval is between",p-ME,"and",p+ME,"; Is",p_hat,"inside this range?")

The confidence interval is between 0.489 and 0.511 ; Is 0.5 inside this range?


* **Sanity Checks for differences between probabilities** 

In [7]:
clicks_cont= 3000
pageviews_cont= 5000
clicks_exp = 3000
pageviews_exp= 5000
clicks_total = clicks_cont + clicks_exp

ctp_cont=clicks_cont/pageviews_cont
ctp_exp=clicks_exp/pageviews_exp
d_hat=round(ctp_exp-ctp_cont,4)
p_pooled=clicks_total/pageviews_total
sd_pooled=mt.sqrt(p_pooled*(1-p_pooled)*(1/pageviews_cont+1/pageviews_exp))
ME=round(get_z_score(1-(alpha/2))*sd_pooled,4)
print ("The confidence interval is between",0-ME,"and",0+ME,"; Is",d_hat,"within this range?")

The confidence interval is between -0.017 and 0.017 ; Is 0.0 within this range?


#### we calculate the confidence intervals

In [8]:
alpha=0.05

n_cont=8000
x_cont=1600
n_exp=8000
x_exp=1810


p_cont=x_cont/n_cont
p_exp=x_exp/n_exp


p_pooled=(x_cont+x_exp)/(n_cont+n_exp)
sd_pooled=mt.sqrt(p_pooled*(1-p_pooled)*(1/n_cont+1/n_exp))
ME=round(get_z_score(1-alpha/2)*sd_pooled,4)
p_diff=round(p_exp-p_cont,4)

print("The p_cont is: ", p_cont, "The p_cont margings are: [",round(p_cont-ME,4),",",round(p_cont+ME,4),"]" )
print("The p_exp is: ", p_exp,   "The p_cont margings are: [",round(p_exp-ME,4),",",round(p_exp+ME,4),"]" )

print("The change due to the experiment is", p_diff*100,"% (absolute)")
print("The ME is: ", ME)
print("Confidence Interval: [",p_diff-ME,",",p_diff+ME,"]")

The p_cont is:  0.2 The p_cont margings are: [ 0.1873 , 0.2127 ]
The p_exp is:  0.22625 The p_cont margings are: [ 0.2136 , 0.239 ]
The change due to the experiment is 2.62 % (absolute)
The ME is:  0.0127
Confidence Interval: [ 0.013500000000000002 , 0.038900000000000004 ]


In [9]:
alpha=0.05

n_cont=10072
x_cont=974
n_exp=9886
x_exp=1242


p_cont=x_cont/n_cont
p_exp=x_exp/n_exp


p_pooled=(x_cont+x_exp)/(n_cont+n_exp)
sd_pooled=mt.sqrt(p_pooled*(1-p_pooled)*(1/n_cont+1/n_exp))
ME=round(get_z_score(1-alpha/2)*sd_pooled,4)
p_diff=round(p_exp-p_cont,4)

print("The change due to the experiment is", p_diff*100,"% (absolute)")
print("Confidence Interval: [",p_diff-ME,",",p_diff+ME,"]")

The change due to the experiment is 2.8899999999999997 % (absolute)
Confidence Interval: [ 0.0202 , 0.037599999999999995 ]


## From AB testing calculator github

### Compute sample size

In [10]:
#mde is absolute (the absolute increase)
def compute_sample_size(p0, mde, alpha=0.05, beta=0.2, tails="Two"):
    
    # Conditional alpha value based on whether one or two tail test
    if tails == "Two":
        computed_alpha = alpha / 2
    else:
        computed_alpha = alpha

    p1 = p0 + mde
    
    #commented: if mde is relative, ie. a lift in conversion rate %
    # p1 = p0 * (1 + mde)

    print(p1)
    N = (
        (norm.ppf(1 - computed_alpha) + norm.ppf(1 - beta)) ** 2
        * (p0 * (1 - p0) + p1 * (1 - p1))
        / ((p0 - p1) ** 2)
    )
    
    
    print((norm.ppf(1 - computed_alpha) + norm.ppf(1 - beta)) ** 2/ ((p0 - p1) ** 2))
    print(p0 - p1)
        
    return int(N)

In [11]:
compute_sample_size(0.33, 0.02)

0.35000000000000003
19622.199335872687
-0.020000000000000018


8802

### Z test

In [12]:
def z_test(visitors_A, conversions_A, visitors_B, conversions_B, alpha=0.05, tail_direction="two tail"):
    """Run a Z-test with your data, returning the Z-score and p-value.

    Returns
    -------
    z_score : float
        Number of standard deviations between the mean of the control
        conversion rate distribution and the variant conversion rate
    p_value : float
        Probability of obtaining test results at least as extreme as the
        observed results, under the conditions of the null hypothesis
    """

    control_cr = conversions_A / visitors_A
    variant_cr = conversions_B / visitors_B
    relative_difference = variant_cr / control_cr - 1
    
    control_se = (control_cr * (1 - control_cr) / visitors_A) ** 0.5
    variant_se = (variant_cr * (1 - variant_cr) / visitors_B) ** 0.5
    se_difference = (control_se ** 2 + variant_se ** 2) ** 0.5
    
    pool_cr = (conversions_A + conversions_B) / (
        visitors_A + visitors_B
    )
    pool_se = (
        pool_cr
        * (1 - pool_cr)
        * (1 / visitors_A + 1 / visitors_B)
    ) ** 0.5

    # z-score
    #z_score = (variant_cr - control_cr) / pool_se
    z_score = (variant_cr - control_cr) / se_difference

    # Calculate the p-value dependent on one or two tails
    if tail_direction == "left":
        p_value = norm.cdf(-z_score)
    elif tail_direction == "right":
        p_value = norm.cdf(z_score)
    else:
        p_value = 2 * norm.cdf(-abs(z_score))

    return round(z_score,4), round(p_value,4)
    

In [13]:
z_test(80000, 1600, 80050, 1696)

(1.6716, 0.0946)