In [70]:
# import packages
import numpy as np
import scipy.stats as stats
import math
import pandas as pd
import statsmodels.stats.proportion as sms

import matplotlib.pyplot as plt
%matplotlib inline

# Experiment Size
Because we have two evaluation metrics of interest, we should make sure that we are choosing an appropriate significance level to conduct each test, in order to preserve a maximum overall Type I error rate of $.05$. Since we would be happy to deploy the new homepage if either download rate or license purchase rate showed a statistically significant increase, performing both individual tests at a $.05$ error rate carries the risk of making too many Type I errors. As such, we'll apply the Bonferroni correction to run each test at a $.025$ error rate so as to protect against making too many errors.

In [93]:
# function to compute the experiment size
def experiment_size(p_null, p_alt, alpha = .05, beta = .20):
    """
    Return the per-group sample size required to detect a change in a
    success rate from p_null to p_alt with the requested error rates.

    Input parameters:
        p_null: success rate assumed under the null hypothesis
        p_alt : success rate we want to be able to detect
        alpha : Type-I error rate (critical value taken at 1 - alpha)
        beta  : Type-II error rate (power is 1 - beta)

    Output value:
        n : number of samples needed in each group, rounded up
            (returned as a float, as produced by np.ceil)
    """

    # Critical z-scores under the null and the alternative distributions
    crit_null = stats.norm.ppf(1 - alpha)
    crit_alt = stats.norm.ppf(beta)

    # Per-observation Bernoulli variances for each success rate
    var_null = p_null * (1-p_null)
    var_alt = p_alt * (1-p_alt)

    # Standard deviation of the difference in proportions with one
    # observation per group, under the null and under the alternative
    spread_null = np.sqrt(var_null + var_null)
    spread_alt = np.sqrt(var_null + var_alt)

    # Solve for the n at which both critical boundaries coincide
    effect = p_alt - p_null
    ratio = (crit_null*spread_null - crit_alt*spread_alt) / effect
    return np.ceil(ratio ** 2)

In [94]:
# Daily homepage traffic, split evenly between the control and experiment groups
user_per_day = 3250
control_user = exper_user = user_per_day/2

user_per_day, control_user, exper_user

(3250, 1625.0, 1625.0)

In [104]:
# Per-group sample size needed for each metric (baseline rate, target rate),
# both evaluated at the Bonferroni-corrected alpha of .025 with 80% power
download_visitor, license_visitor = (
    experiment_size(base_rate, target_rate, alpha=.025, beta=.20)
    for base_rate, target_rate in [(.16, .175), (.02, .023)]
)

print(f"The visitors needs for download is {download_visitor}")
print(f"The visitors needs for license is {license_visitor}")


The visitors needs for download is 9481.0
The visitors needs for license is 34930.0


In [113]:
# Days of data collection needed per metric: each group receives half of the
# daily traffic, so divide the per-group requirement by user_per_day/2
download_days, license_days = (
    math.ceil(needed/(user_per_day/2))
    for needed in (download_visitor, license_visitor)
)
print(f'The days need for collecting data is {download_days} and {license_days}')


The days need for collecting data is 6 and 22


# Invariant Metric

In [7]:
# Load the per-day experiment results (cookies, downloads and licenses for the
# control and experiment groups) and preview the first two rows
df = pd.read_csv('homepage-experiment-data.csv')
df.head(2)

Unnamed: 0,Day,Control Cookies,Control Downloads,Control Licenses,Experiment Cookies,Experiment Downloads,Experiment Licenses
0,1,1764,246,1,1850,339,3
1,2,1541,234,2,1590,281,2


$H_0: p = 0.5$<br>
$H_1: p \ne 0.5$

In [61]:
# One-sample z-test for a proportion
def zProportion(p, p_hat, n, side='two-tail'):
    """
    Run a one-sample z-test comparing an observed proportion to a
    hypothesized one.

    Input parameters:
        p     : proportion assumed under the null hypothesis
        p_hat : observed sample proportion
        n     : number of observations behind p_hat
        side  : 'two-tail', 'upper-tail' or 'lower-tail'

    Output value:
        (z_value, p_value) : the test statistic and its p-value

    Raises:
        ValueError: if side is not one of the three supported options
    """
    # Fail early with a clear message; previously an unknown `side` fell
    # through every branch and crashed with UnboundLocalError on p_value.
    valid_sides = ('two-tail', 'upper-tail', 'lower-tail')
    if side not in valid_sides:
        raise ValueError(f"side must be one of {valid_sides}, got {side!r}")

    # Standard error uses the null proportion, not the observed one
    std_error = np.sqrt(p*(1-p)/n)
    z_value = (p_hat-p)/std_error

    if side == 'two-tail':
        p_value = stats.norm.sf(abs(z_value))*2
    elif side == 'upper-tail':
        p_value = stats.norm.sf(z_value)
    else:  # 'lower-tail'
        p_value = stats.norm.cdf(z_value)

    return z_value, p_value

In [206]:
# Cookies assigned to control, and total cookies across both groups
n_control = df['Control Cookies'].sum()
n_obs = df['Experiment Cookies'].sum() + n_control

n_control, n_obs

(46851, 94197)

In [207]:
# Proportion of cookies expected in the control group under H0 (even split).
# `p` was previously undefined in the notebook, so this cell failed on a
# fresh kernel; it is now set explicitly to the H0 value.
p = 0.5

# Two-tailed test: is the observed control share consistent with p?
z_val, p_val = zProportion(p, (n_control/n_obs), n_obs)
print('z_val=', z_val, 'p_val=',p_val)

if p_val <= 0.05:
    print('Reject H_0')
else:
    print("Don't reject H_0")

z_val= -1.6128228329131171 p_val= 0.10678300401876598
Don't reject H_0


# Evaluation Metric

In [63]:
# Preview the first three rows as a reminder of the available columns
df.head(3)

Unnamed: 0,Day,Control Cookies,Control Downloads,Control Licenses,Experiment Cookies,Experiment Downloads,Experiment Licenses
0,1,1764,246,1,1850,339,3
1,2,1541,234,2,1590,281,2
2,3,1457,240,1,1515,274,1


## Download Rate

$H_0: \text{download rate}_{\text{new}} \le \text{download rate}_{\text{old}} $ <br>
$H_1: \text{download rate}_{\text{new}} > \text{download rate}_{\text{old}} $ 

In [226]:
# Restrict to the download-rate collection window (days before day 7)
download_window = df.query('Day < 7')

# Cookie (visitor) and download totals for each group
n_control = download_window['Control Cookies'].sum()
n_control_download = download_window['Control Downloads'].sum()
n_exper = download_window['Experiment Cookies'].sum()
n_exper_download = download_window['Experiment Downloads'].sum()

# Download proportion per group and the observed difference between them
p_control_download = n_control_download / n_control
p_exper_download = n_exper_download / n_exper
obs_diff = p_exper_download - p_control_download

# Report: control group, experiment group, then the difference
print(f'Number of Observation in Control Group {n_control}')
print(f"Porportion of downloads in control group {p_control_download}")
print('─'*50)
print(f'Number of Observation in Experiment Group {n_exper}')
print(f"Porportion of downloads in experiment group {p_exper_download}")
print('─'*50)
print(f"The difference between experiment and control group is '{obs_diff}' ")

Number of Observation in Control Group 9636
Porportion of downloads in control group 0.1540058115400581
──────────────────────────────────────────────────
Number of Observation in Experiment Group 9919
Porportion of downloads in experiment group 0.1783445911886279
──────────────────────────────────────────────────
The difference between experiment and control group is '0.024338779648569786' 


In [227]:
# Two-sample z-test on download proportions, experiment listed first so that
# 'larger' tests whether the experiment rate exceeds the control rate
count = [n_exper_download, n_control_download]
nobs = [n_exper, n_control]
alternative = 'larger'
value = 0.0  # hypothesized difference between the two proportions under H0

stat, pval_1 = sms.proportions_ztest(
    count=count,
    nobs=nobs,
    value=value,
    alternative=alternative,
)
stat, pval_1

(4.569277914761742, 2.4470372893307737e-06)

In [228]:
# Critical value for a one-sided ('larger') test at the Bonferroni-corrected
# significance level of .025 (overall .05 split across the two metrics).
z_value = stats.norm.ppf(1 - 0.025)

# Upper-tail test: only a large *positive* statistic is evidence for H1, so
# compare stat directly instead of abs(stat), which would also reject H0 when
# the experiment performed much worse than control.
if stat > z_value:
    print('reject the H0')
else:
    print('faild to reject H0')

reject the H0


## License Purchasing Rate

$H_0: \text{purchasing rate}_{\text{new}} \le \text{purchasing rate}_{\text{old}} $ <br>
$H_1: \text{purchasing rate}_{\text{new}} > \text{purchasing rate}_{\text{old}} $ 

In [229]:
# Cookies are counted only for days before day 22; license purchases are
# summed over every day in the data set
license_window = df.query('Day < 22')

# Cookie (visitor) totals from the window, license totals from all days
n_control = license_window['Control Cookies'].sum()
n_control_license = df['Control Licenses'].sum()
n_exper = license_window['Experiment Cookies'].sum()
n_exper_license = df['Experiment Licenses'].sum()

# License purchase proportion per group and the observed difference
p_control_license = n_control_license / n_control
p_exper_license = n_exper_license / n_exper
obs_diff = p_exper_license - p_control_license

# Report: control group, experiment group, then the difference
print(f'Number of Observation in Control Group {n_control}')
print(f"Porportion of licenses in control group {p_control_license}")
print('─'*50)
print(f'Number of Observation in Experiment Group {n_exper}')
print(f"Porportion of licenses in experiment group {p_exper_license}")
print('─'*50)
print(f"The difference between experiment and control group is '{obs_diff}' ")

Number of Observation in Control Group 33758
Porportion of licenses in control group 0.021032051661828307
──────────────────────────────────────────────────
Number of Observation in Experiment Group 34338
Porportion of licenses in experiment group 0.021317490826489604
──────────────────────────────────────────────────
The difference between experiment and control group is '0.00028543916466129693' 


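Note: license purchases are summed over all 29 days but divided by the cookies from only the first 21 days. Because the software comes with a 7-day trial, visitors who arrive in the final days of the experiment have not yet had the chance to purchase a license, so their cookies are left out of the denominator when computing the license purchase rate.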

In [230]:
# Two-sample z-test on license proportions, experiment listed first so that
# 'larger' tests whether the experiment rate exceeds the control rate
count = [n_exper_license, n_control_license]
nobs = [n_exper, n_control]
alternative = 'larger'
value = 0.0  # hypothesized difference between the two proportions under H0

stat, pval_2 = sms.proportions_ztest(
    count=count,
    nobs=nobs,
    value=value,
    alternative=alternative,
)
stat, pval_2

(0.2586750111658684, 0.3979430008399871)

In [231]:
# Critical value for a one-sided ('larger') test at the Bonferroni-corrected
# significance level of .025 (overall .05 split across the two metrics).
z_value = stats.norm.ppf(1 - 0.025)

# Upper-tail test: only a large *positive* statistic is evidence for H1, so
# compare stat directly instead of abs(stat), which would also reject H0 when
# the experiment performed much worse than control.
if stat > z_value:
    print('reject the H0')
else:
    print('faild to reject H0')

faild to reject H0


# Conclusion

In [224]:
# Pair each evaluation metric's label with its one-sided p-value (same order)
tests = ['Download Rate', 'License Rate']
p_values = (pval_1, pval_2)
p_values

(2.4470372893307737e-06, 0.3979430008399871)

In [225]:
# Bonferroni correction: split the overall .05 error rate across all tests
bonferroni_rate = 0.05 / len(p_values)

# Report the decision for each metric against the corrected threshold
for i, p_value in enumerate(p_values):
    message = (
        f'Reject the H0 for {tests[i]} test'
        if p_value < bonferroni_rate
        else f'Faild to reject H0 for {tests[i]} test'
    )
    print(message)

Reject the H0 for Download Rate test
Faild to reject H0 for License Rate test


Despite the fact that statistical significance wasn't obtained for the number of licenses purchased, the new homepage appeared to have a strong effect on the number of downloads made. Based on our goals, this seems enough to suggest replacing the old homepage with the new homepage. Establishing whether there was a significant increase in the number of license purchases, either through the rate or the increase in the number of homepage visits, will need to wait for further experiments or data collection.