<a href="https://colab.research.google.com/github/sindla97/AB_testing/blob/main/Udacity_AB_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Udacity A/B Testing


In [1]:
import numpy as np
import pandas as pd
from scipy import stats

In [2]:
# Example baseline values
baseline = {
    'pageviews': 40000,          # Unique cookies/day
    'clicks': 3200,              # Clicks on "Start free trial"
    'enrollments': 660,          # Daily enrollments
    'Retention': 0.53,           # Payments percentage
    'CTP': 0.08,                 # 3200/40000
    'gross_conv': 0.20625,       # 660/3200
    'net_conv': 0.1093125        # (660*0.53)/3200
}

In [3]:
# Standard error at N=1

for k, v in baseline.items():
  if k in ['Retention','CTP','gross_conv','net_conv']:
    s=np.sqrt(v * (1-v) )
    print( f'SE of {k} is {s} \n')




SE of Retention is 0.4990991885387112 

SE of CTP is 0.2712931993250107 

SE of gross_conv is 0.4046120827409878 

SE of net_conv is 0.3120308916497692 



##Experiment Size Estimation

In [4]:
# calculate the size of experiment
from scipy.stats import norm


def get_z_star(alpha):
   """Returns the critical z-value for a two-tailed test."""
   return -norm.ppf(alpha / 2)

def get_beta(z_star, s, d_min, N):
    """Calculates the Type II error rate (beta) for a given sample size."""
    SE = s / np.sqrt(N)
    return norm.cdf(z_star * SE, loc=d_min, scale=SE)

def required_size(s, d_min, Ns=range(1, 100000), alpha=0.05, beta=0.2):
    """Finds the smallest N where beta <= desired threshold."""
    z_star = get_z_star(alpha)
    for N in Ns:
        if get_beta(z_star, s, d_min, N) <= beta:
            return N
    return -1




In [5]:
alpha =0.05
beta =0.02
d_min_gross=0.01
d_min_net=0.0075

p_gross=baseline['gross_conv']
p_net=baseline['net_conv']


s_gross = np.sqrt(p_gross * (1 - p_gross) * 2)  # Pooled SE for two groups
s_net = np.sqrt(p_net * (1 - p_net) * 2)

# Calculate required clicks
N_clicks_gross = required_size(s=s_gross, d_min=d_min_gross, alpha=alpha, beta=beta)
N_clicks_net = required_size(s=s_net, d_min=d_min_net, alpha=alpha, beta=beta)

# Convert clicks to pageviews
N_pageviews_gross = int(np.ceil(N_clicks_gross / baseline['CTP']))
N_pageviews_net = int(np.ceil(N_clicks_net / baseline['CTP']))

print(f'Total No of clicks(control & experiment) needed for Gross_conv are {N_clicks_gross}, pageviews needed are {N_pageviews_gross}')


print(f'Total No of clicks(control & experiment) needed for Net_conv  = {N_clicks_net}, pageviews needed are = {N_pageviews_net}')

Total No of clicks(control & experiment) needed for Gross_conv are 52748, pageviews needed are 659350
Total No of clicks(control & experiment) needed for Net_conv  = 55770, pageviews needed are = 697125


## Sanity Checks

In [7]:
df_control = pd.read_excel('/content/Final Project Results.xlsx', sheet_name='Control')
df_experiment = pd.read_excel('/content/Final Project Results.xlsx', sheet_name='Experiment')

In [8]:
print(f'''Control \n Pageviews = {df_control['Pageviews'].sum()} \n Clicks = {df_control['Clicks'].sum()} \n Enrollments = {df_control['Enrollments'].sum()} \n Payments = {df_control['Payments'].sum()}''')

print(f'''Experiment \n Pageviews = {df_experiment['Pageviews'].sum()} \n Clicks = {df_experiment['Clicks'].sum()} \n Enrollments = {df_experiment['Enrollments'].sum()} \n Payments = {df_experiment['Payments'].sum()}''')

Control 
 Pageviews = 345543 
 Clicks = 28378 
 Enrollments = 3785.0 
 Payments = 2033.0
Experiment 
 Pageviews = 344660 
 Clicks = 28325 
 Enrollments = 3423.0 
 Payments = 1945.0


In [9]:
# No of pageviews
# H0: proportion_pv_exp = proportion_pv_ctrl
# H1: proportion_pv_exp <> proportion_pv_ctrl


ctrl_PV=df_control['Pageviews'].sum()
exp_PV=df_experiment['Pageviews'].sum()

total=(ctrl_PV+exp_PV)

print(total,ctrl_PV,exp_PV)

from statsmodels.stats.proportion import proportions_ztest

zstat, pval = proportions_ztest(count=[ctrl_PV,exp_PV], nobs=[total,total], value=0.0, alternative='two-sided')
print(f"Two-proportion z-test p-value: {pval:.3f} , Zstat : {zstat}")

# There is no evidence the Pageviews in control and experiment groups are different


690203 345543 344660
Two-proportion z-test p-value: 0.133 , Zstat : 1.503097941694545


In [10]:
# No of clicks
# H0: proportion_clicks_exp = proportion_clicks_ctrl
# H1: proportion_clicks_exp <> proportion_clicks_ctrl


ctrl_clicks=df_control['Clicks'].sum()
exp_clicks=df_experiment['Clicks'].sum()

total=(ctrl_clicks+exp_clicks)

print(total,ctrl_clicks,exp_clicks)

from statsmodels.stats.proportion import proportions_ztest

zstat, pval = proportions_ztest(count=[ctrl_clicks,exp_clicks], nobs=[total,total], value=0.0, alternative='two-sided')
print(f"Two-proportion z-test p-value: {pval:.3f} , Zstat : {zstat}")



# There is no evidence the Clicks in control and experiment groups are different

56703 28378 28325
Two-proportion z-test p-value: 0.753 , Zstat : 0.314766024552368


In [11]:
from scipy.stats import ttest_ind

control_ctp =  df_control['Clicks']/df_control['Pageviews']
experiment_ctp = df_experiment['Clicks']/df_experiment['Pageviews']

tstat, pval = ttest_ind(control_ctp, experiment_ctp)
print(f"Two-proportion t-test p-value: {pval:.3f} , tstat : {tstat}")


# There is no evidence the CTP in control and experiment groups are different

Two-proportion t-test p-value: 0.934 , tstat : -0.08302608446561602


## Experiment Evaluation

In [20]:
df_control['group']='Control'
df_experiment['group']='Experiment'
combined_df = pd.concat((df_control[df_control['Payments'].isna()==False], df_experiment[df_experiment['Payments'].isna()==False]), axis=0)
summary_df=combined_df.groupby('group')[['Pageviews','Clicks','Enrollments','Payments']].sum().reset_index()
summary_df

Unnamed: 0,group,Pageviews,Clicks,Enrollments,Payments
0,Control,212163,17293,3785.0,2033.0
1,Experiment,211362,17260,3423.0,1945.0


In [49]:
# checking if Gross conversion is significantly different in experiment vs control
# Total enrollments/Clicks

alpha=0.05

GC_ctrl=summary_df.loc[0,'Enrollments']/summary_df.loc[0,'Clicks']
GC_exp=summary_df.loc[1,'Enrollments']/summary_df.loc[1,'Clicks']

pooled_prop= (summary_df.loc[0,'Enrollments']+summary_df.loc[1,'Enrollments'])/(summary_df.loc[0,'Clicks']+summary_df.loc[1,'Clicks'])


print(f'Gross Conversion Experiment : {GC_exp}\nGross Conversion Control : {GC_ctrl}\nPooled proportion :{pooled_prop}')

SE=np.sqrt((pooled_prop*(1-pooled_prop))*((1/summary_df.loc[0,'Clicks'])+(1/summary_df.loc[1,'Clicks'])))

z=norm.ppf(alpha / 2)

CI_upper_limit = -z * SE
CI_lower_limit = z * SE

diff= GC_exp-GC_ctrl


print(f'Standard Error ={SE}\nP_exp - P_ctrl = {diff}\nMarginal error = {z * SE}\nconfidence interval = {[CI_lower_limit,CI_upper_limit]}\n')

if diff < CI_upper_limit and diff > CI_lower_limit:
    print(f"The observed difference in Gross Conversion between the experiment and control groups is not statistically significant at the 5% significance level : diff ={np.round(diff,4)} is inside confidence interval [{[np.round(CI_lower_limit,4),np.round(CI_upper_limit,4)]}].\nTherefore, we fail to reject the null hypothesis and conclude that there is no evidence of a difference in Gross Conversion between the two groups.")
else:
    print(f"The observed difference in Gross Conversion between the experiment and control groups is statistically significant at the 5% significance level : diff ={np.round(diff,4)} is outside confidence interval [{[np.round(CI_lower_limit,4),np.round(CI_upper_limit,4)]}].\nTherefore, we reject the null hypothesis and conclude that there is evidence of a difference in Gross Conversion between the two groups.")

Gross Conversion Experiment : 0.19831981460023174
Gross Conversion Control : 0.2188746891805933
Pooled proportion :0.20860706740369866
Standard Error =0.004371675385225936
P_exp - P_ctrl = -0.020554874580361565
Marginal error = -0.008568326307143104
confidence interval = [np.float64(-0.008568326307143104), np.float64(0.008568326307143104)]

The observed difference in Gross Conversion between the experiment and control groups is statistically significant at the 5% significance level : diff =-0.0206 is outside confidence interval [[np.float64(-0.0086), np.float64(0.0086)]].
Therefore, we reject the null hypothesis and conclude that there is evidence of a difference in Gross Conversion between the two groups.


In [50]:
# checking if Net conversion is significantly different in experiment vs control
# Total Payments/Clicks

alpha=0.05

NC_ctrl=summary_df.loc[0,'Payments']/summary_df.loc[0,'Clicks']
NC_exp=summary_df.loc[1,'Payments']/summary_df.loc[1,'Clicks']

pooled_prop= (summary_df.loc[0,'Payments']+summary_df.loc[1,'Payments'])/(summary_df.loc[0,'Clicks']+summary_df.loc[1,'Clicks'])


print(f'Gross Conversion Experiment : {GC_exp}\nGross Conversion Control : {GC_ctrl}\nPooled proportion :{pooled_prop}')


SE=np.sqrt((pooled_prop*(1-pooled_prop))*((1/summary_df.loc[0,'Clicks'])+(1/summary_df.loc[1,'Clicks'])))

z=norm.ppf(alpha / 2)

CI_upper_limit = -z * SE
CI_lower_limit = z * SE

diff= NC_exp-NC_ctrl

print(f'Standard Error ={SE}\nP_exp - P_ctrl = {diff}\nMarginal error = {z * SE}\nconfidence interval = {[CI_lower_limit,CI_upper_limit]}\n')



if diff < CI_upper_limit and diff > CI_lower_limit:
    print(f"The observed difference in Net Conversion between the experiment and control groups is not statistically significant at the 5% significance level : diff ={np.round(diff,4)} is inside confidence interval [{[np.round(CI_lower_limit,4),np.round(CI_upper_limit,4)]}].\nTherefore, we fail to reject the null hypothesis and conclude that there is no evidence of a difference in Gross Conversion between the two groups.")
else:
    print(f"The observed difference in Net Conversion between the experiment and control groups is statistically significant at the 5% significance level : diff ={np.round(diff,4)} is outside confidence interval [{[np.round(CI_lower_limit,4),np.round(CI_upper_limit,4)]}].\nTherefore, we reject the null hypothesis and conclude that there is evidence of a difference in Gross Conversion between the two groups.")

Gross Conversion Experiment : 0.19831981460023174
Gross Conversion Control : 0.2188746891805933
Pooled proportion :0.1151274853124186
Standard Error =0.0034341335129324238
P_exp - P_ctrl = -0.0048737226745441675
Marginal error = -0.006730778003449568
confidence interval = [np.float64(-0.006730778003449568), np.float64(0.006730778003449568)]

The observed difference in Net Conversion between the experiment and control groups is not statistically significant at the 5% significance level : diff =-0.0049 is inside confidence interval [[np.float64(-0.0067), np.float64(0.0067)]].
Therefore, we fail to reject the null hypothesis and conclude that there is no evidence of a difference in Gross Conversion between the two groups.
