<a href="https://colab.research.google.com/github/sindla97/AB_testing/blob/main/Udacity_AB_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
from scipy import stats

In [3]:
# Example baseline values
baseline = {
    'pageviews': 40000,          # Unique cookies/day
    'clicks': 3200,              # Clicks on "Start free trial"
    'enrollments': 660,          # Daily enrollments
    'Retention': 0.53,           # Payments percentage
    'CTP': 0.08,                 # 3200/40000
    'gross_conv': 0.20625,       # 660/3200
    'net_conv': 0.1093125        # (660*0.53)/3200
}

In [6]:
# Standard error at N=1

for k, v in baseline.items():
  if k in ['Retention','CTP','gross_conv','net_conv']:
    s=np.sqrt(v * (1-v) )
    print( f'SE of {k} is {s} \n')




SE of CTP is 0.2712931993250107 

SE of gross_conv is 0.4046120827409878 

SE of net_conv is 0.3120308916497692 



In [7]:
# calculate the size of experiment
from scipy.stats import norm


def get_z_star(alpha):
   """Returns the critical z-value for a two-tailed test."""
   return -norm.ppf(alpha / 2)

def get_beta(z_star, s, d_min, N):
    """Calculates the Type II error rate (beta) for a given sample size."""
    SE = s / np.sqrt(N)
    return norm.cdf(z_star * SE, loc=d_min, scale=SE)

def required_size(s, d_min, Ns=range(1, 100000), alpha=0.05, beta=0.2):
    """Finds the smallest N where beta <= desired threshold."""
    z_star = get_z_star(alpha)
    for N in Ns:
        if get_beta(z_star, s, d_min, N) <= beta:
            return N
    return -1




In [16]:
alpha =0.05
beta =0.02
d_min_gross=0.01
d_min_net=0.0075

p_gross=baseline['gross_conv']
p_net=baseline['net_conv']


s_gross = np.sqrt(p_gross * (1 - p_gross) * 2)  # Pooled SE for two groups
s_net = np.sqrt(p_net * (1 - p_net) * 2)

# Calculate required clicks per group
N_clicks_gross = required_size(s=s_gross, d_min=d_min_gross, alpha=alpha, beta=beta)
N_clicks_net = required_size(s=s_net, d_min=d_min_net, alpha=alpha, beta=beta)

# Convert clicks to pageviews
N_pageviews_gross = int(np.ceil(N_clicks_gross / baseline['CTP']))
N_pageviews_net = int(np.ceil(N_clicks_net / baseline['CTP']))

print(f'Total No of clicks(control & experiment) needed for Gross_conv are {N_clicks_gross}, pageviews needed are {N_pageviews_gross}')


print(f'Total No of clicks(control & experiment) needed for Net_conv  = {N_clicks_net}, pageviews needed are = {N_pageviews_net}')

Total No of clicks(control & experiment) needed for Gross_conv are 52748, pageviews needed are 659350
Total No of clicks(control & experiment) needed for Net_conv  = 55770, pageviews needed are = 697125


In [19]:
df_control = pd.read_excel('/content/Final Project Results.xlsx', sheet_name='Control')
df_experiment = pd.read_excel('/content/Final Project Results.xlsx', sheet_name='Experiment')

In [29]:
print(f'''Control \n Pageviews = {df_control['Pageviews'].sum()} \n Clicks = {df_control['Clicks'].sum()} \n Enrollments = {df_control['Enrollments'].sum()} \n Payments = {df_control['Payments'].sum()}''')

print(f'''Experiment \n Pageviews = {df_experiment['Pageviews'].sum()} \n Clicks = {df_experiment['Clicks'].sum()} \n Enrollments = {df_experiment['Enrollments'].sum()} \n Payments = {df_experiment['Payments'].sum()}''')

Control 
 Pageviews = 345543 
 Clicks = 28378 
 Enrollments = 3785.0 
 Payments = 2033.0
Experiment 
 Pageviews = 344660 
 Clicks = 28325 
 Enrollments = 3423.0 
 Payments = 1945.0


In [21]:
df_control[~df_control['Enrollments'].isnull()]['Pageviews'].sum()

np.int64(212163)

In [None]:
from statsmodels.stats.proportion import proportions_ztest

count = 230_500  # Control cookies
nobs = 464_000   # Total cookies
zstat, pval = proportions_ztest(count, nobs, value=0.5, alternative='two-sided')

print(f"z-score: {zstat:.3f}, p-value: {pval:.4f}")