In [1]:
import numpy as np
import scipy.stats as stats

In [15]:
# Instantiate test variables
prob_control = 0.10
prob_treatment = 0.12

size_control = 1000
size_treatment = 1000

alpha = 0.05 # Allowance for type 1 error. P value significance threshold.

# Hypotheses

**Null:** Control and treatment samples are part of the same binomial distribution. Treatment prob - Control prob <= 0.

**Alt:** Treatment sample is part of a binomial distribution with mean higher than that of control. Treatment prob - Control prob > 0.

# Step 1: Derive Stat Power from Probs and Sample Sizes

## Step 1a: Simulation Approach

In [16]:
# Get variances of binomial variables based on treatment and control probs
# REMEMBER: variance of binomial variable is k(p)(1-p), where k is count of trials
# In a marketing experiment trial count per sample member is 1 (only one chance to click or not per member)
# In light of hypotheses, we need to consider both control and treatment as parts of the same distribution
# So, we combine their binomial distributions Control + Treatment
# Variances are likewise added Var(Treatment) + Var(Control)
variance_control = 1 * prob_control * (1 - prob_control)
variance_treatment = 1 * prob_treatment * (1 - prob_treatment)
variance_null = variance_control + variance_treatment

# Get standard error of null distribution
# Using control for both elements of calculation, because null hypothesis is that Treatment + Control is same as Control + Control
# Both treatment and control are parts of equivalent binomial distributions
sterror_null = np.sqrt((variance_control / size_control) + (variance_control / size_control))

In [19]:
# Get the null binomial distribution
null_dist = stats.norm(loc = 0, scale = sterror_null)
p_crit = null_dist.ppf(1 - alpha)
print("Critical Difference: {}".format(p_crit))

Critical Difference: 0.022068027137403433


In [20]:
# Get standard error of alt distribution (assumes treatment - prob is mean of its own separate binomial variable)
sterror_alt = np.sqrt((variance_control / size_control) + (variance_treatment / size_treatment))

In [21]:
# Get the alt binomial distribution
alt_dist = stats.norm(loc = prob_treatment - prob_control, scale = sterror_alt)
beta = alt_dist.cdf(p_crit) # cumulative distribution function. Proportion of values in distribution below given value.
stat_power = 1 - beta # inverts beta to get proportion of values in alt_dist above p_crit

print("Stat Power: {}".format(stat_power))

Stat Power: 0.44122379261151545


# Step 2: Derive Sample Size from Stat Power and Probs

## Step 2a: Analytic Approach

In [24]:
# Specify inputs
alpha = 0.05
power = 0.80

In [30]:
z_null = stats.norm.ppf(1-alpha)
z_alt = stats.norm.ppf(power)

stdev_null = np.sqrt((prob_control * (1 - prob_control)) + (prob_control * (1 - prob_control)))
stdev_alt = np.sqrt((prob_control * (1 - prob_control)) + (prob_treatment * (1 - prob_treatment)))

z_diff = z_null - z_alt
p_diff = treatment_prob - control_prob

n = int(np.ceil((z_diff / p_diff) ** 2))

print("Target Sig Level: {}".format(alpha))
print("Target Power: {}".format(power))
print("Target Group Size: {:,}".format(n))
print("Target Experiment Size: {:,}".format(2*n))

Target Sig Level: 0.05
Target Power: 0.8
Target Group Size: 1,613
Target Experiment Size: 3,226
