In [12]:
import math
from scipy.stats import norm

def calculate_mde_one_sided(weekly_traffic,
                            weekly_conversions,
                            confidence_level,
                            power,
                            num_variants,
                            weeks=[1,2,3,4,5,6]):
    """
    Tabulate the minimum detectable effect (MDE) of a one-sided test by test duration.

    Parameters:
    weekly_traffic (float): Visitors per week, split evenly across num_variants
    weekly_conversions (float): Conversions per week; divided by weekly_traffic to get the baseline rate
    confidence_level (float): One-sided confidence level (e.g., 0.95 for 95%)
    power (float): Statistical power (e.g., 0.8 for 80%)
    num_variants (int): Number of variants the weekly traffic is divided between
    weeks (iterable): Test durations, in weeks, to evaluate

    Returns:
    list: One tuple per entry in weeks, holding
          (weeks, absolute MDE as a proportion, relative MDE in percent, visitors per variant)
    """
    # One-sided test: the whole alpha sits in a single tail, so the quantile is taken
    # at confidence_level directly rather than at 1 - alpha / 2
    alpha_quantile = norm.ppf(confidence_level)
    beta_quantile = norm.ppf(power)

    base_rate = weekly_conversions / weekly_traffic

    # Variance term for the difference of two proportions, both taken at the baseline rate
    diff_variance = 2 * base_rate * (1 - base_rate)

    def _row(duration):
        # Visitors accumulated by each variant after `duration` weeks
        visitors = (weekly_traffic / num_variants) * duration
        std_err = math.sqrt(diff_variance / visitors)
        absolute = alpha_quantile * std_err + beta_quantile * std_err
        # Relative MDE is the absolute MDE expressed as a percentage of the baseline rate
        return (duration, absolute, (absolute / base_rate) * 100, visitors)

    return [_row(duration) for duration in weeks]

# Example usage: MDE table for weeks 1-6 of a two-variant test
weekly_traffic = 10000  # Weekly traffic (sessions or users)
weekly_conversions =300  # Weekly conversions
num_variants = 2  # Number of variants (including control)
# NOTE(review): baseline_cr is 5% (not 60%) and is not passed to calculate_mde_one_sided;
# the call below derives its baseline from weekly_conversions / weekly_traffic (300 / 10000 = 3%)
baseline_cr = 0.05
confidence_level = 0.95  # One-sided confidence level (95%)
power = 0.80  # Statistical power (80%)

mde_results = calculate_mde_one_sided(weekly_traffic, weekly_conversions, confidence_level, power, num_variants)

# Print results as a fixed-width table, one row per test duration
print(f"{'Week':<6}{'MDE (%)':<10}{'Relative MDE (%)':<20}{'Visitors per Variant':<20}")
for week, mde, relative_mde, visitors_per_variant in mde_results:
    # mde is a proportion (rendered by the % format spec); relative_mde is already in percent
    print(f"{week:<6}{mde:.2%}     {relative_mde:.2f}%              {int(visitors_per_variant):<20}")

Week  MDE (%)   Relative MDE (%)    Visitors per Variant
1     0.85%     28.28%              5000                
2     0.60%     20.00%              10000               
3     0.49%     16.33%              15000               
4     0.42%     14.14%              20000               
5     0.38%     12.65%              25000               
6     0.35%     11.54%              30000               


In [9]:
import numpy as np
def calculate_sample_size_corrected(delta, var_treatment, var_control, alpha=0.05, beta=0.2):
    """
    Per-group sample size needed to detect a difference of `delta` with a two-sided test.

    Parameters:
    delta (float): Clinically meaningful difference (effect size) to detect
    var_treatment (float): Variance of the treatment group
    var_control (float): Variance of the control group
    alpha (float): Significance level, split across both tails (default is 0.05)
    beta (float): Type II error rate (default is 0.2, corresponding to 80% power)

    Returns:
    float: Required sample size per group (not rounded)
    """
    # Two-sided critical value (alpha / 2 in each tail) and the quantile for the target power
    critical_value = norm.ppf(1 - alpha / 2)
    power_quantile = norm.ppf(1 - beta)

    # Variance of the difference between the two group means, per observation
    combined_variance = var_treatment + var_control

    numerator = (critical_value + power_quantile) ** 2 * combined_variance
    return numerator / delta ** 2


def calculate_clinically_meaningful_difference_corrected(n, var_treatment, var_control, alpha=0.05, beta=0.2):
    """
    Smallest difference detectable with `n` observations per group in a two-sided test.

    Parameters:
    n (float): Sample size per group
    var_treatment (float): Variance of the treatment group
    var_control (float): Variance of the control group
    alpha (float): Significance level, split across both tails (default is 0.05)
    beta (float): Type II error rate (default is 0.2, corresponding to 80% power)

    Returns:
    float: Smallest clinically meaningful difference (delta)
    """
    # Two-sided critical value (alpha / 2 in each tail) and the quantile for the target power
    critical_value = norm.ppf(1 - alpha / 2)
    power_quantile = norm.ppf(1 - beta)

    # Standard error of the difference between the two group means
    std_err = np.sqrt((var_treatment + var_control) / n)

    return (critical_value + power_quantile) * std_err


# Example test values
var_treatment = 0.05  # Variance of the treatment group
var_control = 0.05  # Variance of the control group
alpha = 0.05  # Significance level
beta = 0.2  # Type II error rate (1 - beta = 80% power)
delta = 0.01  # Example clinically meaningful difference

# Calculate the per-group sample size needed to detect delta
n = calculate_sample_size_corrected(delta, var_treatment, var_control, alpha, beta)

# Round-trip check: feeding that sample size back in should recover the original delta,
# since the two functions are the same relation solved for n and for delta respectively
calculated_delta = calculate_clinically_meaningful_difference_corrected(n, var_treatment, var_control, alpha, beta)

# Compare the calculated delta with the original delta (n is printed unrounded)
print(f"Original delta: {delta}")
print(f"Calculated delta: {calculated_delta}")
print(f"Sample size: {n}")


Original delta: 0.01
Calculated delta: 0.01
Sample size: 7848.879734349089


In [37]:
from scipy.stats import norm

def calculate_required_sample_size(conv_rate, relative_mde, confidence_level, power, adjustment_factor=1.06):
    """
    Calculate the required sample size for a given conversion rate, minimum detectable effect (MDE),
    confidence level, and statistical power (one-sided test).

    Parameters:
    conv_rate (float): Baseline conversion rate in percentage (e.g., 5 for 5%)
    relative_mde (float): Minimum detectable effect as a relative difference in percentage
        of the baseline (e.g., 50 for 50%)
    confidence_level (float): One-sided confidence level (e.g., 0.95 for 95%)
    power (float): Statistical power (e.g., 0.8 for 80%)
    adjustment_factor (float): Multiplier applied to the computed sample size as a safety
        margin (default is 1.06, i.e. 6% extra; pass 1.0 for the unadjusted size)

    Returns:
    int: Required sample size per group, after applying adjustment_factor,
         truncated to a whole number
    """
    # Convert percentages to proportions
    conv_rate = conv_rate / 100
    # Absolute MDE as a proportion: the relative MDE applied to the baseline rate
    mde = conv_rate * (relative_mde / 100)

    # Z-scores for the given confidence level and power.
    # One-sided test, so the quantile is taken at confidence_level directly.
    z_alpha = norm.ppf(confidence_level)
    z_beta = norm.ppf(power)

    # Variance of the difference of two proportions, with both groups taken at the baseline rate
    pooled_var = 2 * conv_rate * (1 - conv_rate)

    # Required sample size per group before any safety margin
    n = ((z_alpha + z_beta) ** 2 * pooled_var) / mde ** 2

    # Inflate by the safety margin; int() truncates the fractional part
    return int(n * adjustment_factor)

# Example usage:
conv_rate = 10  # 10% baseline conversion rate (passed as a percentage; the function divides by 100)
relative_mde = 10  # 10% relative difference
confidence_level = 0.95  # 95% confidence level (one-sided)
power = 0.8  # 80% power

required_sample_size = calculate_required_sample_size(conv_rate, relative_mde, confidence_level, power)
print(f"Required sample size per group: {required_sample_size}")

Required sample size per group: 11796
