Original source code and data found here: https://github.com/FlorentBuissonOReilly/BehavioralDataAnalysis

In [1]:
import pandas as pd
# Chapter 8 experimental data set (CSV, path relative to the notebook's working directory)
exp_data_df = pd.read_csv('data/chap8-experimental_data.csv')
# Chapter 8 historical data set (CSV, same directory)
hist_data_df = pd.read_csv('data/chap8-historical_data.csv')

In [2]:
# Historical (baseline) conversion rate used as the starting point
historical_conversion_rate = 0.184
# Effect we want to be able to detect, expressed in absolute percentage points
effect = 0.01
# Conversion rate we would observe if the treatment delivers exactly `effect`
new_conversion_rate = effect + historical_conversion_rate
new_conversion_rate

0.194

In [3]:
# A 1 percentage-point absolute effect is a ~5.4% relative increase over the historical rate
(new_conversion_rate - historical_conversion_rate) / historical_conversion_rate

0.05434782608695657

In [4]:
import statsmodels.stats.proportion as ssprop  # provides proportion_effectsize

# Standardized effect size between the target rate and the historical rate;
# this is the quantity the power solver expects as `effect_size`.
effect_size = ssprop.proportion_effectsize(
    prop1=new_conversion_rate,
    prop2=historical_conversion_rate,
)
effect_size

0.02554423106645265

I'm not sure exactly what `effect_size` represents intuitively, despite knowing the formula (it is Cohen's *h*, the difference between the arcsine-transformed proportions):

`2 * (arcsin(sqrt(prop1)) - arcsin(sqrt(prop2)))`

In [9]:
# Sample size required to detect a 1 percentage-point increase.
# Why a one-sided test -- Behavioral Data Analysis, Kindle location 5430:
# "As long as our treatment doesn't increase our booking rate,
# we don't really care whether it has the same booking rate or a lower booking rate compared to our control;
# either way we won't implement it. This implies that we can run a one-sided test instead of a two-sided test ..."
import statsmodels.stats.power as ssp # To calculate the standard power
from math import ceil

# nobs1=None marks the per-group sample size as the unknown to solve for;
# alternative='larger' selects the one-sided test described above.
per_group_size = ssp.tt_ind_solve_power(
    effect_size=effect_size,
    nobs1=None,
    alpha=0.05,
    power=0.8,
    alternative='larger',
)
# First line: raw per-group size; second line: total across both groups, rounded up per group
print(per_group_size, 2 * ceil(per_group_size), sep='\n')

18950.818821440742
37902


In [18]:
def sample_size(
        conversion_rate: float,
        effect: float = 0.01,
        alpha: float = 0.05,
        power: float = 0.8) -> int:
    """
    Total sample size (both groups combined) needed for a one-sided test to detect an
    increase of `effect` percentage points over `conversion_rate`.

    Args:
        conversion_rate:
            the original/historical conversion rate of interest, as a proportion in [0, 1)
        effect:
            the assumed effect size in percentage points (not percent change),
            e.g. 0.01 is 1 percentage point, i.e. going from a conversion rate of 10% to 11%.
            Must be positive and `conversion_rate + effect` must not exceed 1.
        alpha:
            statistical significance, i.e. the false positive rate; must be in (0, 1).

            How often would we declare success when there is exactly no impact? A value of `0.05`
            means we would implement the treatment 5% of the time (when there is exactly no
            impact). When there is actually an underlying negative effect, it would be less than
            5% of the time.
            - See table in Behavioral Data Analysis loc. 5367
        power:
            If there is actually a positive effect of the treatment of exactly `effect`
            percentage points, how often would we detect the positive effect and implement the
            treatment? A value of `0.8` means we would detect the change and implement the
            treatment 80% of the time when there is exactly an `effect` percentage points
            increase. Must be in (0, 1).

    Returns:
        The per-group sample size returned by the power solver, rounded up to a whole
        observation and multiplied by 2 (control + treatment).

    Raises:
        ValueError: if any argument is outside the ranges described above.
    """
    # Validate up front: out-of-range inputs would otherwise surface as NaN or an opaque
    # solver failure further down instead of a clear message.
    if not 0 <= conversion_rate < 1:
        raise ValueError(f"conversion_rate must be in [0, 1), got {conversion_rate!r}")
    if not effect > 0:
        # The test below is one-sided ('larger'), so only a positive effect is detectable.
        raise ValueError(f"effect must be positive, got {effect!r}")
    if conversion_rate + effect > 1:
        raise ValueError(
            f"conversion_rate + effect must not exceed 1, got {conversion_rate + effect!r}"
        )
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be in (0, 1), got {alpha!r}")
    if not 0 < power < 1:
        raise ValueError(f"power must be in (0, 1), got {power!r}")

    # Imported inside the function so the definition is self-contained within the notebook.
    import statsmodels.stats.proportion as ssprop # To calculate the standardized effect size
    import statsmodels.stats.power as ssp # To calculate the standard power
    from math import ceil

    # Standardized effect size between the target rate and the baseline rate.
    effect_size = ssprop.proportion_effectsize(conversion_rate + effect, conversion_rate)
    # nobs1=None marks the per-group sample size as the unknown to solve for.
    per_group_size = ssp.tt_ind_solve_power(
        effect_size=effect_size,
        alpha=alpha,
        nobs1=None,
        alternative='larger',
        power=power
    )

    # Round each group up to a whole observation before doubling for the two groups.
    return ceil(per_group_size) * 2

In [46]:
# Sanity check: same baseline (18.4%) and effect (1 point) as the step-by-step cells earlier in the notebook
print(sample_size(conversion_rate=0.184, effect=0.01))


37902


In [47]:
import numpy as np
import plotly_express as px
from itertools import product

# Power levels 0.80, 0.81, ..., 0.95. Built from an integer range and scaled, because a
# float step in np.arange makes the number of points depend on rounding and lets
# representation error accumulate in the values.
power_grid = np.arange(80, 96) / 100
alpha_grid = [0.1, 0.05, 0.01]

# One row per (power, alpha) combination.
combos = pd.DataFrame(
    product(power_grid, alpha_grid),
    columns=['power', 'alpha']
)
# Total sample size for each combination, holding the baseline rate and effect fixed.
combos['Sample Size'] = [
    sample_size(conversion_rate=0.184, effect=0.01, alpha=alpha, power=power)
    for power, alpha in zip(combos['power'], combos['alpha'])
]
# One line per alpha level, showing how the required sample size grows with power.
fig = px.line(
    data_frame=combos,
    x='power',
    y='Sample Size',
    markers=True,
    color='alpha',
    title="Sample Size Required for different values of `power` and `alpha`"
)
# Anchor the y-axis at zero and leave 10% headroom above the largest sample size.
fig.update_layout(yaxis={'range': [0, combos['Sample Size'].max() * 1.10]})