In [1]:
import numpy as np
import pandas as pd
import scipy.stats as sp

### Notes on computing runtime for silent badging experiment
- the experiment runtime can be determined using a [calculator](https://www.experimentcalculator.com/) as normal
- instead of computing browsers per day for the days the experiment has been running, I'd compute browsers per bucketing batch and use the bucketing batches to compute time to power
- these experiments have much lower coverage, so it may make sense to power for larger lift sizes

### Computing current experiment power

Power function for proportion metrics. From [catapult stats library](https://github.com/etsy-dev/catapult-stats/blob/main/src/catapultstats/ztest.py)

In [2]:
def power_proportion_metric(
    mean_control: np.ndarray,
    sample_size_control: np.ndarray,
    sample_size_treatment: np.ndarray,
    percent_lift: np.ndarray,
    alpha: float = 0.05,
) -> np.ndarray:
    """Calculates the current power of the proportion metric (vectorized).

    Two-sided power of a two-sample test for proportions with unequal
    sample sizes, using the arcsine effect size ``h``.

    Taken from Etsyweb:
    'Power calculation for two sample test for proportions taken directly
    from the R function `pwr.2p2n.test`
    https://www.rdocumentation.org/packages/pwr/versions/1.2-0/topics/pwr.2p2n.test'

    All array arguments are converted to float64 and broadcast against each
    other, so scalars and array-likes are accepted in addition to equally
    sized 1-D arrays.

    Args:
        mean_control: The mean of the control group (a proportion).
        sample_size_control: The sample size of the control group.
        sample_size_treatment: The sample size of the treatment group.
        percent_lift: The expected percent lift in the metric
            (scale: [0, 100], i.e. ``1`` means a 1% relative lift).
        alpha: The significance threshold.

    Returns:
        A numpy array (at least 1-D, in the broadcast shape of the inputs)
        containing the power calculations for the proportion metrics
        provided — or NaN where the control mean is 0, the lifted mean is 0,
        or the combined sample size is not positive.
    """
    # Work in float64 throughout: with int32 sample sizes the product
    # n_control * n_treatment silently overflows for counts in the millions.
    p1, n_control, n_treatment, lift = np.broadcast_arrays(
        *(
            np.atleast_1d(np.asarray(arg, dtype=float))
            for arg in (
                mean_control,
                sample_size_control,
                sample_size_treatment,
                percent_lift,
            )
        )
    )

    # Treatment proportion implied by the lift, capped at 1.
    p2: np.ndarray = np.minimum(p1 * (1 + lift / 100), 1)

    result: np.ndarray = np.full(p1.shape, np.nan)
    row_idx: np.ndarray = (p1 != 0) & (p2 != 0) & (n_control + n_treatment > 0)

    # Non-directional h, magnitude of difference
    h: np.ndarray = np.abs(
        2 * np.arcsin(np.sqrt(np.maximum(p1[row_idx], 0)))
        - 2 * np.arcsin(np.sqrt(np.maximum(p2[row_idx], 0)))
    )

    # n1 * n2 / (n1 + n2), clipped at 0 so the square root stays defined.
    effective_n: np.ndarray = np.maximum(
        (n_control[row_idx] * n_treatment[row_idx])
        / (n_control[row_idx] + n_treatment[row_idx]),
        0,
    )
    shift: np.ndarray = h * np.sqrt(effective_n)

    # Upper critical value of the two-sided test.
    z_crit: float = -sp.norm.ppf(alpha / 2)

    # Probability of landing in either rejection region; `sf` is the upper
    # tail (1 - cdf) evaluated without the subtraction.
    result[row_idx] = sp.norm.sf(z_crit - shift) + sp.norm.cdf(-z_crit - shift)

    return result

Compute power for the experiment as of 6/7, for 1% and 2% lifts

In [16]:
# Two scenarios sharing the same control rate and bucket sizes; only the
# hypothesised lift (1% vs 2%) differs between them.
power_proportion_metric(
    mean_control=np.full(2, 0.0813),
    sample_size_control=np.full(2, 1_433_375),
    sample_size_treatment=np.full(2, 1_432_167),
    percent_lift=np.array([1, 2]),
)

array([0.7095945 , 0.99886734])