# Experiment Design

This example notebook shows how to prepare for an experiment: how to choose and evaluate the test parameters, e.g. type I/II error rates and sample size.

In [None]:
import warnings
# Silence every warning so library noise does not clutter the notebook output.
warnings.filterwarnings('ignore')
# `category` must be a single Warning subclass. A list is accepted silently by
# simplefilter() but makes the next emitted warning fail with a TypeError when
# the filter is matched, so each category is registered with its own call.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)

import sys
import logging
import numpy as np
import pandas as pd

from abacus.auto_ab.abtest import ABTest, ABTestParams
from abacus.auto_ab.params import DataParams, HypothesisParams
from abacus.splitter.params import SplitBuilderParams
from abacus.mde_researcher.params import MdeParams
from abacus.mde_researcher.mde_research_builder import MdeResearchBuilder
from abacus.mde_researcher.multiple_split_builder import MultipleSplitBuilder

logging.basicConfig(level = logging.INFO)

%load_ext autoreload
%autoreload 2

In [None]:
# Read the example dataset from the local data directory and preview it.
df = pd.read_csv(filepath_or_buffer='./data/ab_data_height.csv')
df.head()

Prepare parameters for experiment design:

In [None]:
# Column roles in `df`: unit id, group label, target metric and its covariate,
# plus the labels that identify the control and treatment groups.
data_params = DataParams(
    id_col='id',
    group_col='groups',
    control_name='control',
    treatment_name='target',
    target='height_now',
    covariate='height_prev',
)

# Hypothesis settings for a continuous metric compared through its mean.
# NOTE(review): alpha / beta are presumably the target type I / type II error
# rates mentioned in the introduction — confirm against HypothesisParams docs.
hypothesis_params = HypothesisParams(
    metric_type='continuous',
    metric_name='mean',
    metric=np.mean,
    alternative='greater',
    alpha=0.05,
    beta=0.2,
    n_boot_samples=200,
    n_buckets=50,
)

# Bundle both parameter sets into a single object.
ab_params = ABTestParams(data_params, hypothesis_params)

Parameters for splitter:

In [None]:
# Splitter configuration. Both group sizes are left as None here.
# NOTE(review): sizes are presumably supplied later by the group-size grid of
# the experiment design parameters — confirm against SplitBuilderParams docs.
split_builder_params = SplitBuilderParams(
    id_col='id',
    map_group_names_to_sizes={'control': None, 'target': None},
    main_strata_col='moda_city',
    split_metric_col='height_now',
    cols=['height_prev'],
    cat_cols=['country'],
    n_bins=6,
    min_cluster_size=500,
    alpha=0.05,
)

## Experiment design for continuous metric

Parameters for experiment design:

In [None]:
# Design settings for the continuous `height_now` metric: Welch test, no
# variance reduction, no bucketing.
# NOTE(review): `injects` are presumably multiplicative effect sizes and the
# min/max/step triple a grid of group sizes — confirm against MdeParams docs.
prepilot_params = MdeParams(
    metrics_names=['height_now'],
    injects=[1.0001, 1.0002, 1.0003],
    min_group_size=35000,
    max_group_size=50000,
    step=5000,
    iterations_number=10,
    stat_test=ABTest.test_welch,
    variance_reduction=None,
    use_buckets=False,
    min_beta_score=0.02,
    max_beta_score=0.9,
)

In [None]:
# Build the research object from the data and the three parameter sets.
prepilot = MdeResearchBuilder(
    df,
    ab_params,
    prepilot_params,
    split_builder_params,
)

Simulate experiment:

In [None]:
# Run the simulation; collect() yields two results, unpacked as beta and alpha.
# NOTE(review): presumably type II / type I error estimates — confirm.
beta, alpha = prepilot.collect()

In [None]:
# Display the first result returned by collect().
beta

In [None]:
# Display the second result returned by collect().
alpha

## Experiment design for continuous metric with variance reduction

Parameters for experiment design:

In [None]:
# Design settings for the continuous `height_now` metric, this time with
# ABTest.cuped as the variance reduction approach before the Welch test.
# NOTE(review): `injects` are presumably multiplicative effect sizes and the
# min/max/step triple a grid of group sizes — confirm against MdeParams docs.
prepilot_params = MdeParams(
    metrics_names=['height_now'],
    injects=[1.0001, 1.0002, 1.0003],
    min_group_size=35000,
    max_group_size=45000,
    step=5000,
    iterations_number=50,
    stat_test=ABTest.test_welch,
    variance_reduction=ABTest.cuped,
    use_buckets=False,
    min_beta_score=0.02,
    max_beta_score=0.9,
)

In [None]:
# Rebuild the research object so it picks up the variance-reduction settings.
prepilot = MdeResearchBuilder(
    df,
    ab_params,
    prepilot_params,
    split_builder_params,
)

Simulate experiment:

In [None]:
# Run the simulation; collect() yields two results, unpacked as beta and alpha.
beta, alpha = prepilot.collect()

In [None]:
# Display the first result returned by collect().
beta

In [None]:
# Display the second result returned by collect().
alpha

## Experiment design for binary (conversion) metric

In [None]:
# Re-read the dataset (same CSV as at the top of the notebook) and preview it.
df = pd.read_csv(filepath_or_buffer='./data/ab_data_height.csv')
df.head()

In [None]:
# Column roles for the binary example: unit id, group label and the
# conversion flag used as the target.
# NOTE(review): control_name / treatment_name are not set here, unlike the
# continuous example above, so the DataParams defaults apply — confirm they
# match the 'control' / 'target' group names used by the splitter.
data_params = DataParams(
    id_col='id',
    group_col='groups',
    target_flg='conversion',
)

# Hypothesis settings for a binary metric compared through its mean.
# NOTE(review): alpha / beta are presumably the target type I / type II error
# rates mentioned in the introduction — confirm against HypothesisParams docs.
hypothesis_params = HypothesisParams(
    metric_type='binary',
    metric_name='mean',
    metric=np.mean,
    alternative='greater',
    alpha=0.05,
    beta=0.2,
)

# Bundle both parameter sets into a single object.
ab_params = ABTestParams(data_params, hypothesis_params)

Parameters for splitter:

In [None]:
# Splitter configuration for the binary example; the values are the same as
# in the continuous example. Both group sizes are left as None here.
split_builder_params = SplitBuilderParams(
    id_col='id',
    map_group_names_to_sizes={'control': None, 'target': None},
    main_strata_col='moda_city',
    split_metric_col='height_now',
    cols=['height_prev'],
    cat_cols=['country'],
    n_bins=6,
    min_cluster_size=500,
    alpha=0.05,
)

Parameters for experiment design:

In [None]:
# Design settings for the binary `conversion` metric, tested with
# ABTest.test_z_proportions, without variance reduction or bucketing.
# NOTE(review): `injects` are presumably multiplicative effect sizes and the
# min/max/step triple a grid of group sizes — confirm against MdeParams docs.
prepilot_params = MdeParams(
    metrics_names=['conversion'],
    injects=[1.001, 1.002, 1.0022],
    min_group_size=35000,
    max_group_size=50000,
    step=5000,
    variance_reduction=None,
    use_buckets=False,
    stat_test=ABTest.test_z_proportions,
    iterations_number=10,
    max_beta_score=0.9,
    min_beta_score=0.01,
)

# `ab_params` is not rebuilt here: it was already created from the binary
# data and hypothesis parameters right after they were defined, and neither
# has changed since.

In [None]:
# Build the research object for the binary metric from the data and the
# three parameter sets.
prepilot = MdeResearchBuilder(
    df,
    ab_params,
    prepilot_params,
    split_builder_params,
)

Simulate experiment:

In [None]:
# Run the simulation; collect() yields two results, unpacked as beta and alpha.
beta, alpha = prepilot.collect()

In [None]:
# Display the first result returned by collect().
beta

In [None]:
# Display the second result returned by collect().
alpha