# Experiment based on user conversions

## Setup

In [None]:
import sys
from pathlib import Path

# Put the sibling `src` directory (resolved against the current working
# directory) at the front of the import path, so the project modules
# imported further down are found first.
src_dir = Path('../src').resolve()
sys.path.insert(0, str(src_dir))

In [None]:
import logging

import pandas as pd

from btech_experiment import (
    HistoricBasedSampleParams,
    eval_strats_weights,
    get_daily_users,
    get_period,
    get_user_sessions,
    show_diagnostics,
    plot_error_rates,
    load_credentials,
    HistoricalUsersConversionsSampler,
    UsersConversionsBootstrap,
)
from duration_estimator import (
    Effect,
    ExperimentDurationEstimator,
    save_experiment_result,
    load_experiment_result,
    
)

In [None]:
# Configure the root logger to emit INFO-level records and above.
logging.basicConfig(level=logging.INFO)

# Passed to `duration_estimator.fit(verbose=...)` further down.
VERBOSE = True

# File-system layout; relative paths are interpreted against the current
# working directory.
DATA_PATH = Path('../data')
EXPERIMENTS_DIR = DATA_PATH.joinpath('experiments')
# Absolute path, as a plain string, to the credentials file.
PATH_TO_CREDENTIALS = str(DATA_PATH.joinpath('credentials.json').resolve())

## Pick historical period

In [None]:
# Historical period used for the analysis. The result is later unpacked
# positionally (`*period`) into get_daily_users / get_user_sessions.
# NOTE(review): presumably the window covers `n_month_from_last_date` months
# ending at `last_available_period_date` — confirm in get_period.
period = get_period(
    last_available_period_date='2022-10-01',
    n_month_from_last_date=1,
)
# Self-documenting f-string: prints the name `period` together with its repr.
print(f'{period = }')

Skip the cell below if the data for the period you are loading hasn't changed.

## Load data

In [None]:
# Fetch the data for `period`; requires the credentials file at
# PATH_TO_CREDENTIALS.
credentials = load_credentials(PATH_TO_CREDENTIALS)
df_daily_users = get_daily_users(*period, credentials)
df_user_sessions = get_user_sessions(*period, credentials)

# Cache the frames for later runs. They are written under the `*_latest.pkl`
# names because that is what the "Load pre-loaded data" cell reads; with the
# previous names (`df_daily_users.pkl` / `df_user_sessions.pkl`) the freshly
# fetched data was never picked up by that cell.
df_daily_users.to_pickle(DATA_PATH / 'df_daily_users_latest.pkl')
df_user_sessions.to_pickle(DATA_PATH / 'df_user_sessions_latest.pkl')

## Load pre-loaded data

In [None]:
# Locations of the cached frames inside DATA_PATH.
daily_users_pickle = DATA_PATH / 'df_daily_users_latest.pkl'
user_sessions_pickle = DATA_PATH / 'df_user_sessions_latest.pkl'

# Unpickling executes arbitrary code, so only load files you trust.
df_daily_users = pd.read_pickle(daily_users_pickle)
df_user_sessions = pd.read_pickle(user_sessions_pickle)

## Experiment setup

In [None]:
# Passed as `max_days` to ExperimentDurationEstimator in the estimation cell.
max_days = 30

# Effect the experiment is expected to detect.
# NOTE(review): with is_additive=False, 0.05 is presumably a relative
# (multiplicative) effect — confirm in duration_estimator.Effect.
expected_effect = Effect(0.05, is_additive=False)

# Sampling shares for the historical-data-based sample.
sample_params = HistoricBasedSampleParams(share_of_all_users=0.1, share_of_sample_for_pilot=0.9)
print(f'{sample_params = }')

## Experiment diagnostics

In [None]:
# Run the diagnostics helper on the daily-users frame with the sample
# parameters chosen in the experiment setup.
show_diagnostics(df_daily_users, sample_params)

## Estimation

In [None]:
# Building blocks of the estimator: a sampler fed with both historical
# frames, and a bootstrap conductor configured with the strata weights
# evaluated from the daily-users frame.
sample_generator = HistoricalUsersConversionsSampler(
    df_daily_users=df_daily_users, df_user_sessions=df_user_sessions
)
strats_weights = eval_strats_weights(df_daily_users)
experiment_conductor = UsersConversionsBootstrap(strats_weights=strats_weights)

# Wire everything into the duration estimator; fitting happens in the next cell.
duration_estimator = ExperimentDurationEstimator(
    effect=expected_effect,
    sample_generator=sample_generator,
    experiment_conductor=experiment_conductor,
    sample_params=sample_params,
    max_days=max_days,
)

In [None]:
# Fit the duration estimator (this will take some time).
# VERBOSE is forwarded unchanged; presumably it toggles progress output —
# confirm in ExperimentDurationEstimator.fit.
duration_estimator.fit(verbose=VERBOSE)

In [None]:
# Store the fitted estimator's result in EXPERIMENTS_DIR. The same
# `experiment_name` is reused by the results-viewing cell to load it back.
experiment_name = 'test_1'
save_experiment_result(experiment_name, duration_estimator, experiments_dir=EXPERIMENTS_DIR)

## View experiment results

In [None]:
# Read back the result stored under `experiment_name` in EXPERIMENTS_DIR.
error_rates = load_experiment_result(
    experiment_name=experiment_name,
    experiments_dir=EXPERIMENTS_DIR
)

# Bare expression as the cell's last statement so the notebook displays it.
error_rates

In [None]:
# Plot the error rates loaded in the previous cell.
plot_error_rates(error_rates)