In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.DataFrame(
    dict(
        user_id=np.random.randint(0, 1000, size=100000),
        n_imp=np.random.randint(1, 101, size=100000),
    )
)
df["n_click"] = df.n_imp.apply(lambda x: np.random.randint(0, int(np.ceil((x+1)*.3))))
df

Unnamed: 0,user_id,n_imp,n_click
0,484,96,14
1,482,72,4
2,244,18,1
3,420,53,13
4,283,96,14
...,...,...,...
99995,505,83,21
99996,744,84,4
99997,938,46,0
99998,350,35,6


In [3]:
from t_ab.frequentist.aa import AATest
from t_ab.frequentist.ctr import ImpressionBasedCTRTtest, UserBasedCTRTtest

ibtest = ImpressionBasedCTRTtest("user_id", "n_imp", "n_click")
ubtest = UserBasedCTRTtest("user_id", "n_imp", "n_click")
aa_test = AATest(ibtest, ubtest)

In [4]:
from typing import Generator
from sklearn.model_selection import train_test_split


def split_data(values: np.ndarray, n: int, random_state: int = 42) -> Generator[np.ndarray, None, None]:
    for i in range(n-1):
        vs, values = train_test_split(values, train_size=1/(n-i), random_state=random_state)
        yield vs
    yield values


def dfs_loader(df: pd.DataFrame, col: str, n_groups: int, n_tests: int=1000) -> Generator[list[pd.DataFrame], None, None]:
    values = df[col].unique()
    for i in range(n_tests):
        yield [df[df[col].isin(vs)] for vs in split_data(values, n_groups, i)]

In [5]:
aa_test_results = aa_test(dfs_loader(df, "user_id", 3, 1000))

In [6]:
for test_name, aa_test_result in aa_test_results.items():
    print(test_name)
    print(aa_test_result.multipletests_result, aa_test_result.is_rejected)

n_click / n_imp (Impression-Based)
(array([False, False, False]), array([0.12793981, 0.4152471 , 0.78213848]), 0.016952427508441503, 0.016666666666666666) False
n_click / n_imp (User-Based)
(array([False, False, False]), array([0.05001447, 0.31765793, 0.97795874]), 0.016952427508441503, 0.016666666666666666) False


In [7]:
from t_ab.frequentist.ab import ABTest

ab_test = ABTest(ibtest, ubtest)

In [8]:
for dfs in dfs_loader(df, "user_id", 3):
    ab_test_results = ab_test(dfs)
    break

In [9]:
from pprint import pprint

for test_name, ab_test_result in ab_test_results.items():
    print(test_name)
    pprint(ab_test_result)

n_click / n_imp (Impression-Based)
{'(0, 1)': CTRTestResult(statistics=(Statistics(mean=0.147442380935773, std=0.009526629997433504, nobs=333), Statistics(mean=0.14818872229621036, std=0.010301748759417315, nobs=333)), ttest_result=TtestResult(statistic=-0.9706347622043752, pvalue=0.33208362020327375, is_rejected=False)),
 '(0, 2)': CTRTestResult(statistics=(Statistics(mean=0.147442380935773, std=0.009526629997433504, nobs=333), Statistics(mean=0.1480690739700291, std=0.01042049436500961, nobs=334)), ttest_result=TtestResult(statistic=-0.8105357648427263, pvalue=0.4179222842030986, is_rejected=False)),
 '(1, 2)': CTRTestResult(statistics=(Statistics(mean=0.14818872229621036, std=0.010301748759417315, nobs=333), Statistics(mean=0.1480690739700291, std=0.01042049436500961, nobs=334)), ttest_result=TtestResult(statistic=0.14911502958728748, pvalue=0.8815080750032513, is_rejected=False))}
n_click / n_imp (User-Based)
{'(0, 1)': CTRTestResult(statistics=(Statistics(mean=0.14737134138917832,