In [1]:
import numpy as np
import pandas as pd

In [2]:
# Synthetic impression/click log used as input for the A/A test below.
# A seeded Generator makes the data identical on every Restart & Run All.
SEED = 42
N_ROWS = 100_000
rng = np.random.default_rng(SEED)

df = pd.DataFrame(
    dict(
        # user ids drawn uniformly from 0..999 (upper bound exclusive)
        user_id=rng.integers(0, 1000, size=N_ROWS),
        # impressions per row drawn uniformly from 1..100
        n_imp=rng.integers(1, 101, size=N_ROWS),
    )
)
# Exclusive upper bound for clicks on each row: ceil((n_imp + 1) * 0.3).
# Drawing all rows in one vectorized call replaces the per-row `.apply`.
click_upper = np.ceil((df.n_imp + 1) * 0.3).astype(int).to_numpy()
df["n_click"] = rng.integers(0, click_upper)
df

Unnamed: 0,user_id,n_imp,n_click
0,347,95,4
1,552,31,6
2,924,94,19
3,567,35,9
4,411,21,0
...,...,...,...
99995,429,28,4
99996,857,37,5
99997,870,21,1
99998,807,81,11


In [3]:
from t_ab.aa import AATest
from t_ab.ctr import ImpressionBasedCTRTtest, UserBasedCTRTtest

# Both CTR tests receive the same three column names, in the same order
# (user id, impression count, click count), matching the columns of `df`.
ibtest, ubtest = (
    test_cls("user_id", "n_imp", "n_click")
    for test_cls in (ImpressionBasedCTRTtest, UserBasedCTRTtest)
)
# Wrap both tests in a single AATest callable.
# NOTE(review): the semantics of AATest live in t_ab.aa and are not visible here.
aa_test = AATest(ibtest, ubtest)

In [4]:
from typing import Generator
from sklearn.model_selection import train_test_split

# Number of groups each iteration splits the users into.
n = 3
# Distinct user ids present in the data; the splits are made over users, not rows.
user_ids = df["user_id"].unique()


def split_data(user_ids: np.ndarray, n: int, random_state: int = 42) -> Generator[np.ndarray, None, None]:
    """Randomly partition ``user_ids`` into ``n`` disjoint groups of near-equal size.

    Groups are peeled off one at a time with ``train_test_split``: at step ``i``
    an equal share of the ids still remaining is taken as the next group, and
    whatever is left after ``n - 1`` steps becomes the last group.

    Args:
        user_ids: Array of ids to partition.
        n: Number of groups to produce; must be at least 1.
        random_state: Seed passed to every ``train_test_split`` call.

    Yields:
        One array of ids per group, ``n`` arrays in total.

    Raises:
        ValueError: If ``n`` is smaller than 1, or if ``n > 1`` and there are
            fewer ids than groups.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n}")
    if n > 1 and len(user_ids) < n:
        raise ValueError(
            f"cannot split {len(user_ids)} ids into {n} non-empty groups"
        )

    remaining = user_ids
    for i in range(n - 1):
        groups_left = n - i
        # An integer train_size is an absolute sample count. Integer division
        # avoids the float rounding of `1 / groups_left * len(remaining)`,
        # which can floor to one sample too few (e.g. 49 * (1 / 49) < 1).
        group, remaining = train_test_split(
            remaining,
            train_size=len(remaining) // groups_left,
            random_state=random_state,
        )
        yield group
    yield remaining


def dfs_loader(n_iter: int = 1000) -> Generator[list[pd.DataFrame], None, None]:
    """Yield one random user-level partition of ``df`` per iteration.

    Each iteration splits the notebook-level ``user_ids`` into ``n`` groups with
    ``split_data`` and selects the rows of ``df`` belonging to each group. The
    iteration index is used as the split seed, so every iteration gets a
    different but reproducible partition.

    Args:
        n_iter: Number of partitions to generate. Defaults to 1000.

    Yields:
        A list of ``n`` DataFrames, one per user group.
    """
    for seed in range(n_iter):
        yield [
            df[df.user_id.isin(uids)]
            for uids in split_data(user_ids, n, seed)
        ]

In [5]:
# Run the A/A test over every partition produced by dfs_loader().
# NOTE(review): the recorded cell output is nested lists of floats in [0, 1],
# presumably the collected p-values printed by AATest — confirm in t_ab.aa.
results = aa_test(dfs_loader())

[[0.5922752139445159, 0.8496689160969753, 0.05858805070365176, 0.12841395025866514, 0.4814498397117607, 0.9628570323585848, 0.4133690876027023, 0.1807787785472084, 0.526089711611315, 0.08654291702063746, 0.5793827723216833, 0.017270254340485073, 0.10905380277723603, 0.24905964424586569, 0.5617395639775018, 0.8662079313021879, 0.6795205126842985, 0.48963079581287594, 0.4522512969307243, 0.58620925835727, 0.8465726563712882, 0.3662798780693558, 0.4605478702717408, 0.7521891751668792, 0.9980534644366494, 0.364073305231537, 0.8244812390670694, 0.9454428940870505, 0.36880504727379926, 0.15549742824216758, 0.08728410684067611, 0.37765251570424097, 0.3761228743492595, 0.7605887461778159, 0.3437625592309187, 0.11639712161288118, 0.2539497452950072, 0.4714156547525341, 0.957470339140734, 0.5701676751765808, 0.6318855207939618, 0.8368043990602927, 0.012257128527953109, 0.6131525596001262, 0.8286306052334037, 0.15012328544813003, 0.5666872495333986, 0.13210305832745578, 0.8568733002918397, 0.8146

In [6]:
# Print the multiple-testing summary and the overall rejection flag for each result.
# NOTE(review): the printed 4-tuple (bool array, float array, float, float) looks like
# the return value of statsmodels' `multipletests` — confirm in t_ab.aa.
for result in results:
    print(result.multipletests_result, result.is_rejected)

(array([False, False, False]), array([0.77784794, 0.91335285, 0.95126304]), 0.016952427508441503, 0.016666666666666666) False
(array([False, False, False]), array([0.88501944, 0.98208132, 0.98208132]), 0.016952427508441503, 0.016666666666666666) False
