In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.DataFrame(
    dict(
        user_id=np.random.randint(0, 1000, size=100000),
        n_imp=np.random.randint(1, 101, size=100000),
    )
)
df["n_click"] = df.n_imp.apply(lambda x: np.random.randint(0, int(np.ceil((x+1)*.3))))
df

Unnamed: 0,user_id,n_imp,n_click
0,257,62,0
1,795,92,11
2,349,50,10
3,752,90,7
4,16,61,14
...,...,...,...
99995,672,57,1
99996,780,53,4
99997,655,83,17
99998,974,28,2


In [3]:
from t_ab.aa import AATest
from t_ab.ctr import ImpressionBasedCTRTtest, UserBasedCTRTtest

ibtest = ImpressionBasedCTRTtest("user_id", "n_imp", "n_click")
ubtest = UserBasedCTRTtest("user_id", "n_imp", "n_click")
aa_test = AATest(ibtest, ubtest)

In [4]:
from typing import Generator
from sklearn.model_selection import train_test_split

user_ids = df.user_id.unique()
n = 3


def split_data(user_ids: np.ndarray, n: int, random_state: int = 42) -> Generator[np.ndarray, None, None]:
    for i in range(n-1):
        uids, user_ids = train_test_split(user_ids, train_size=1/(n-i), random_state=random_state)
        yield uids
    yield user_ids


def dfs_loader() -> Generator[list[pd.DataFrame], None, None]:
    for i in range(1000):
        yield [
            df[df.user_id.isin(uids)]
            for uids in split_data(user_ids, n, i)
        ]

In [5]:
results = aa_test(dfs_loader())

In [6]:
for result in results:
    print(result.multipletests_result, result.is_rejected)

(array([False, False, False]), array([0.75140196, 0.72721807, 0.78211625]), 0.016952427508441503, 0.016666666666666666) False
(array([False, False, False]), array([0.94595489, 0.94595489, 0.94595489]), 0.016952427508441503, 0.016666666666666666) False
