In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed a dedicated generator so that Restart & Run All regenerates the exact
# same synthetic data (and therefore the same downstream p-values).
SEED = 0
N_ROWS = 100_000
rng = np.random.default_rng(SEED)

# Synthetic log: user ids are drawn from [0, 1000) and impression counts
# from [1, 1000], one pair per row.
df = pd.DataFrame(
    dict(
        user_id=rng.integers(0, 1000, size=N_ROWS),
        n_imp=rng.integers(1, 1001, size=N_ROWS),
    )
)
# Clicks are uniform on [0, n_imp] (upper bound inclusive). A single
# vectorized draw with a per-row upper bound replaces the per-row
# `.apply(lambda ...)` call.
df["n_click"] = rng.integers(0, df["n_imp"].to_numpy(), endpoint=True)
df

Unnamed: 0,user_id,n_imp,n_click
0,887,165,69
1,244,932,506
2,3,828,296
3,103,509,442
4,528,917,10
...,...,...,...
99995,145,843,333
99996,713,991,345
99997,564,288,244
99998,701,389,65


In [3]:
from sklearn.model_selection import train_test_split

# Distinct user ids; `data_loader` splits on these rather than on rows, so
# every row of a given user lands in the same group.
user_ids = df["user_id"].unique()

def data_loader(i: int) -> list[pd.DataFrame]:
    """Return the rows of ``df`` partitioned into two user-level groups.

    The module-level ``user_ids`` array is split 50/50 with
    ``train_test_split``, using ``i`` as ``random_state``. Each returned
    frame holds the rows of ``df`` whose ``user_id`` is in the
    corresponding half.

    Args:
        i: Value forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        A two-element list of frames, in the order the halves are returned
        by ``train_test_split``.
    """
    first_uids, second_uids = train_test_split(
        user_ids, test_size=0.5, random_state=i
    )
    in_first = df["user_id"].isin(first_uids)
    in_second = df["user_id"].isin(second_uids)
    return [df[in_first], df[in_second]]

In [4]:
from t_ab.ctr import ImpressionBasedCTRTtest, UserBasedCTRTtest

# Both testers are built with the same two positional arguments.
# NOTE(review): presumably these name the user-id and impression-count
# columns — confirm against the t_ab.ctr API.
_CTR_TEST_ARGS = ("user_id", "n_imp")
_ibtest = ImpressionBasedCTRTtest(*_CTR_TEST_ARGS)
_ubtest = UserBasedCTRTtest(*_CTR_TEST_ARGS)


def ibtest(dfs: list[pd.DataFrame]) -> float:
    """Apply the shared ``_ibtest`` instance to ``dfs`` and return a p-value.

    The frames in ``dfs`` are unpacked as positional arguments, followed by
    the string ``"n_click"``. The p-value is read from the ``pvalue``
    attribute of element 1 of whatever ``_ibtest`` returns.
    """
    outcome = _ibtest(*dfs, "n_click")
    return outcome[1].pvalue


def ubtest(dfs: list[pd.DataFrame]) -> float:
    """Apply the shared ``_ubtest`` instance to ``dfs`` and return a p-value.

    The frames in ``dfs`` are unpacked as positional arguments, followed by
    the string ``"n_click"``. The p-value is read from the ``pvalue``
    attribute of element 1 of whatever ``_ubtest`` returns.
    """
    outcome = _ubtest(*dfs, "n_click")
    return outcome[1].pvalue

In [5]:
from t_ab.aa import AATest

In [6]:
# Build the A/A harness around `data_loader`, then run it with both p-value
# functions.
# NOTE(review): how many splits AATest draws and how it aggregates them is
# defined in t_ab.aa and is not visible here — confirm before interpreting.
aa_test = AATest(data_loader)
# `results` is iterated in the next cell; each item is read for `.pvalue`
# and `.is_rejected`.
results = aa_test(ibtest, ubtest)

In [7]:
# One line per entry of `results`: the p-value followed by the rejection flag.
for result in results:
    print(f"{result.pvalue!s} {result.is_rejected!s}")

0.7151342299091334 False
0.6606357823280484 False
