In [1]:
import numpy as np
import pandas as pd

In [2]:
# Synthetic impression/click log used as input for the A/A test below.
# A seeded Generator makes the frame (and every downstream p-value)
# reproducible under Restart Kernel -> Run All.
SEED = 0
N_ROWS = 100_000
rng = np.random.default_rng(SEED)

df = pd.DataFrame(
    dict(
        # user ids in [0, 999]; many rows share the same user
        user_id=rng.integers(0, 1000, size=N_ROWS),
        # impressions per row in [0, 1000]
        n_imp=rng.integers(0, 1001, size=N_ROWS),
    )
)
# Clicks are drawn uniformly from [0, n_imp] for each row, so n_click never
# exceeds n_imp. One vectorized draw replaces the per-row Python lambda.
df["n_click"] = rng.integers(0, df["n_imp"].to_numpy(), endpoint=True)
df

Unnamed: 0,user_id,n_imp,n_click
0,53,827,195
1,554,526,440
2,973,925,162
3,884,119,59
4,636,768,398
...,...,...,...
99995,577,927,290
99996,483,951,464
99997,161,974,777
99998,17,26,4


In [3]:
from sklearn.model_selection import train_test_split

# Distinct user ids; the A/A split below is made over users, not over rows.
user_ids = df["user_id"].unique()

def data_loader(i: int) -> list[pd.DataFrame]:
    """Partition ``df`` into user-disjoint groups for one A/A iteration.

    The module-level ``user_ids`` array is split 50/50 with
    ``train_test_split`` using ``i`` as ``random_state``, so the same ``i``
    always produces the same partition. Because the split is made over
    unique user ids, every row of a given user ends up in the same frame.

    Args:
        i: Seed forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        One DataFrame per user-id group, in the order returned by
        ``train_test_split``.
    """
    uid_groups = train_test_split(user_ids, test_size=.5, random_state=i)
    frames = []
    for uids in uid_groups:
        in_group = df.user_id.isin(uids)
        frames.append(df[in_group])
    return frames

In [4]:
from t_ab.ctr import ImpressionBasedCTRTtest, UserBasedCTRTtest


def ibtest(dfs: list[pd.DataFrame]) -> float:
    """Return the p-value reported by ``ImpressionBasedCTRTtest`` for ``dfs``.

    The test object is built with the ``"user_id"`` and ``"n_imp"`` column
    names; the frames in ``dfs`` are unpacked as its leading positional
    arguments, followed by the ``"n_click"`` column name.

    Args:
        dfs: Frames to compare (as produced by ``data_loader``).

    Returns:
        The ``pvalue`` attribute of element 1 of the test's return value.
    """
    # NOTE(review): the layout of the returned sequence is defined in
    # t_ab.ctr and is not visible here; only index 1 is used.
    tester = ImpressionBasedCTRTtest("user_id", "n_imp")
    outcome = tester(*dfs, "n_click")
    return outcome[1].pvalue


def ubtest(dfs: list[pd.DataFrame]) -> float:
    """Return the p-value reported by ``UserBasedCTRTtest`` for ``dfs``.

    The test object is built with the ``"user_id"`` and ``"n_imp"`` column
    names; the frames in ``dfs`` are unpacked as its leading positional
    arguments, followed by the ``"n_click"`` column name.

    Args:
        dfs: Frames to compare (as produced by ``data_loader``).

    Returns:
        The ``pvalue`` attribute of element 1 of the test's return value.
    """
    # NOTE(review): the layout of the returned sequence is defined in
    # t_ab.ctr and is not visible here; only index 1 is used.
    tester = UserBasedCTRTtest("user_id", "n_imp")
    outcome = tester(*dfs, "n_click")
    return outcome[1].pvalue

In [5]:
from t_ab.aa.aa import AATest

In [6]:
# Run the A/A check: a default-configured AATest receives the user-level
# split generator and the two p-value functions defined above.
# NOTE(review): AATest's internals are not visible in this notebook;
# presumably it calls data_loader with several seeds and aggregates the
# p-values of each test function. Confirm in t_ab.aa.aa.
results = AATest()(data_loader, ibtest, ubtest)

In [7]:
# Print each result's p-value and rejection flag, one line per result.
# NOTE(review): presumably one result per test function, in the order they
# were passed to AATest (ibtest, then ubtest). Confirm against AATest.
for result in results:
    print(result.pvalue, result.is_rejected)

0.7394257714891308 False
0.8429637602259783 False
