In [1]:
import numpy as np
import pandas as pd

In [2]:
# Synthetic impression/click log used as input for the A/A test below.
# A dedicated, seeded generator makes the data identical on every
# Restart & Run All; change SEED to draw a different sample.
SEED = 0
N_ROWS = 100_000
rng = np.random.default_rng(SEED)

df = pd.DataFrame(
    dict(
        # user ids in [0, 999]; the same id appears on many rows
        user_id=rng.integers(0, 1000, size=N_ROWS),
        # impressions per row in [0, 1000]
        n_imp=rng.integers(0, 1001, size=N_ROWS),
    )
)
# Clicks are drawn uniformly from [0, n_imp] for each row. The upper bound is
# passed as an array so the draw is a single vectorised call instead of a
# Python-level lambda executed once per row.
df["n_click"] = rng.integers(0, df["n_imp"].to_numpy() + 1)
df

Unnamed: 0,user_id,n_imp,n_click
0,636,32,2
1,161,143,3
2,631,625,95
3,741,554,415
4,713,801,396
...,...,...,...
99995,458,619,20
99996,407,414,409
99997,324,21,8
99998,247,582,178


In [3]:
from sklearn.model_selection import train_test_split

# Distinct user ids present in the log; the split in data_loader is done on
# these ids rather than on individual rows.
user_ids = df["user_id"].unique()

def data_loader(i: int) -> list[pd.DataFrame]:
    """Split the users into two groups and return each group's rows of ``df``.

    The split is made over the unique user ids (``user_ids``), not over rows,
    so all rows belonging to one user end up in the same frame.

    Args:
        i: Passed to ``train_test_split`` as ``random_state``; the same ``i``
            yields the same split.

    Returns:
        The filtered frames, in the order ``train_test_split`` returns the
        id groups (half of the ids are assigned to the second group).
    """
    id_groups = train_test_split(user_ids, test_size=.5, random_state=i)
    frames = []
    for group_ids in id_groups:
        in_group = df.user_id.isin(group_ids)
        frames.append(df[in_group])
    return frames

In [4]:
from t_ab.ctr import ImpressionBasedCTRTtest, UserBasedCTRTtest

# Shared tester instances reused by the ibtest / ubtest wrappers below.
# Both are built with the same two column names from df.
# NOTE(review): presumably the first argument is the user-key column and the
# second the impression-count column — confirm against t_ab.ctr.
_ibtest = ImpressionBasedCTRTtest("user_id", "n_imp")
_ubtest = UserBasedCTRTtest("user_id", "n_imp")


def ibtest(dfs: list[pd.DataFrame]) -> float:
    """Run the impression-based CTR t-test on ``dfs`` and return its p-value.

    Args:
        dfs: Frames to compare; they are unpacked as the leading positional
            arguments of ``_ibtest``, followed by the ``"n_click"`` column
            name.

    Returns:
        The ``pvalue`` attribute of the element at index 1 of what
        ``_ibtest`` returns.
    """
    outcome = _ibtest(*dfs, "n_click")
    test_result = outcome[1]
    return test_result.pvalue


def ubtest(dfs: list[pd.DataFrame]) -> float:
    """Run the user-based CTR t-test on ``dfs`` and return its p-value.

    Args:
        dfs: Frames to compare; they are unpacked as the leading positional
            arguments of ``_ubtest``, followed by the ``"n_click"`` column
            name.

    Returns:
        The ``pvalue`` attribute of the element at index 1 of what
        ``_ubtest`` returns.
    """
    outcome = _ubtest(*dfs, "n_click")
    test_result = outcome[1]
    return test_result.pvalue

In [5]:
from t_ab.aa import AATest

In [6]:
# Run the A/A test with default settings: AATest receives the split loader
# followed by the p-value functions to evaluate.
# NOTE(review): presumably it calls data_loader with varying seeds and returns
# one result per test function, in the order given — confirm against t_ab.aa.
results = AATest()(data_loader, ibtest, ubtest)

In [7]:
# One output line per entry in results: its p-value, then its rejection flag.
for result in results:
    print(result.pvalue, result.is_rejected)

0.8509996368670685 False
0.507305117038115 False
