In [1]:
import numpy as np
import pandas as pd

In [2]:
# Synthetic impression/click log used as input for the A/A test below.
SEED = 0  # fixed seed so that Restart & Run All regenerates the same data
N_ROWS = 100000
rng = np.random.default_rng(SEED)

n_imp_values = rng.integers(1, 101, size=N_ROWS)  # impressions in [1, 100]
# Exclusive upper bound for the click draw of each row: ceil(0.3 * (n_imp + 1)).
# It is always >= 1, so a row with a single impression gets 0 clicks.
click_cap = np.ceil((n_imp_values + 1) * .3).astype(int)

df = pd.DataFrame(
    dict(
        user_id=rng.integers(0, 1000, size=N_ROWS),  # ids in [0, 1000)
        n_imp=n_imp_values,
        # One vectorized draw with a per-row upper bound replaces a
        # Python-level random call for every row.
        n_click=rng.integers(0, click_cap),
    )
)
df

Unnamed: 0,user_id,n_imp,n_click
0,171,27,8
1,295,98,6
2,83,18,5
3,783,5,1
4,715,64,5
...,...,...,...
99995,544,2,0
99996,224,1,0
99997,927,42,1
99998,22,92,8


In [3]:
from sklearn.model_selection import train_test_split

# Distinct user ids; splits are made over users, not over individual rows.
user_ids = df["user_id"].unique()

def data_loader(i: int) -> list[pd.DataFrame]:
    """Split ``df`` into two user-disjoint halves.

    The unique user ids are split 50/50 with ``train_test_split`` using ``i``
    as ``random_state``, so the same ``i`` always yields the same split.

    Args:
        i: Seed forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        The rows of ``df`` for each group of user ids, in the order
        ``train_test_split`` returns the groups.
    """
    id_groups = train_test_split(user_ids, test_size=.5, random_state=i)
    frames = []
    for group in id_groups:
        # Keep every row whose user belongs to this group.
        belongs = df.user_id.isin(group)
        frames.append(df[belongs])
    return frames

In [4]:
from itertools import combinations

from t_ab.ctr import ImpressionBasedCTRTtest, UserBasedCTRTtest

# Both CTR t-tests are built with the same two positional arguments.
# NOTE(review): presumably these are the user-id and impression-count column
# names expected by t_ab.ctr — confirm against that package.
_ibtest, _ubtest = (
    ctr_test_cls("user_id", "n_imp")
    for ctr_test_cls in (ImpressionBasedCTRTtest, UserBasedCTRTtest)
)


def ibtest(dfs: list[pd.DataFrame]) -> list[float]:
    """Run the impression-based CTR t-test on every pair of frames in ``dfs``.

    Args:
        dfs: Frames to compare pairwise. ``"n_click"`` is passed to the test
            as its last argument (presumably the click-count column).

    Returns:
        One p-value per unordered pair, in ``itertools.combinations`` order.
    """
    return [
        # Pass exactly the current pair; the previous ``*dfs`` only worked
        # when ``dfs`` held two frames. Element 1 of the result carries the
        # ``pvalue`` attribute.
        _ibtest(dfa, dfb, "n_click")[1].pvalue
        for dfa, dfb in combinations(dfs, 2)
    ]


def ubtest(dfs: list[pd.DataFrame]) -> list[float]:
    """Run the user-based CTR t-test on every pair of frames in ``dfs``.

    Args:
        dfs: Frames to compare pairwise. ``"n_click"`` is passed to the test
            as its last argument (presumably the click-count column).

    Returns:
        One p-value per unordered pair, in ``itertools.combinations`` order.
    """
    return [
        # Pass exactly the current pair; the previous ``*dfs`` only worked
        # when ``dfs`` held two frames. Element 1 of the result carries the
        # ``pvalue`` attribute.
        _ubtest(dfa, dfb, "n_click")[1].pvalue
        for dfa, dfb in combinations(dfs, 2)
    ]

In [5]:
from t_ab.aa import AATest

In [6]:
# Build the A/A test harness around the split function defined above.
# NOTE(review): presumably AATest calls data_loader(i) with varying i to draw
# the splits — confirm against t_ab.aa.
aa_test = AATest(data_loader)

In [7]:
# Run both p-value functions through the A/A harness. `results` is iterable;
# presumably it holds one entry per function passed, in argument order — confirm.
results = aa_test(ibtest, ubtest)

In [8]:
# Print, for each result, its p-values followed by its rejection flag.
for result in results:
    print(result.pvalues, result.is_rejected)

[0.9387365437289252] False
[0.9941206734983588] False
