In [1]:
from hypex.dataset.dataset import Dataset, ExperimentData
from hypex.dataset.roles import (
    InfoRole,
    FeatureRole,
    TreatmentRole,
    TargetRole,
)
from hypex.experiments.aa_test import AA_TEST
from hypex.reporters.aa import AADictReporter

In [2]:
# Build the dataset from data.csv, assigning a role to every column,
# then display it as the cell output.
data = Dataset(
    data="data.csv",
    roles=dict(
        user_id=InfoRole(),
        signup_month=FeatureRole(),
        treat=TreatmentRole(),
        pre_spends=TargetRole(),
        post_spends=TargetRole(),
        age=FeatureRole(),
        gender=FeatureRole(),
        industry=FeatureRole(),
    ),
)
data

      user_id  signup_month  treat  pre_spends  post_spends   age gender  \
0           0             0      0       488.0   414.444444   NaN      M   
1           1             8      1       512.5   462.222222  26.0    NaN   
2           2             7      1       483.0   479.444444  25.0      M   
3           3             0      0       501.5   424.333333  39.0      M   
4           4             1      1       543.0   514.555556  18.0      F   
...       ...           ...    ...         ...          ...   ...    ...   
9995     9995            10      1       538.5   450.444444  42.0      M   
9996     9996             0      0       500.5   430.888889  26.0      F   
9997     9997             3      1       473.0   534.111111  22.0      F   
9998     9998             2      1       495.0   523.222222  67.0      F   
9999     9999             7      1       508.0   475.888889  38.0      F   

        industry  
0     E-commerce  
1     E-commerce  
2      Logistics  
3     E-com

In [3]:
# Show the role attached to each column of the dataset.
data.roles

{'user_id': Info,
 'signup_month': Feature,
 'treat': Treatment,
 'pre_spends': Target,
 'post_spends': Target,
 'age': Feature,
 'gender': Feature,
 'industry': Feature}

In [4]:
# Run the predefined AA_TEST experiment on the dataset.
# `test`, `ed` and `result` are rebound by later cells, so the cells below
# must be executed in order to refer to this AA result.
test = AA_TEST
ed = ExperimentData(data)  # wrap the dataset for execution
result = test.execute(ed)

In [ ]:
# Pass the AA result to AADictReporter; the value returned by report() is
# displayed as the cell output.
AADictReporter().report(result)

In [ ]:
# Inspect the analysis tables stored on the AA result.
result.analysis_tables

In [ ]:
from hypex.experiments.ab_test import AB_TEST

# Run the predefined AB_TEST experiment on the same dataset.
# NOTE: this rebinds `test`, `ed` and `result`; the AA result from the
# earlier cell is no longer reachable through `result` afterwards.
test = AB_TEST
ed = ExperimentData(data)
result = test.execute(ed)

In [ ]:
# Look up the AB result's analysis table stored under the key 'ABAnalyzer╰╰'.
result.analysis_tables['ABAnalyzer╰╰']

In [ ]:
from hypex.experiments.homogeneity_test import HOMOGENEITY_TEST

# Run the predefined HOMOGENEITY_TEST experiment on the same dataset.
# NOTE: this rebinds `test`, `ed` and `result` again, replacing the AB result.
test = HOMOGENEITY_TEST
ed = ExperimentData(data)
result = test.execute(ed)

In [ ]:
# Inspect the analysis tables stored on the homogeneity-test result.
result.analysis_tables

In [ ]:
import pandas as pd
import warnings

# Silence pandas PerformanceWarning for the rest of the session.
warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)

from hypex.comparators.hypothesis_testing import TTest, KSTest
from hypex.comparators.comparators import GroupSizes, GroupDifference
from hypex.utils.enums import SpaceEnum
from hypex.splitters.aa import AASplitter
from hypex.analyzers.aa import OneAASplitAnalyzer
from hypex.experiments.base import CycledExperiment, Experiment, OnRoleExperiment

# Hand-assembled AA pipeline: an AASplitter followed by four comparators that
# are run through OnRoleExperiment with role=TargetRole().
# Every comparator gets its own TreatmentRole() grouping role and uses the
# `additional` space.
aa = Experiment(
    executors=[
        AASplitter(),
        OnRoleExperiment(
            executors=[
                comparator(grouping_role=TreatmentRole(), space=SpaceEnum.additional)
                for comparator in (GroupSizes, GroupDifference, TTest, KSTest)
            ],
            role=TargetRole(),
        ),
    ]
)

# Repeat the pipeline for two iterations, with OneAASplitAnalyzer as analyzer.
experiment = CycledExperiment(
    n_iterations=2,
    inner_executor=aa,
    analyzer=OneAASplitAnalyzer(),
)
ed = ExperimentData(data)
res = experiment.execute(ed)

In [ ]:
# Inspect the analysis tables produced by the cycled experiment.
res.analysis_tables

In [ ]:
# Run a fresh OneAASplitAnalyzer on the cycled-experiment result.
# NOTE(review): `experiment` was already given analyzer=OneAASplitAnalyzer();
# confirm whether this second pass is intentional.
res_analyzer = OneAASplitAnalyzer().execute(res)

In [ ]:
# Look up the analysis table stored under the key 'OneAASplitAnalyzer╰╰'.
res_analyzer.analysis_tables['OneAASplitAnalyzer╰╰']

In [ ]:
# analyser version

from hypex.dataset.roles import StatisticRole

new_res = {}
fields = ['pre_spends', 'post_spends'] # получаем столбцы по TargetRole
tests = ['KSTest', 'TTest']
for j in fields:
    for i in list(res.analysis_tables.keys()):
        if j in i and i.split('╰╰')[0] in tests:
            test = i.split('╰╰')[0]
            if j not in new_res:
                new_res[j] = {test: []}
            elif test not in new_res[j]:
                new_res[j].update({test: []})
            new_res[j][test].append(list(res.analysis_tables[i].data['pass'])[0])
    new_res[j] = Dataset.from_dict(new_res[j], roles={}).mean()
    new_res[j].add_column(new_res[j].apply(
                lambda x: int(0.8 * 0.05 <= x['TTest'] <= 1.2 * 0.05), {'TTest passed': StatisticRole()}, axis=1
            ))
    new_res[j].add_column(new_res[j].apply(
                lambda x: int(0.8 * 0.05 <= x['KSTest'] <= 1.2 * 0.05), {'KSTest passed': StatisticRole()}, axis=1
            ))
new_res