In [1]:
import pandas as pd
import warnings

warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

from hypex.dataset import Dataset, ExperimentData, InfoRole, TreatmentRole, TargetRole
from hypex.comparators.abstract import GroupComparator
from hypex.comparators import TTest, KSTest, GroupSizes, GroupDifference
from hypex.utils import SpaceEnum
from hypex.splitters import AASplitter
from hypex.analyzers import OneAAStatAnalyzer
from hypex.experiments import Experiment, OnRoleExperiment
from hypex.experiments.base_complex import ParamsExperiment
from hypex.reporters import DatasetReporter, AADictReporter

In [2]:
# Column -> role mapping handed to hypex. Only these four columns are given an
# explicit role here; the CSV has more columns (see the displayed frame).
# NOTE(review): how Dataset treats the unlisted columns is not visible in this
# notebook -- confirm against the hypex Dataset documentation.
column_roles = {
    "user_id": InfoRole(int),
    "treat": TreatmentRole(int),
    "pre_spends": TargetRole(),
    "post_spends": TargetRole(),
}

# "data.csv" is a relative path, so the notebook has to be run from the
# directory that contains the file.
data = Dataset(roles=column_roles, data="data.csv")
data

      user_id  signup_month  treat  pre_spends  post_spends   age gender  \
0           0             0      0       488.0   414.444444   NaN      M   
1           1             8      1       512.5   462.222222  26.0    NaN   
2           2             7      1       483.0   479.444444  25.0      M   
3           3             0      0       501.5   424.333333  39.0      M   
4           4             1      1       543.0   514.555556  18.0      F   
...       ...           ...    ...         ...          ...   ...    ...   
9995     9995            10      1       538.5   450.444444  42.0      M   
9996     9996             0      0       500.5   430.888889  26.0      F   
9997     9997             3      1       473.0   534.111111  22.0      F   
9998     9998             2      1       495.0   523.222222  67.0      F   
9999     9999             7      1       508.0   475.888889  38.0      F   

        industry  
0     E-commerce  
1     E-commerce  
2      Logistics  
3     E-com

In [3]:
# Pipeline stages, created in the order in which they are executed.
splitter = AASplitter()

# Group-level statistics and tests, wrapped in OnRoleExperiment with
# role=TargetRole().
target_checks = OnRoleExperiment(
    executors=[GroupSizes(), GroupDifference(), TTest(), KSTest()],
    role=TargetRole(),
)

split_analyzer = OneAAStatAnalyzer()

# Parameter values are keyed by executor class.
# - AASplitter: ten seeds, 0 through 9 (the result table has one row per seed).
# - GroupComparator: a single grouping role and a single space.
#   NOTE(review): presumably these apply to every comparator derived from
#   GroupComparator -- confirm in ParamsExperiment.
param_grid = {
    AASplitter: {"random_state": range(10)},
    GroupComparator: {
        "grouping_role": [TreatmentRole()],
        "space": [SpaceEnum.additional],
    },
}

aa = ParamsExperiment(
    executors=[splitter, target_checks, split_analyzer],
    parameters=param_grid,
    reporter=DatasetReporter(AADictReporter(front=False)),
)

In [4]:
# Wrap the loaded dataset in an ExperimentData container, which is the object
# the experiment's execute() call takes as input.
ed = ExperimentData(data)
# Run the `aa` experiment; the returned object exposes `analysis_tables`,
# which the following cells inspect.
res = aa.execute(ed)

In [5]:
# Display the analysis tables stored on the experiment result. The output is a
# dict; the entry shown is keyed by the experiment name ('ParamsExperiment┴┴')
# and has one row per random_state value.
res.analysis_tables

{'ParamsExperiment┴┴':    random_state  pre_spends┴control mean┴test  pre_spends┴test mean┴test  \
 0             0                      486.9940                   487.1935   
 0             1                      486.8745                   487.3130   
 0             2                      487.0467                   487.1408   
 0             3                      486.8456                   487.3419   
 0             4                      486.8927                   487.2948   
 0             5                      487.0975                   487.0900   
 0             6                      487.1112                   487.0763   
 0             7                      487.1000                   487.0875   
 0             8                      487.1599                   487.0276   
 0             9                      487.4289                   486.7586   
 
    pre_spends┴difference┴test  pre_spends┴difference %┴test  \
 0                      0.1995                      0.040966   
 

In [None]:
# NOTE(review): this cell repeats the previous cell's expression verbatim and
# has no execution count or output -- it looks like a leftover and can be
# deleted from the notebook.
res.analysis_tables

In [5]:
# Run a fresh OneAAStatAnalyzer over the experiment result to get the
# aggregated TTest / KSTest summary.
res_analyzer = OneAAStatAnalyzer().execute(res)

# Resolve the table key by executor name instead of hard-coding the full key:
# the id separator differs between the keys seen in this notebook
# ('ParamsExperiment┴┴' above vs. the previously hard-coded
# 'OneAAStatAnalyzer╰╰'), so a literal key raises KeyError whenever the
# installed hypex uses the other separator.
# NOTE(review): takes the first key with this prefix -- confirm that only one
# OneAAStatAnalyzer table is expected in `analysis_tables`.
analyzer_key = next(
    key for key in res_analyzer.analysis_tables if key.startswith("OneAAStatAnalyzer")
)
res_analyzer.analysis_tables[analyzer_key]

   TTest p-value  TTest pass  KSTest p-value  KSTest pass  mean test score
0       0.399466        0.05        0.501559          0.0         0.467528