In [3]:
from hypex.dataset import Dataset, ExperimentData, InfoRole, TreatmentRole, TargetRole
from hypex.experiments.aa import ONE_AA_TEST
from hypex.reporters.aa import AADatasetReporter

# Creating a test dataset from synthetic data

It is important to mark the data fields by assigning the appropriate roles:
- FeatureRole: a role for columns that contain features or predictor variables. Our split will be based on them. Applied by default if the role is not specified for the column.
- TreatmentRole: a role for columns that show the treatment or intervention.
- TargetRole: a role for columns that show the target or outcome variable.
- InfoRole: a role for columns that contain information about the data, such as user IDs. 

In [4]:
# Build the Dataset from the CSV file and tag the columns that need an
# explicit role. Columns not listed here keep the default feature role.
data = Dataset(
    data="data.csv",
    roles={
        "user_id": InfoRole(float),    # identifier column, informational only
        "treat": TreatmentRole(int),   # treatment / intervention flag
        "pre_spends": TargetRole(),    # outcome measured before the treatment
        "post_spends": TargetRole(),   # outcome measured after the treatment
    },
)
# Leave the dataset as the last expression so the notebook displays it.
data

      user_id  signup_month  treat  pre_spends  post_spends   age gender  \
0         0.0             0      0       488.0   414.444444   NaN      M   
1         1.0             8      1       512.5   462.222222  26.0    NaN   
2         2.0             7      1       483.0   479.444444  25.0      M   
3         3.0             0      0       501.5   424.333333  39.0      M   
4         4.0             1      1       543.0   514.555556  18.0      F   
...       ...           ...    ...         ...          ...   ...    ...   
9995   9995.0            10      1       538.5   450.444444  42.0      M   
9996   9996.0             0      0       500.5   430.888889  26.0      F   
9997   9997.0             3      1       473.0   534.111111  22.0      F   
9998   9998.0             2      1       495.0   523.222222  67.0      F   
9999   9999.0             7      1       508.0   475.888889  38.0      F   

        industry  
0     E-commerce  
1     E-commerce  
2      Logistics  
3     E-com

# AA test
Next we set up the test pipeline and the experiment data it will work on, and then run it. We either pick one of the pre-assembled pipelines (ONE_AA_TEST in our case) or build a custom one, wrap the prepared dataset in ExperimentData so that experiments can be run on it, and execute the test:

In [5]:
# Use the pre-assembled AA pipeline as-is.
test = ONE_AA_TEST
# Wrap the prepared Dataset so that an experiment can be executed on it.
ed = ExperimentData(data)
# Run the pipeline; the returned object exposes the resulting analysis tables.
result = test.execute(ed)

We can access the results of the experiment directly through the analysis_tables property of the returned ExperimentData:

In [6]:
# Mapping of result tables, keyed by analysis name (plus the target column where one applies).
result.analysis_tables

{'GroupSizes┴┴':    control size  test size  control size %  test size %
 0          5000       5000            50.0         50.0,
 'GroupDifference┴┴pre_spends':    control mean  test mean  difference  difference %
 0      487.1276   487.0599     -0.0677     -0.013898,
 'TTest┴┴pre_spends':    p-value  statistic   pass
 0  0.85765   0.179371  False,
 'KSTest┴┴pre_spends':     p-value  statistic   pass
 0  0.807376     0.0128  False,
 'GroupDifference┴┴post_spends':    control mean  test mean  difference  difference %
 0    451.647911   452.6812    1.033289      0.228782,
 'TTest┴┴post_spends':     p-value  statistic   pass
 0  0.189825  -1.311187  False,
 'KSTest┴┴post_spends':     p-value  statistic   pass
 0  0.406718     0.0178  False,
 'OneAAStatAnalyzer┴┴':    mean TTest p-value  mean TTest pass  mean KSTest p-value  mean KSTest pass  \
 0            0.523737              0.0             0.607047               0.0   
 
    mean test score  
 0         0.579277  }

# Experiment results
To display a summary report of the test, we call the report method of the reporter that corresponds to the test type (the AA test in our case):

In [7]:
# Summarise the AA test result; the displayed table has one row per target column.
AADatasetReporter().report(result)

       feature group TTest pass  TTest p-value KSTest pass  KSTest p-value
0   pre_spends     0         OK       0.857650          OK        0.807376
1  post_spends     0         OK       0.189825          OK        0.406718