In [1]:
from hypex.dataset import Dataset, ExperimentData, InfoRole, TreatmentRole, TargetRole
from hypex.experiments.aa import ONE_AA_TEST, AA_TEST, AA_TEST_WITH_STRATIFICATION
from hypex.reporters.aa import AADatasetReporter, AAPassedReporter, AABestSplitReporter
from hypex.splitters import AASplitter

# Creation of a new test dataset with synthetic data. 

It is important to mark the data fields by assigning the appropriate roles:
- FeatureRole: a role for columns that contain features or predictor variables. Our split will be based on them. Applied by default if the role is not specified for the column.
- TreatmentRole: a role for columns that show the treatment or intervention.
- TargetRole: a role for columns that show the target or outcome variable.
- InfoRole: a role for columns that contain information about the data, such as user IDs. 

In [2]:
# Explicit role per column; columns not listed here get the default role
# (FeatureRole, per the notes above).
column_roles = {
    "user_id": InfoRole(float),
    "treat": TreatmentRole(int),
    "pre_spends": TargetRole(),
    "post_spends": TargetRole(),
}

# Build the dataset from the CSV file using the roles declared above.
data = Dataset(roles=column_roles, data="data.csv")
data

      user_id  signup_month  treat  pre_spends  post_spends   age gender  \
0         0.0             0      0       488.0   414.444444   NaN      M   
1         1.0             8      1       512.5   462.222222  26.0    NaN   
2         2.0             7      1       483.0   479.444444  25.0      M   
3         3.0             0      0       501.5   424.333333  39.0      M   
4         4.0             1      1       543.0   514.555556  18.0      F   
...       ...           ...    ...         ...          ...   ...    ...   
9995   9995.0            10      1       538.5   450.444444  42.0      M   
9996   9996.0             0      0       500.5   430.888889  26.0      F   
9997   9997.0             3      1       473.0   534.111111  22.0      F   
9998   9998.0             2      1       495.0   523.222222  67.0      F   
9999   9999.0             7      1       508.0   475.888889  38.0      F   

        industry  
0     E-commerce  
1     E-commerce  
2      Logistics  
3     E-com

# AA test
Next, we create the test pipeline and the experiment data it will work with, and then run it. We select one of the pre-assembled pipelines (ONE_AA_TEST in our case) or create a custom one, wrap the prepared dataset in ExperimentData so that experiments can be run on it, and then execute the test:

In [3]:
# Use the pre-assembled ONE_AA_TEST pipeline as-is.
test = ONE_AA_TEST
# Wrap the prepared dataset so the pipeline can be executed on it.
ed = ExperimentData(data)
# Run the pipeline; the following cells read the results from `result`.
result = test.execute(ed)

We can access the results of the experiment directly through the `analysis_tables` property of the resulting ExperimentData:

In [4]:
# Display the analysis tables collected during the run.
result.analysis_tables

{'GroupSizes┴┴':    control size  test size  control size %  test size %
 0          5000       5000            50.0         50.0,
 'GroupDifference┴┴pre_spends':    control mean  test mean  difference  difference %
 0      487.2024   486.9851     -0.2173     -0.044602,
 'TTest┴┴pre_spends':    p-value  statistic   pass
 0   0.5648   0.575746  False,
 'KSTest┴┴pre_spends':     p-value  statistic   pass
 0  0.877289     0.0118  False,
 'GroupDifference┴┴post_spends':    control mean   test mean  difference  difference %
 0    452.493178  451.835933   -0.657244      -0.14525,
 'TTest┴┴post_spends':     p-value  statistic   pass
 0  0.404321   0.833965  False,
 'KSTest┴┴post_spends':     p-value  statistic   pass
 0  0.544187      0.016  False,
 'OneAAStatAnalyzer┴┴':    mean TTest p-value  mean TTest pass  mean KSTest p-value  mean KSTest pass  \
 0             0.48456              0.0             0.710738               0.0   
 
    mean test score  
 0         0.635346  }

# Experiment results
To show the report with the summary of the test we run the report method of the reporter, associated with the respective test type, AA test in our case:

In [5]:
# Build the summary report for the AA run stored in `result`.
AADatasetReporter().report(result)

       feature group TTest pass  TTest p-value KSTest pass  KSTest p-value
0   pre_spends     0         OK       0.564800          OK        0.877289
1  post_spends     0         OK       0.404321          OK        0.544187

# Cycled AA test
Next, we create the test pipeline and the experiment data it will work with, and then run it. We select one of the pre-assembled pipelines (AA_TEST in our case) or create a custom one. We can also adjust some of the preset parameters of the experiment by assigning them to the respective params of the experiment. For example, here we set the range of random states we want to run our AA test for. Then we run the experiment on our prepared dataset, wrapped in ExperimentData.

In [6]:
# Start from the pre-assembled AA_TEST pipeline.
# NOTE(review): `aa` is just another name for the imported AA_TEST object, so the
# parameter assignment below presumably alters that shared preset as well —
# confirm this is intended.
aa = AA_TEST

# Random states (0..9) that the AA test is run for.
splitter_params = {"random_states": range(10)}
aa.executors[0].params[AASplitter] = splitter_params

# Execute on a fresh ExperimentData wrapper around the prepared dataset.
res = aa.execute(ExperimentData(data))
res

<hypex.dataset.dataset.ExperimentData at 0x7f212da0f7f0>

# Experiment results
To show the report with the summary of the test, we run the report method of the reporter, associated with the respective test type, AA test in our case.

AAPassedReporter summarizes the test outcomes (OK / NOT OK) over the different random states.

In [7]:
# Summarize the test outcomes (OK / NOT OK) for the cycled AA run stored in `res`.
AAPassedReporter().report(res)

  return self.data.replace(to_replace=to_replace, value=value, regex=regex)
  return self.data.replace(to_replace=to_replace, value=value, regex=regex)
  return self.data.replace(to_replace=to_replace, value=value, regex=regex)
  return self.data.replace(to_replace=to_replace, value=value, regex=regex)
  return self.data.replace(to_replace=to_replace, value=value, regex=regex)
  return self.data.replace(to_replace=to_replace, value=value, regex=regex)


  TTest aa test KSTest aa test TTest best split KSTest best split  result  \
0        NOT OK         NOT OK               OK                OK  NOT OK   
1        NOT OK         NOT OK               OK                OK  NOT OK   

       feature group  
0  post_spends     0  
1   pre_spends     0  

AABestSplitReporter returns the dataset with the best split among the ones covered by the cycled AA test.

In [8]:
# Show the dataset with the best split among those covered by the cycled AA test.
AABestSplitReporter().report(res)

      user_id  signup_month  treat  pre_spends  post_spends   age gender  \
0         0.0             0      0       488.0   414.444444   NaN      M   
1         1.0             8      1       512.5   462.222222  26.0    NaN   
2         2.0             7      1       483.0   479.444444  25.0      M   
3         3.0             0      0       501.5   424.333333  39.0      M   
4         4.0             1      1       543.0   514.555556  18.0      F   
...       ...           ...    ...         ...          ...   ...    ...   
9995   9995.0            10      1       538.5   450.444444  42.0      M   
9996   9996.0             0      0       500.5   430.888889  26.0      F   
9997   9997.0             3      1       473.0   534.111111  22.0      F   
9998   9998.0             2      1       495.0   523.222222  67.0      F   
9999   9999.0             7      1       508.0   475.888889  38.0      F   

        industry    split  
0     E-commerce  control  
1     E-commerce  control  
2  

# AA test with stratification

We then repeat this for the AA test with stratification, additionally setting the share of the control group in the splits that we expect the test to generate.

In [9]:
# Start from the pre-assembled stratified AA pipeline.
# NOTE(review): `aa` is just another name for the imported
# AA_TEST_WITH_STRATIFICATION object, so the parameter assignment below
# presumably alters that shared preset as well — confirm this is intended.
aa = AA_TEST_WITH_STRATIFICATION

# Random states (0..9) to run for, plus the share of the control group in the splits.
splitter_params = {"random_states": range(10), "control_size": [0.3]}
aa.executors[0].params[AASplitter] = splitter_params

# Execute on a fresh ExperimentData wrapper around the prepared dataset.
res = aa.execute(ExperimentData(data))
res

<hypex.dataset.dataset.ExperimentData at 0x7f212d8a2bc0>

In [10]:
# Summarize the test outcomes (OK / NOT OK) for the stratified AA run stored in `res`.
AAPassedReporter().report(res)

  return self.data.replace(to_replace=to_replace, value=value, regex=regex)
  return self.data.replace(to_replace=to_replace, value=value, regex=regex)
  return self.data.replace(to_replace=to_replace, value=value, regex=regex)
  return self.data.replace(to_replace=to_replace, value=value, regex=regex)
  return self.data.replace(to_replace=to_replace, value=value, regex=regex)
  return self.data.replace(to_replace=to_replace, value=value, regex=regex)


  TTest aa test KSTest aa test TTest best split KSTest best split  result  \
0        NOT OK         NOT OK               OK                OK  NOT OK   
1        NOT OK         NOT OK               OK                OK  NOT OK   

       feature group  
0  post_spends     0  
1   pre_spends     0  

In [11]:
# Show the dataset with the best split found by the stratified AA run stored in `res`.
AABestSplitReporter().report(res)

      user_id  signup_month  treat  pre_spends  post_spends   age gender  \
0         0.0             0      0       488.0   414.444444   NaN      M   
1         1.0             8      1       512.5   462.222222  26.0    NaN   
2         2.0             7      1       483.0   479.444444  25.0      M   
3         3.0             0      0       501.5   424.333333  39.0      M   
4         4.0             1      1       543.0   514.555556  18.0      F   
...       ...           ...    ...         ...          ...   ...    ...   
9995   9995.0            10      1       538.5   450.444444  42.0      M   
9996   9996.0             0      0       500.5   430.888889  26.0      F   
9997   9997.0             3      1       473.0   534.111111  22.0      F   
9998   9998.0             2      1       495.0   523.222222  67.0      F   
9999   9999.0             7      1       508.0   475.888889  38.0      F   

        industry    split  
0     E-commerce     test  
1     E-commerce     test  
2  