In [3]:
from hypex.dataset import Dataset, InfoRole, TreatmentRole, TargetRole
from hypex.experiments.ab import ABTest

In [4]:
# Map each column of interest to its role in the experiment.
# Columns of the CSV that are not listed here are still loaded; the
# `data.roles` output further down shows them with a Feature role.
column_roles = {
    "user_id": InfoRole(int),
    "treat": TreatmentRole(),
    "pre_spends": TargetRole(),
    "post_spends": TargetRole(),
    "gender": TargetRole(),
}

# Build the dataset from the CSV file using the role mapping above.
data = Dataset(roles=column_roles, data="data.csv")
data

      user_id  signup_month  treat  pre_spends  post_spends   age gender  \
0           0             0      0       488.0   414.444444   NaN      M   
1           1             8      1       512.5   462.222222  26.0    NaN   
2           2             7      1       483.0   479.444444  25.0      M   
3           3             0      0       501.5   424.333333  39.0      M   
4           4             1      1       543.0   514.555556  18.0      F   
...       ...           ...    ...         ...          ...   ...    ...   
9995     9995            10      1       538.5   450.444444  42.0      M   
9996     9996             0      0       500.5   430.888889  26.0      F   
9997     9997             3      1       473.0   534.111111  22.0      F   
9998     9998             2      1       495.0   523.222222  67.0      F   
9999     9999             7      1       508.0   475.888889  38.0      F   

        industry  
0     E-commerce  
1     E-commerce  
2      Logistics  
3     E-com

In [5]:
# Inspect the role attached to every column of the dataset.
# In the output, columns that were not given a role explicitly
# (signup_month, age, industry) appear as Feature, and every role is
# displayed with a data type even where none was passed in.
data.roles

{'user_id': Info(<class 'int'>),
 'treat': Treatment(<class 'int'>),
 'pre_spends': Target(<class 'float'>),
 'post_spends': Target(<class 'float'>),
 'gender': Target(<class 'str'>),
 'signup_month': Feature(<class 'int'>),
 'age': Feature(<class 'float'>),
 'industry': Feature(<class 'str'>)}

In [6]:
# Configure an A/B test that runs three additional statistical tests,
# then execute it on the dataset built above.
test = ABTest(
    additional_tests=[
        "t-test",
        "u-test",
        "chi2-test",
    ],
)
result = test.execute(data)

In [7]:
# Summary table: one row per target feature with a pass flag and p-value
# for each requested test. In the output, the t-test/u-test columns are NaN
# for `gender`, and the chi2 columns are NaN for the two spend targets.
result.resume

       feature group TTest pass  TTest p-value UTest pass  UTest p-value  \
0   pre_spends     0         OK       0.117942         OK       0.116867   
1  post_spends     0         OK       0.956834         OK       0.949912   
2       gender     0        NaN            NaN        NaN            NaN   

  Chi2Test pass  Chi2Test p-value  
0           NaN               NaN  
1           NaN               NaN  
2            OK               1.0  

In [8]:
# No `multitest_method` was passed to ABTest for this run, so this attribute
# holds an explanatory message rather than a results table.
result.multitest

"There was less than three groups or multitest mathod wasn't provided"

In [9]:
# Control vs. test means for each target field and group, with the absolute
# and percentage difference between them.
result.difference

   control mean   test mean  difference  difference % group        field
0    484.920637  489.220379    4.299743      0.886690     1   pre_spends
0    484.920637  470.666667  -14.253970     -2.939444     1  post_spends
0    420.046760  483.470664   63.423904     15.099249     2   pre_spends
0    420.046760  419.814815   -0.231945     -0.055219     2  post_spends

In [10]:
# Control and test group sizes, absolute and as a percentage, per group.
# NOTE(review): the output shows only 3 rows in test group 2, so statistics
# reported for that group should be treated with caution.
result.sizes

   control size  test size  control size %  test size % group
0          4933       5064              49           50     1
0          4933          3              99            0     2

In [11]:
# Re-run the A/B test, this time with a Bonferroni multiple-testing method
# and without the `additional_tests` argument used in the earlier run.
# This rebinds both `test` and `result`: every cell below reads the outcome
# of this run, and the earlier result object is no longer reachable by name.
test = ABTest(multitest_method="bonferroni")
result = test.execute(data)

In [12]:
# Summary table for the Bonferroni run. Only the TTest columns appear and the
# `gender` row is absent; this run did not pass `additional_tests`
# (presumably the t-test is the default test set - TODO confirm).
result.resume

       feature group TTest pass  TTest p-value
0   pre_spends     0         OK       0.117942
1  post_spends     0         OK       0.956834

In [13]:
# Group sizes for the Bonferroni run; the output is identical to the one
# shown for the first run (test group 2 still has only 3 rows).
result.sizes

   control size  test size  control size %  test size % group
0          4933       5064              49           50     1
0          4933          3              99            0     2

In [14]:
# Mean differences for the Bonferroni run; the output is identical to the
# one shown for the first run.
result.difference

   control mean   test mean  difference  difference % group        field
0    484.920637  489.220379    4.299743      0.886690     1   pre_spends
0    484.920637  470.666667  -14.253970     -2.939444     1  post_spends
0    420.046760  483.470664   63.423904     15.099249     2   pre_spends
0    420.046760  419.814815   -0.231945     -0.055219     2  post_spends

In [15]:
# Multiple-testing table: one row per (field, group) with the original and
# adjusted p-values and the reject decision.
# NOTE(review): these rows do not line up with `result.resume` for this run.
# The value 0.117942 is the pre_spends p-value in the summary but appears here
# as the old p-value of post_spends / group 1, and pre_spends is marked
# rejected here while the summary reports "OK". Confirm the field/group
# labelling before drawing conclusions from this table.
result.multitest

   correction        field   new p-value   old p-value  rejected   test group
0    0.250000   pre_spends  1.230538e-29  3.076345e-30      True  TTest     1
1    0.250000  post_spends  4.717694e-01  1.179423e-01     False  TTest     1
2    0.000000   pre_spends  0.000000e+00  0.000000e+00      True  TTest     2
3    0.956834  post_spends  1.000000e+00  9.568340e-01     False  TTest     2