In [1]:
from hypex.dataset import Dataset, ExperimentData, InfoRole, TreatmentRole, TargetRole
from hypex.experiments.ab import AB_TEST
from hypex.reporters import ABDictReporter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the dataset from data.csv, declaring a role for each column the
# experiment should treat specially: an identifier, the treatment flag and
# the targets to compare between groups.
data = Dataset(
    data="data.csv",
    roles=dict(
        user_id=InfoRole(int),
        treat=TreatmentRole(),
        pre_spends=TargetRole(),
        post_spends=TargetRole(),
        gender=TargetRole(str),
    ),
)
# Leave the dataset as the last expression so the notebook renders it.
data

      user_id  signup_month  treat  pre_spends  post_spends   age gender  \
0           0             0      0       488.0   414.444444   NaN      M   
1           1             8      1       512.5   462.222222  26.0    NaN   
2           2             7      1       483.0   479.444444  25.0      M   
3           3             0      0       501.5   424.333333  39.0      M   
4           4             1      1       543.0   514.555556  18.0      F   
...       ...           ...    ...         ...          ...   ...    ...   
9995     9995            10      1       538.5   450.444444  42.0      M   
9996     9996             0      0       500.5   430.888889  26.0      F   
9997     9997             3      1       473.0   534.111111  22.0      F   
9998     9998             2      1       495.0   523.222222  67.0      F   
9999     9999             7      1       508.0   475.888889  38.0      F   

        industry  
0     E-commerce  
1     E-commerce  
2      Logistics  
3     E-com

In [3]:
# Display the role recorded for each column of the dataset.
data.roles

{'user_id': Info(<class 'int'>),
 'treat': Treatment(<class 'int'>),
 'pre_spends': Target(<class 'float'>),
 'post_spends': Target(<class 'float'>),
 'gender': Target(<class 'str'>),
 'signup_month': Feature(<class 'int'>),
 'age': Feature(<class 'float'>),
 'industry': Feature(<class 'str'>)}

In [4]:
# Wrap the dataset in the ExperimentData container that the experiment consumes.
ed = ExperimentData(data)

# AB_TEST is used exactly as imported from hypex.experiments.ab.
test = AB_TEST

# Run the experiment; `result` is inspected in the following cells.
result = test.execute(ed)

In [5]:
# Display the analysis tables stored on the experiment result.
result.analysis_tables

{'GroupSizes┴┴':    control size  test size  control size %  test size %
 0          4936       5064           49.36        50.64,
 'GroupDifference┴┴pre_spends':    control mean   test mean  difference  difference %
 0    484.911973  489.220379    4.308406      0.888492,
 'TTest┴┴pre_spends':         p-value  statistic  pass
 0  2.315047e-30 -11.489293  True,
 'UTest┴┴pre_spends':         p-value   statistic  pass
 0  7.624263e-12  11509971.0  True,
 'GroupDifference┴┴post_spends':    control mean   test mean  difference  difference %
 0    420.046619  483.470664   63.424045     15.099287,
 'TTest┴┴post_spends':    p-value   statistic  pass
 0      0.0 -135.560001  True,
 'UTest┴┴post_spends':    p-value  statistic  pass
 0      0.0   408754.5  True,
 'Chi2Test┴┴gender':     p-value  statistic   pass
 0  0.351553   0.867843  False,
 'ABAnalyzer┴┴MultiTest':   accepted hypothesis
 0                 [2],
 'ABAnalyzer┴┴':    TTest p-value  TTest pass  UTest p-value  UTest pass
 0   1.157

In [6]:
# Summarise the experiment result as one flat dict of labelled values.
# ABDictReporter is imported in the first cell with the other hypex imports,
# so every dependency of the notebook is visible in one place.
ABDictReporter().report(result)

{'GroupSizes control size 0': 4936,
 'GroupSizes test size 0': 5064,
 'GroupSizes control size % 0': 49.36,
 'GroupSizes test size % 0': 50.63999999999999,
 'pre_spends GroupDifference control mean 0': 484.91197325769855,
 'pre_spends GroupDifference test mean 0': 489.2203791469194,
 'pre_spends GroupDifference difference 0': 4.3084058892208645,
 'pre_spends GroupDifference difference % 0': 0.8884923711568682,
 'post_spends GroupDifference control mean 0': 420.04661894471457,
 'post_spends GroupDifference test mean 0': 483.470664384764,
 'post_spends GroupDifference difference 0': 63.42404544004944,
 'post_spends GroupDifference difference % 0': 15.099287217068902,
 'pre_spends TTest p-value 0': 2.3150474758856975e-30,
 'pre_spends TTest pass 0': True,
 'post_spends TTest p-value 0': 0.0,
 'post_spends TTest pass 0': True,
 'pre_spends UTest p-value 0': 7.624262939003523e-12,
 'pre_spends UTest pass 0': True,
 'post_spends UTest p-value 0': 0.0,
 'post_spends UTest pass 0': True,
 'gen