In [1]:
from hypex.dataset import Dataset, ExperimentData, InfoRole, TreatmentRole, TargetRole
from hypex.experiments.aa import AA_TEST
from hypex.reporters import AADictReporter

In [2]:
# Load data.csv and assign explicit roles to the columns the experiments use.
# Columns not listed in the mapping are reported as Feature by `data.roles`.
data = Dataset(
    roles={
        "user_id": InfoRole(),
        "treat": TreatmentRole(),
        "pre_spends": TargetRole(),
        "post_spends": TargetRole(),
    },
    data="data.csv",
)
data

      user_id  signup_month  treat  pre_spends  post_spends   age gender  \
0           0             0      0       488.0   414.444444   NaN      M   
1           1             8      1       512.5   462.222222  26.0    NaN   
2           2             7      1       483.0   479.444444  25.0      M   
3           3             0      0       501.5   424.333333  39.0      M   
4           4             1      1       543.0   514.555556  18.0      F   
...       ...           ...    ...         ...          ...   ...    ...   
9995     9995            10      1       538.5   450.444444  42.0      M   
9996     9996             0      0       500.5   430.888889  26.0      F   
9997     9997             3      1       473.0   534.111111  22.0      F   
9998     9998             2      1       495.0   523.222222  67.0      F   
9999     9999             7      1       508.0   475.888889  38.0      F   

        industry  
0     E-commerce  
1     E-commerce  
2      Logistics  
3     E-com

In [3]:
# Show the role attached to every column of the dataset.
data.roles

{'user_id': Info,
 'treat': Treatment,
 'pre_spends': Target,
 'post_spends': Target,
 'signup_month': Feature,
 'age': Feature,
 'gender': Feature,
 'industry': Feature}

In [4]:
# Wrap the dataset in ExperimentData and run the ready-made AA test on it.
ed = ExperimentData(data)
test = AA_TEST
result = test.execute(ed)

In [5]:
# Turn the AA result into a single dictionary of summary values.
AADictReporter().report(result)

{'random_state': None,
 'B pre_spends control mean': 487.1159,
 'B pre_spends difference': -0.0443000000000211,
 'B pre_spends difference %': -0.009094344898208373,
 'B pre_spends test mean': 487.0716,
 'B post_spends control mean': 452.0868444444444,
 'B post_spends difference': 0.15542222222217106,
 'B post_spends difference %': 0.034378842059235026,
 'B post_spends test mean': 452.2422666666666,
 'B control size': 5000.0,
 'B control size %': 50.0,
 'B test size': 5000.0,
 'B test size %': 50.0,
 'TTest p-value': p-value    0.875119
 dtype: float64,
 'TTest pass': pass    0.0
 dtype: float64,
 'KSTest p-value': p-value    0.730616
 dtype: float64,
 'KSTest pass': pass    0.0
 dtype: float64,
 'mean test score': p-value    0.778783
 dtype: float64}

In [6]:
# Show the analysis tables from the AA run: group sizes, then the
# difference, TTest and KSTest tables for each target column.
result.analysis_tables

{'GroupSizes║║[][A]':                      B
 control size    5000.0
 control size %    50.0
 test size       5000.0
 test size %       50.0,
 "GroupDifference║║['pre_spends'][A]":                                   B
 pre_spends control mean  487.115900
 pre_spends difference     -0.044300
 pre_spends difference %   -0.009094
 pre_spends test mean     487.071600,
 "TTest║║['pre_spends'][A]":   group  statistic   p-value   pass
 0     B   0.117373  0.906567  False,
 "KSTest║║['pre_spends'][A]":   group  statistic  p-value   pass
 0     B     0.0152  0.61041  False,
 "GroupDifference║║['post_spends'][A]":                                    B
 post_spends control mean  452.086844
 post_spends difference      0.155422
 post_spends difference %    0.034379
 post_spends test mean     452.242267,
 "TTest║║['post_spends'][A]":   group  statistic   p-value   pass
 0     B  -0.197206  0.843671  False,
 "KSTest║║['post_spends'][A]":   group  statistic   p-value   pass
 0     B     0.0122  0.85082

In [7]:
from hypex.experiments.ab import AB_TEST

# Wrap the dataset in a fresh ExperimentData and run the ready-made AB test.
ed = ExperimentData(data)
test = AB_TEST
result = test.execute(ed)

In [10]:
# Show the analysis tables from the AB run (group sizes plus, per target,
# tables such as GroupDifference, ATE, TTest and UTest).
result.analysis_tables

{'GroupSizes║║[][t]':                       1
 control size    4936.00
 control size %    49.36
 test size       5064.00
 test size %       50.64,
 "GroupDifference║║['pre_spends'][t]":                                   1
 pre_spends control mean  484.911973
 pre_spends difference      4.308406
 pre_spends difference %    0.888492
 pre_spends test mean     489.220379,
 "ATE║║['pre_spends'][t]":                         1
 pre_spends ATE  487.09375,
 "TTest║║['pre_spends'][t]":    group  statistic       p-value  pass
 0      1 -11.489293  2.315047e-30  True,
 "UTest║║['pre_spends'][t]":    group   statistic       p-value  pass
 0      1  11509971.0  7.624263e-12  True,
 "GroupDifference║║['post_spends'][t]":                                    1
 post_spends control mean  420.046619
 post_spends difference     63.424045
 post_spends difference %   15.099287
 post_spends test mean     483.470664,
 "ATE║║['post_spends'][t]":                           1
 post_spends ATE  452.164556,
 "TTest║

In [13]:
from hypex.experiments.homogeneity import HOMOGENEITY_TEST

# Wrap the dataset in a fresh ExperimentData and run the ready-made
# homogeneity test.
ed = ExperimentData(data)
test = HOMOGENEITY_TEST
result = test.execute(ed)

In [14]:
# Show the analysis tables from the homogeneity run: GroupDifference, TTest
# and KSTest per target, followed by a OneAAStatAnalyzer summary table.
result.analysis_tables

{"GroupDifference║║['pre_spends'][t]":                                   1
 pre_spends control mean  484.911973
 pre_spends difference      4.308406
 pre_spends difference %    0.888492
 pre_spends test mean     489.220379,
 "TTest║║['pre_spends'][t]":    group  statistic       p-value  pass
 0      1 -11.489293  2.315047e-30  True,
 "KSTest║║['pre_spends'][t]":    group  statistic       p-value  pass
 0      1   0.077573  1.559150e-13  True,
 "GroupDifference║║['post_spends'][t]":                                    1
 post_spends control mean  420.046619
 post_spends difference     63.424045
 post_spends difference %   15.099287
 post_spends test mean     483.470664,
 "TTest║║['post_spends'][t]":    group   statistic  p-value  pass
 0      1 -135.560001      0.0  True,
 "KSTest║║['post_spends'][t]":    group  statistic  p-value  pass
 0      1     0.8959      0.0  True,
 'OneAAStatAnalyzer║║':                             TTest p-value                  TTest pass  \
 0  p-value    1.15

In [15]:
import pandas as pd
import warnings
# Install a filter that ignores pandas PerformanceWarning.
# NOTE(review): presumably raised by the cycled experiment below — confirm.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

from hypex.comparators import TTest, KSTest, GroupSizes, GroupDifference
from hypex.utils import SpaceEnum
from hypex.splitters import AASplitter
from hypex.analyzers import OneAAStatAnalyzer
from hypex.experiments import CycledExperiment, Experiment, OnRoleExperiment

# Hand-built AA pipeline: AASplitter first, then an OnRoleExperiment that runs
# the four comparators for the Target role. Every comparator is configured the
# same way (grouped by the Treatment role, space=SpaceEnum.additional), so they
# are built from one template in a fixed order.
aa = Experiment(
    executors=[
        AASplitter(),
        OnRoleExperiment(
            executors=[
                comparator(grouping_role=TreatmentRole(), space=SpaceEnum.additional)
                for comparator in (GroupSizes, GroupDifference, TTest, KSTest)
            ],
            role=TargetRole(),
        ),
    ]
)

# Repeat the pipeline 20 times and hand the runs to OneAAStatAnalyzer.
experiment = CycledExperiment(
    n_iterations=20,
    inner_executor=aa,
    analyzer=OneAAStatAnalyzer(),
)
ed = ExperimentData(data)
res = experiment.execute(ed)

In [16]:
# Show the additional fields: one AASplitter column per iteration holding the
# A/B label given to each row.
res.additional_fields

     AASplitter║║0 AASplitter║║1 AASplitter║║2 AASplitter║║3 AASplitter║║4  \
0                A             B             A             B             A   
1                A             A             A             B             A   
2                B             A             B             B             B   
3                A             B             B             B             A   
4                A             B             B             B             B   
...            ...           ...           ...           ...           ...   
9995             B             A             A             B             A   
9996             B             B             A             A             A   
9997             B             B             B             A             A   
9998             A             B             A             A             A   
9999             A             A             B             B             A   

     AASplitter║║5 AASplitter║║6 AASplitter║║7 AASplitter║║8 AA

In [17]:
# Show the analysis tables of the cycled run; the table keys end with an
# iteration index (e.g. "...[A]0").
res.analysis_tables

{"GroupSizes║║['pre_spends'][A]0":                      B
 control size    5000.0
 control size %    50.0
 test size       5000.0
 test size %       50.0,
 "GroupDifference║║['pre_spends'][A]0":                                   B
 pre_spends control mean  487.071200
 pre_spends difference      0.045100
 pre_spends difference %    0.009259
 pre_spends test mean     487.116300,
 "TTest║║['pre_spends'][A]0":   group  statistic   p-value   pass
 0     B  -0.119493  0.904888  False,
 "KSTest║║['pre_spends'][A]0":   group  statistic   p-value   pass
 0     B     0.0154  0.593677  False,
 "GroupSizes║║['post_spends'][A]0":                      B
 control size    5000.0
 control size %    50.0
 test size       5000.0
 test size %       50.0,
 "GroupDifference║║['post_spends'][A]0":                                    B
 post_spends control mean  451.897111
 post_spends difference      0.534889
 post_spends difference %    0.118365
 post_spends test mean     452.432000,
 "TTest║║['post_spends']

In [19]:
# Run OneAAStatAnalyzer directly on the cycled-experiment result.
# NOTE(review): the CycledExperiment was already given this analyzer —
# confirm whether this second pass is needed.
res_analyzer = OneAAStatAnalyzer().execute(res)

In [21]:
# Show the analyzer's summary table (TTest / KSTest p-value and pass columns
# plus a mean test score).
# NOTE(review): the rendered cells include "dtype: float64", so each cell
# appears to hold a one-element Series rather than a scalar — confirm intent.
res_analyzer.analysis_tables['OneAAStatAnalyzer║║']

                        TTest p-value                    TTest pass  \
0  p-value    0.530011
dtype: float64  pass    0.025
dtype: float64   

                       KSTest p-value                 KSTest pass  \
0  p-value    0.566635
dtype: float64  pass    0.0
dtype: float64   

                      mean test score  
0  p-value    0.554427
dtype: float64  

In [1]:
import pandas as pd

# Read the raw CSV with plain pandas and display all of its rows shuffled.
t = pd.read_csv("data.csv")
# frac=1 samples every row; random_state=None leaves the shuffle unseeded,
# so the row order differs from run to run.
t.sample(frac=1, random_state=None)

Unnamed: 0,user_id,signup_month,treat,pre_spends,post_spends,age,gender,industry
3187,3187,0,0,492.5,430.666667,34.0,M,E-commerce
3846,3846,6,1,480.5,496.777778,23.0,F,E-commerce
3473,3473,6,1,496.5,483.111111,35.0,F,Logistics
489,489,3,1,480.0,528.888889,63.0,M,Logistics
5820,5820,4,1,473.0,495.000000,,M,E-commerce
...,...,...,...,...,...,...,...,...
1666,1666,0,0,498.0,414.111111,18.0,F,Logistics
5020,5020,7,1,479.0,469.333333,,F,Logistics
9110,9110,8,1,490.5,452.555556,,M,E-commerce
9831,9831,0,0,462.0,424.444444,34.0,,Logistics


In [3]:
from hypex.hypotheses.hypothesis import Hypothesis
# Load the hypothesis described in test_config.json; execute() is unpacked
# into the experiment data (ed) and the experiment (exp) run in the next cell.
ed, exp  = Hypothesis("test_config.json").execute()

In [4]:
# Run the experiment built from the JSON config on its experiment data.
res = exp.execute(ed)

In [5]:
# Check the `space` setting of the first executor nested inside the
# experiment's second executor.
exp.executors[1].executors[0].space

<SpaceEnum.additional: 'additional'>

In [6]:
# Show the analysis tables of the config-file run: one GroupDifference table
# per target column.
res.analysis_tables

{"GroupDifference║║['pre_spends'][A]":                                   B
 pre_spends control mean  486.938200
 pre_spends difference      0.311100
 pre_spends difference %    0.063889
 pre_spends test mean     487.249300,
 "GroupDifference║║['post_spends'][A]":                                    B
 post_spends control mean  452.103689
 post_spends difference      0.121733
 post_spends difference %    0.026926
 post_spends test mean     452.225422}

In [7]:
# Hypothesis configuration written as a Python dict instead of a JSON file.
config = {
    # Data source and the role given to each listed column.
    # NOTE(review): role_names and columns are presumably paired by position —
    # confirm against the Hypothesis loader.
    "dataset": {
        "path": "data.csv",
        "roles": {
            "role_names": ["Info", "Treatment", "Target", "Target"],
            "columns": ["user_id", "treat", "pre_spends", "post_spends"],
        },
        "backend": "pandas",
    },
    # Executors of the experiment and their settings.
    "experiment": {
        "AASplitter": {"control_size": 0.5, "random_state": None},
        "OnRoleExperiment": {
            "executors": {
                "GroupDifference": {
                    "grouping_role": "Treatment",
                    "space": "additional",
                },
            },
            "role": "Target",
        },
    },
    # No reporter settings.
    "report": {},
}

# Build the hypothesis from the in-memory config dict; execute() is unpacked
# into the experiment data (ed) and the experiment (exp) run in the next cell.
ed, exp  = Hypothesis(config).execute()

In [8]:
# Run the experiment built from the in-memory config on its experiment data.
res = exp.execute(ed)

In [9]:
# Show the analysis tables of the dict-config run: one GroupDifference table
# per target column.
res.analysis_tables

{"GroupDifference║║['pre_spends'][A]":                                   B
 pre_spends control mean  487.359900
 pre_spends difference     -0.532300
 pre_spends difference %   -0.109221
 pre_spends test mean     486.827600,
 "GroupDifference║║['post_spends'][A]":                                    B
 post_spends control mean  452.496511
 post_spends difference     -0.663911
 post_spends difference %   -0.146722
 post_spends test mean     451.832600}