In [1]:
from hypex.dataset.dataset import Dataset, ExperimentData
from hypex.dataset.roles import (
    InfoRole,
    FeatureRole,
    TreatmentRole,
    TargetRole,
)
from hypex.experiments.aa_test import AA_TEST
from hypex.reporters.aa import AADictReporter

None


In [2]:
# Build the dataset from the CSV file and tag every column with its role.
# The order of the mapping is kept as-is because it is what `data.roles` shows.
data = Dataset(
    data="data.csv",
    roles={
        # identifier column
        "user_id": InfoRole(),
        "signup_month": FeatureRole(),
        # group assignment column
        "treat": TreatmentRole(),
        # the two metrics every experiment below is evaluated on
        "pre_spends": TargetRole(),
        "post_spends": TargetRole(),
        "age": FeatureRole(),
        "gender": FeatureRole(),
        "industry": FeatureRole(),
    },
)
data

      user_id  signup_month  treat  pre_spends  post_spends   age gender  \
0           0             0      0       488.0   414.444444   NaN      M   
1           1             8      1       512.5   462.222222  26.0    NaN   
2           2             7      1       483.0   479.444444  25.0      M   
3           3             0      0       501.5   424.333333  39.0      M   
4           4             1      1       543.0   514.555556  18.0      F   
...       ...           ...    ...         ...          ...   ...    ...   
9995     9995            10      1       538.5   450.444444  42.0      M   
9996     9996             0      0       500.5   430.888889  26.0      F   
9997     9997             3      1       473.0   534.111111  22.0      F   
9998     9998             2      1       495.0   523.222222  67.0      F   
9999     9999             7      1       508.0   475.888889  38.0      F   

        industry  
0     E-commerce  
1     E-commerce  
2      Logistics  
3     E-com

In [3]:
# Show the column -> role mapping stored on the dataset.
data.roles

{'user_id': Info,
 'signup_month': Feature,
 'treat': Treatment,
 'pre_spends': Target,
 'post_spends': Target,
 'age': Feature,
 'gender': Feature,
 'industry': Feature}

In [4]:
# Run the predefined AA test pipeline on the dataset wrapped in ExperimentData.
# `result` is read by the cells that follow.
test = AA_TEST
ed = ExperimentData(data)
result = test.execute(ed)

In [5]:
# Display the analysis table stored under the 'OneAASplitAnalyzer╰╰' key.
result.analysis_tables['OneAASplitAnalyzer╰╰']

   TTest p-value  TTest pass  KSTest p-value  KSTest pass  mean test score
0       0.173506         0.0        0.215686          0.0         0.201626

In [6]:
# Turn the AA result into a flat dict report with AADictReporter.
AADictReporter().report(result)

{'random_state': None,
 'B pre_spends control mean': 487.3426,
 'B pre_spends difference': -0.4977000000000089,
 'B pre_spends difference %': -0.10212528106511298,
 'B pre_spends test mean': 486.8449,
 'B post_spends control mean': 452.7185777777778,
 'B post_spends difference': -1.108044444444488,
 'B post_spends difference %': -0.24475347353392074,
 'B post_spends test mean': 451.6105333333333}

In [7]:
# Dump the post_spends group-difference table as a plain dict
# (backend, roles and data are all visible in the output).
result.analysis_tables['GroupDifference╰╰post_spends[A]'].to_dict()

{'backend': 'pandas',
 'roles': {'role_names': ['B'], 'columns': [Statistic]},
 'data': {'data': {'B': [452.7185777777778,
    -1.108044444444488,
    -0.24475347353392074,
    451.6105333333333]},
  'index': ['post_spends control mean',
   'post_spends difference',
   'post_spends difference %',
   'post_spends test mean']}}

In [8]:
from hypex.experiments.ab_test import AB_TEST

# Run the predefined A/B test pipeline on the same dataset.
# This rebinds `test`, `ed` and `result`, replacing the AA-test values.
test = AB_TEST
ed = ExperimentData(data)
result = test.execute(ed)

In [9]:
# Show every analysis table produced by the A/B test run, keyed by table id.
result.analysis_tables

{'GroupSizes╰╰pre_spends[[]':                       1
 control size    4936.00
 control size %    49.36
 test size       5064.00
 test size %       50.64,
 'GroupDifference╰╰pre_spends[[]':                                   1
 pre_spends control mean  484.911973
 pre_spends difference      4.308406
 pre_spends difference %    0.888492
 pre_spends test mean     489.220379,
 'TTest╰╰pre_spends[[]':    group  statistic       p-value  pass
 0      1 -11.489293  2.315047e-30  True,
 'MannWhitney╰╰pre_spends[[]':    group   statistic       p-value  pass
 0      1  11509971.0  7.624263e-12  True,
 'GroupSizes╰╰post_spends[[]':                       1
 control size    4936.00
 control size %    49.36
 test size       5064.00
 test size %       50.64,
 'GroupDifference╰╰post_spends[[]':                                    1
 post_spends control mean  420.046619
 post_spends difference     63.424045
 post_spends difference %   15.099287
 post_spends test mean     483.470664,
 'TTest╰╰post_spends[

In [10]:
from hypex.experiments.homogeneity_test import HOMOGENEITY_TEST

# Run the predefined homogeneity test pipeline on the same dataset.
# This rebinds `test`, `ed` and `result`, replacing the A/B-test values.
test = HOMOGENEITY_TEST
ed = ExperimentData(data)
result = test.execute(ed)

In [11]:
# Show every analysis table produced by the homogeneity test run.
result.analysis_tables

{'GroupDifference╰╰pre_spends[[]':                                   1
 pre_spends control mean  484.911973
 pre_spends difference      4.308406
 pre_spends difference %    0.888492
 pre_spends test mean     489.220379,
 'TTest╰╰pre_spends[[]':    group  statistic       p-value  pass
 0      1 -11.489293  2.315047e-30  True,
 'KSTest╰╰pre_spends[[]':    group  statistic       p-value  pass
 0      1   0.077573  1.559150e-13  True,
 'GroupDifference╰╰post_spends[[]':                                    1
 post_spends control mean  420.046619
 post_spends difference     63.424045
 post_spends difference %   15.099287
 post_spends test mean     483.470664,
 'TTest╰╰post_spends[[]':    group   statistic  p-value  pass
 0      1 -135.560001      0.0  True,
 'KSTest╰╰post_spends[[]':    group  statistic  p-value  pass
 0      1     0.8959      0.0  True,
 'OneAASplitAnalyzer╰╰':    TTest p-value  TTest pass  KSTest p-value  KSTest pass  mean test score
 0   1.157524e-30         1.0    7.79575

In [13]:
from hypex.comparators.hypothesis_testing import TTest, KSTest
from hypex.comparators.comparators import GroupSizes, GroupDifference
from hypex.utils.enums import SpaceEnum
from hypex.splitters.aa import AASplitter
from hypex.analyzers.aa import OneAASplitAnalyzer
from hypex.experiments.base import CycledExperiment, Experiment, OnRoleExperiment 

# One AA iteration: split the data, then run the same set of comparators on
# every column that carries TargetRole. Each comparator gets its own
# TreatmentRole() instance and writes to the "additional" space.
aa = Experiment(
    executors=[
        AASplitter(),
        OnRoleExperiment(
            executors=[
                comparator(grouping_role=TreatmentRole(), space=SpaceEnum.additional)
                for comparator in (GroupSizes, GroupDifference, TTest, KSTest)
            ],
            role=TargetRole(),
        ),
    ]
)

# Repeat the single iteration 250 times and hand the results to the analyzer.
# NOTE(review): AASplitter() is created without an explicit seed, and the pass
# rates computed later in this notebook are exactly 0.0 for both tests, which
# would be consistent with every iteration reusing the same split. Confirm
# that each iteration really draws a different split.
experiment = CycledExperiment(
    n_iterations=250,
    inner_executor=aa,
    analyzer=OneAASplitAnalyzer(),
)
ed = ExperimentData(data)
res = experiment.execute(ed)

None
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 

  self.data[name] = data


101, 

  self.data[name] = data


102, 

  self.data[name] = data


103, 

  self.data[name] = data


104, 

  self.data[name] = data


105, 

  self.data[name] = data


106, 

  self.data[name] = data


107, 

  self.data[name] = data


108, 

  self.data[name] = data


109, 

  self.data[name] = data


110, 

  self.data[name] = data


111, 

  self.data[name] = data


112, 

  self.data[name] = data


113, 

  self.data[name] = data


114, 

  self.data[name] = data


115, 

  self.data[name] = data


116, 

  self.data[name] = data


117, 

  self.data[name] = data


118, 

  self.data[name] = data


119, 

  self.data[name] = data


120, 

  self.data[name] = data


121, 

  self.data[name] = data


122, 

  self.data[name] = data


123, 

  self.data[name] = data


124, 

  self.data[name] = data


125, 

  self.data[name] = data


126, 

  self.data[name] = data


127, 

  self.data[name] = data


128, 

  self.data[name] = data


129, 

  self.data[name] = data


130, 

  self.data[name] = data


131, 

  self.data[name] = data


132, 

  self.data[name] = data


133, 

  self.data[name] = data


134, 

  self.data[name] = data


135, 

  self.data[name] = data


136, 

  self.data[name] = data


137, 

  self.data[name] = data


138, 

  self.data[name] = data


139, 

  self.data[name] = data


140, 

  self.data[name] = data


141, 

  self.data[name] = data


142, 

  self.data[name] = data


143, 

  self.data[name] = data


144, 

  self.data[name] = data


145, 

  self.data[name] = data


146, 

  self.data[name] = data


147, 

  self.data[name] = data


148, 

  self.data[name] = data


149, 

  self.data[name] = data


150, 

  self.data[name] = data


151, 

  self.data[name] = data


152, 

  self.data[name] = data


153, 

  self.data[name] = data


154, 

  self.data[name] = data


155, 

  self.data[name] = data


156, 

  self.data[name] = data


157, 

  self.data[name] = data


158, 

  self.data[name] = data


159, 

  self.data[name] = data


160, 

  self.data[name] = data


161, 

  self.data[name] = data


162, 

  self.data[name] = data


163, 

  self.data[name] = data


164, 

  self.data[name] = data


165, 

  self.data[name] = data


166, 

  self.data[name] = data


167, 

  self.data[name] = data


168, 

  self.data[name] = data


169, 

  self.data[name] = data


170, 

  self.data[name] = data


171, 

  self.data[name] = data


172, 

  self.data[name] = data


173, 

  self.data[name] = data


174, 

  self.data[name] = data


175, 

  self.data[name] = data


176, 

  self.data[name] = data


177, 

  self.data[name] = data


178, 

  self.data[name] = data


179, 

  self.data[name] = data


180, 

  self.data[name] = data


181, 

  self.data[name] = data


182, 

  self.data[name] = data


183, 

  self.data[name] = data


184, 

  self.data[name] = data


185, 

  self.data[name] = data


186, 

  self.data[name] = data


187, 

  self.data[name] = data


188, 

  self.data[name] = data


189, 

  self.data[name] = data


190, 

  self.data[name] = data


191, 

  self.data[name] = data


192, 

  self.data[name] = data


193, 

  self.data[name] = data


194, 

  self.data[name] = data


195, 

  self.data[name] = data


196, 

  self.data[name] = data


197, 

  self.data[name] = data


198, 

  self.data[name] = data


199, 

  self.data[name] = data


200, 

  self.data[name] = data


201, 

  self.data[name] = data


202, 

  self.data[name] = data


203, 

  self.data[name] = data


204, 

  self.data[name] = data


205, 

  self.data[name] = data


206, 

  self.data[name] = data


207, 

  self.data[name] = data


208, 

  self.data[name] = data


209, 

  self.data[name] = data


210, 

  self.data[name] = data


211, 

  self.data[name] = data


212, 

  self.data[name] = data


213, 

  self.data[name] = data


214, 

  self.data[name] = data


215, 

  self.data[name] = data


216, 

  self.data[name] = data


217, 

  self.data[name] = data


218, 

  self.data[name] = data


219, 

  self.data[name] = data


220, 

  self.data[name] = data


221, 

  self.data[name] = data


222, 

  self.data[name] = data


223, 

  self.data[name] = data


224, 

  self.data[name] = data


225, 

  self.data[name] = data


226, 

  self.data[name] = data


227, 

  self.data[name] = data


228, 

  self.data[name] = data


229, 

  self.data[name] = data


230, 

  self.data[name] = data


231, 

  self.data[name] = data


232, 

  self.data[name] = data


233, 

  self.data[name] = data


234, 

  self.data[name] = data


235, 

  self.data[name] = data


236, 

  self.data[name] = data


237, 

  self.data[name] = data


238, 

  self.data[name] = data


239, 

  self.data[name] = data


240, 

  self.data[name] = data


241, 

  self.data[name] = data


242, 

  self.data[name] = data


243, 

  self.data[name] = data


244, 

  self.data[name] = data


245, 

  self.data[name] = data


246, 

  self.data[name] = data


247, 

  self.data[name] = data


248, 

  self.data[name] = data


249, 

  self.data[name] = data


In [17]:
# Show the analysis tables collected by the cycled experiment; keys end with
# the iteration index (e.g. '...pre_spends[A]0').
res.analysis_tables

{'GroupSizes╰╰pre_spends[A]0':                      B
 control size    5000.0
 control size %    50.0
 test size       5000.0
 test size %       50.0,
 'GroupDifference╰╰pre_spends[A]0':                                   B
 pre_spends control mean  487.342600
 pre_spends difference     -0.497700
 pre_spends difference %   -0.102125
 pre_spends test mean     486.844900,
 'TTest╰╰pre_spends[A]0':   group  statistic   p-value   pass
 0     B   1.318771  0.187276  False,
 'KSTest╰╰pre_spends[A]0':   group  statistic   p-value   pass
 0     B      0.021  0.220219  False,
 'GroupSizes╰╰post_spends[A]0':                      B
 control size    5000.0
 control size %    50.0
 test size       5000.0
 test size %       50.0,
 'GroupDifference╰╰post_spends[A]0':                                    B
 post_spends control mean  452.718578
 post_spends difference     -1.108044
 post_spends difference %   -0.244753
 post_spends test mean     451.610533,
 'TTest╰╰post_spends[A]0':   group  statistic   

In [15]:
# Analyzer prototype: collapse the per-iteration verdicts of the cycled AA
# experiment into one pass rate per target column and per statistical test,
# then flag whether that rate lies inside the accepted band.

from hypex.dataset.roles import StatisticRole

# Reference rate the observed pass rate is compared against.
# NOTE(review): presumably the significance level used by TTest/KSTest - confirm.
ALPHA = 0.05
# Accepted band around ALPHA (80%..120% of it), same bounds as before.
PASS_RATE_LOWER = 0.8 * ALPHA
PASS_RATE_UPPER = 1.2 * ALPHA

new_res = {}
fields = ['pre_spends', 'post_spends']  # target columns (the ones carrying TargetRole)
tests = ['KSTest', 'TTest']
for field in fields:
    # Collect the first 'pass' value of every matching table, grouped by test.
    # Table keys look like '<executor>╰╰<field>[...]<iteration>'.
    for table_key in res.analysis_tables:
        # `test_name` (not `test`) so the experiment variable `test` defined in
        # earlier cells is not overwritten by this loop.
        test_name = table_key.split('╰╰')[0]
        if field in table_key and test_name in tests:
            new_res.setdefault(field, {}).setdefault(test_name, []).append(
                list(res.analysis_tables[table_key].data['pass'])[0]
            )

    # Average the collected verdicts: one pass rate per test.
    new_res[field] = Dataset.from_dict(new_res[field], roles={}).mean()

    # Append one 0/1 flag column per test, in the original column order
    # ('TTest passed' first, then 'KSTest passed').
    for test_name in ('TTest', 'KSTest'):
        new_res[field].add_column(new_res[field].apply(
            lambda x, column=test_name: int(PASS_RATE_LOWER <= x[column] <= PASS_RATE_UPPER),
            {f'{test_name} passed': StatisticRole()},
            axis=1,
        ))
new_res

{'pre_spends':       KSTest  TTest  TTest passed  KSTest passed
 mean     0.0    0.0             0              0,
 'post_spends':       KSTest  TTest  TTest passed  KSTest passed
 mean     0.0    0.0             0              0}