# Tutorial 2: How to perform AA and AB tests
*AB-test is shown below*

## 0. Import Libraries

In [61]:
import pandas as pd
import numpy as np
from hypex.ab_test.ab_tester import AATest, ABTest
from hypex.dataset.dataset import Dataset

# Render floats with a thousands separator and two decimals in every displayed frame.
pd.set_option('display.float_format', '{:,.2f}'.format)

## 1. Create or upload your dataset
In this case we will create a random dataset with a known effect size.  
If you have your own dataset, go to part 2.

In [62]:
# Generate the synthetic dataset used by the AA-test sections.
# NOTE(review): `na_columns` presumably lists the columns that receive missing
# values (both show gaps in the output below) - confirm against the Dataset docs.
data = Dataset(
    num_treatments=0,
    num_outcomes=1,
    num_info_cols=1,
    num_main_causes_cols=2,
    na_columns=['feature_col_1', 'feature_col_2'],
)
data.df

Unnamed: 0,info_col_1,feature_col_1,feature_col_2,feature_col_3,feature_col_4,outcome_1
0,14653,,Deposit,1.17,0.00,1.69
1,13027,female,,0.89,0.00,3.57
2,13882,male,Investment,0.66,2.00,2.53
3,8734,female,Credit,1.93,2.00,9.92
4,7252,male,Investment,0.19,3.00,2.91
...,...,...,...,...,...,...
4995,5410,female,Investment,0.67,0.00,2.46
4996,685,male,Credit,0.64,3.00,4.96
4997,3304,male,Credit,-0.31,3.00,1.76
4998,9568,female,Investment,-0.33,3.00,2.55


## 2. AATest 

### 2.0 Initialize parameters
`info_cols` is used to define informative attributes that should NOT be part of testing, such as user_id and signup_month <br>

In [63]:
# Take the first outcome column as the target and the first info column as the
# attribute to pass as `info_cols`.
target = data.outcome_name[0]
info_cols = data.info_col_names[0]

### 2.1 Simple AA-test
This is the easiest way to initialize and calculate metrics on an AA-test (by default, 10 iterations)<br>
Use it when you are clear about each attribute or if you don't have any additional task conditions (like grouping)

In [64]:
# Plain AA-test: only the info column and the target are configured (no grouping).
experiment = AATest(
    info_cols=info_cols,
    target_fields=target,
)

In [65]:
# Run 10 sampling iterations; returns the per-iteration metrics table and the splits.
experiment_result, dict_of_datas = experiment.search_dist_uniform_sampling(
    data=data.df,
    iterations=10,
)

100%|██████████| 10/10 [00:00<00:00, 22.38it/s]


`experiment_result` is a table with the results of the experiments, which includes 
- the means of all targets in the a and b samples, 
- the p-values of Student's t-test and the Kolmogorov–Smirnov test, 
- and the results of the tests (whether the split for a given random_state passed the uniformity test)

In [66]:
# Preview the first three rows of the per-iteration results table.
experiment_result.head(3)

Unnamed: 0,random_state,outcome_1 a mean,outcome_1 b mean,outcome_1 ab delta,outcome_1 ab delta %,outcome_1 t_test p_value,outcome_1 ks_test p_value,outcome_1 t_test passed,outcome_1 ks_test passed,mean_tests_score
0,0,3.93,3.84,-0.09,-2.4,0.3,0.24,True,True,0.27
1,1,3.93,3.84,-0.09,-2.47,0.28,0.58,True,True,0.43
2,2,3.88,3.9,0.02,0.56,0.81,0.91,True,True,0.86


`dict_of_datas` is a dictionary with random_states as keys and dataframes as values.<br>
The result of the split can be found in the column 'group', which contains the values 'test' and 'control'

In [67]:
# Preview the split stored under random_state 0; the 'group' column holds the assignment.
dict_of_datas[0].head(3)

Unnamed: 0,info_col_1,feature_col_1,feature_col_2,feature_col_3,feature_col_4,outcome_1,group
0,13027,female,,0.89,0.0,3.57,test
1,13882,male,Investment,0.66,2.0,2.53,test
2,8734,female,Credit,1.93,2.0,9.92,test


#### - Single experiment
To get stable results, let's fix `random_state`

In [68]:
# Seed passed to the single experiment below; also the key used to look up its split.
random_state = 11

To perform a single experiment you can use `sampling_metrics()`

In [69]:
# One split with a fixed seed. The returned mapping is unpacked by value order:
# first the metrics, then the dictionary of split dataframes.
experiment = AATest(target_fields=target, info_cols=info_cols)
single_run = experiment.sampling_metrics(
    data=data.df,
    random_state=random_state,
)
metrics, dict_of_datas = single_run.values()

The results contain the same information as in multi-sampling, but for a single experiment

In [70]:
# Display the metrics of the single experiment.
metrics

{'random_state': 11,
 'outcome_1 a mean': 3.8341526109446464,
 'outcome_1 b mean': 3.938352786589131,
 'outcome_1 ab delta': 0.10420017564448436,
 'outcome_1 ab delta %': 2.645780642082307,
 'outcome_1 t_test p_value': 0.23904632400605272,
 'outcome_1 ks_test p_value': 0.024300232032562177,
 'outcome_1 t_test passed': True,
 'outcome_1 ks_test passed': False,
 'mean_tests_score': 0.13167327801930745}

In [71]:
# Show the split produced for the fixed random_state (note the 'group' column).
dict_of_datas[random_state]

Unnamed: 0,info_col_1,feature_col_1,feature_col_2,feature_col_3,feature_col_4,outcome_1,group
0,13027,female,,0.89,0.00,3.57,test
1,13882,male,Investment,0.66,2.00,2.53,test
2,12388,female,Investment,0.23,3.00,2.96,test
3,11125,male,Deposit,-0.14,1.00,1.38,test
4,6130,male,,1.21,1.00,2.19,test
...,...,...,...,...,...,...,...
4995,11311,female,Investment,-0.11,2.00,1.82,control
4996,11788,female,Deposit,-0.27,1.00,2.63,control
4997,14494,female,,-0.79,0.00,0.58,control
4998,6559,male,Investment,0.67,2.00,6.85,control


### 2.2 AA-test with grouping

To perform an experiment that separates samples by groups, `group_cols` can be used

In [72]:
# Column whose categories are balanced between the test and control samples.
group_cols = 'feature_col_2'

# Same target and info column as in the simple AA-test.
target = data.outcome_name[0]
info_cols = data.info_col_names[0]

In [73]:
# AA-test configured with a grouping column in addition to target and info column.
experiment = AATest(
    info_cols=info_cols,
    target_fields=target,
    group_cols=group_cols,
)

In [74]:
# Multi-sampling with the grouped experiment; the iteration count is left at its default.
experiment_result, dict_of_datas = experiment.search_dist_uniform_sampling(
    data=data.df,
)

100%|██████████| 10/10 [00:00<00:00, 19.23it/s]


The result is in the same format as without groups

In this mode each group is divided equally between the samples (test and control):

In [75]:
# Count rows per (feature_col_2 category, test/control) pair for the random_state 0 split.
split_counts = (
    dict_of_datas[0]
    .groupby(['feature_col_2', 'group'])[['info_col_1']]
    .count()
)
split_counts

Unnamed: 0_level_0,Unnamed: 1_level_0,info_col_1
feature_col_2,group,Unnamed: 2_level_1
Credit,control,720
Credit,test,720
Deposit,control,785
Deposit,test,785
Investment,control,745
Investment,test,745


## 3. AB-test

### 3.0 Data
Let's adjust the data to see how the AB-test works

In [76]:
# Build a synthetic dataset with two outcome columns; the AB-test cells below use
# the first outcome as the pre-pilot target and the second as the pilot target.
dataset_ab = Dataset(num_outcomes=2, num_treatments=0)
data_ab = dataset_ab.df.copy()

# Label the first half of the rows 'test' and the remaining rows 'control'.
# The 'control' count is computed as the remainder so the label list always
# matches the frame length; with an odd number of rows, two equal halves would
# be one label short and the column assignment would raise a ValueError.
half_data = len(data_ab) // 2
data_ab['group'] = ['test'] * half_data + ['control'] * (len(data_ab) - half_data)
data_ab.head(3)

Unnamed: 0,info_col_1,info_col_2,feature_col_1,feature_col_2,feature_col_3,feature_col_4,feature_col_5,feature_col_6,outcome_1,outcome_2,group
0,6145,A,male,Credit,-1.74,-0.83,-0.39,1.0,0.39,0.39,test
1,8650,A,female,Credit,-0.19,0.39,1.08,1.0,4.8,4.8,test
2,1915,X,male,Credit,1.9,1.95,1.1,2.0,15.82,15.82,test


### 3.1 Full AB-test

The full (basic) version of the test calculates all available metrics: "diff in means", "diff in diff" and "cuped"<br>
Note that the "cuped" and "diff in diff" metrics require the target measured before the pilot.

In [77]:
# The first outcome column plays the role of the pre-pilot target, the second
# one the target measured during the pilot.
pre_pilot_target = dataset_ab.outcome_name[0]
pilot_target = dataset_ab.outcome_name[1]

model = ABTest()
results = model.execute(
    data=data_ab,
    group_field='group',
    target_field=pilot_target,
    target_field_before=pre_pilot_target,
)
results

{'size': {'test': 2500, 'control': 2500},
 'difference': {'ate': 0.060895625039223716,
  'cuped': -0.04488201942390302,
  'diff_in_diff': -0.11341276596386951},
 'p_value': {'t_test': 0.5850901143113342, 'mann_whitney': 0.9042167521538437}}

To see the results in a more convenient way, `show_beautiful_result` can be used

In [78]:
# Show the results of the preceding `execute` call as separate tables (size, difference, p_value).
model.show_beautiful_result()

Unnamed: 0,size
test,2500
control,2500


Unnamed: 0,difference
ate,0.06
cuped,-0.04
diff_in_diff,-0.11


Unnamed: 0,p_value
t_test,0.59
mann_whitney,0.9


### 3.2 Simple AB-test
To estimate the effect without target data from before the pilot, `calc_difference_method='ate'` can be used - the effect will be estimated with the "diff in means" method

In [79]:
# Only the 'ate' difference is requested, so no pre-pilot target is passed.
model = ABTest(calc_difference_method='ate')
model.execute(
    data=data_ab,
    group_field='group',
    target_field=dataset_ab.outcome_name[0],
)

model.show_beautiful_result()

Unnamed: 0,size
test,2500
control,2500


Unnamed: 0,difference
ate,0.17


Unnamed: 0,p_value
t_test,0.12
mann_whitney,0.23
