# Tutorial 3: How to perform an AB test


## 0. Import libraries

In [9]:
import pandas as pd
import numpy as np
from hypex.ab_test import ABTest
from hypex.utils.tutorial_data_creation import create_test_data

# Show floats with a thousands separator and two decimal places.
pd.set_option('display.float_format', '{:,.2f}'.format)

# Seed NumPy's global RNG; needed to create the example data reproducibly.
np.random.seed(52)

## 1. Create or upload your dataset
In this tutorial we will create a random dataset with a known effect size.  
If you have your own dataset, skip to part 2.

In [3]:
# Build the synthetic example dataset.
# NOTE(review): `rs` is presumably the random state, and `na_step` / `nan_cols`
# presumably control where missing values are injected into the listed
# columns -- confirm against create_test_data.
data = create_test_data(
    rs=52,
    na_step=10,
    nan_cols=['age', 'gender'],
)
data

Unnamed: 0,user_id,signup_month,treat,pre_spends,post_spends,age,gender,industry
0,0,0,0,488.00,414.44,,M,E-commerce
1,3,0,0,501.50,424.33,31.00,,Logistics
2,10,0,0,522.50,416.22,64.00,M,E-commerce
3,12,0,0,472.00,423.78,43.00,M,E-commerce
4,13,0,0,508.50,424.22,36.00,F,E-commerce
...,...,...,...,...,...,...,...,...
5365,9991,0,0,482.50,421.89,23.00,F,E-commerce
5366,9992,0,0,491.50,424.00,44.00,M,E-commerce
5367,9994,0,0,486.00,423.78,27.00,F,Logistics
5368,9996,0,0,500.50,430.89,56.00,F,E-commerce


## 2. AB-test

### 2.0 Data
Let's prepare the data by adding a `group` column (test / control) to see how the AB-test works

In [4]:
# Work on a copy so the original `data` frame stays untouched.
data_ab = data.copy()

# Split rows by position: the first half becomes 'test', the rest 'control'.
# The control size is derived from the total row count so the label list
# always matches the frame length, even when the number of rows is odd
# (two equal halves would be one label short and the assignment would fail).
n_rows = len(data_ab)
half_data = n_rows // 2
data_ab['group'] = ['test'] * half_data + ['control'] * (n_rows - half_data)
data_ab.head(3)

Unnamed: 0,user_id,signup_month,treat,pre_spends,post_spends,age,gender,industry,group
0,0,0,0,488.0,414.44,,M,E-commerce,test
1,3,0,0,501.5,424.33,31.0,,Logistics,test
2,10,0,0,522.5,416.22,64.0,M,E-commerce,test


### 2.1 Full AB-test

The full (basic) version of the test calculates all available metrics: "diff in means", "diff in diff" and "cuped".<br>
Note that the "cuped" and "diff in diff" metrics require the target values from before the pilot.

In [5]:
# ABTest with default settings: all available difference metrics are computed.
model = ABTest()

# `target_field_before` supplies the pre-pilot target, which the
# "cuped" and "diff in diff" metrics need.
results = model.execute(
    data=data_ab,
    group_field='group',
    target_field='post_spends',
    target_field_before='pre_spends',
)
results

{'size': {'test': 2685, 'control': 2685},
 'difference': {'ate': 0.9805090006207325,
  'cuped': 0.9764245308837758,
  'diff_in_diff': 0.39224084419618066},
 'p-value': {'t-test': 0.20533212744131019,
  'mann_whitney': 0.08089945933651932}}

To see the results in a more convenient format, `show_beautiful_result` can be used

In [6]:
# Render the computed results as tables instead of a raw dict.
model.show_beautiful_result()

Unnamed: 0,size
test,2685
control,2685


Unnamed: 0,difference
ate,0.98
cuped,0.98
diff_in_diff,0.39


Unnamed: 0,p-value
t-test,0.21
mann_whitney,0.08


### 2.2 Simple AB-test
To estimate the effect without pre-pilot target data, `calc_difference_method='ate'` can be used: the effect will be estimated with the "diff in means" method

In [7]:
# Restrict the difference calculation to 'ate' ("diff in means"),
# so no pre-pilot target column has to be passed.
model = ABTest(calc_difference_method='ate')
model.execute(
    data=data_ab,
    group_field='group',
    target_field='post_spends',
)

# Show the results as tables.
model.show_beautiful_result()

Unnamed: 0,size
test,2685
control,2685


Unnamed: 0,difference
ate,0.98


Unnamed: 0,p-value
t-test,0.21
mann_whitney,0.08
