# AB test


## 0. Import libraries

In [1]:
import numpy as np
import pandas as pd

from lightautoml.addons.hypex import ABTest
from lightautoml.addons.hypex.utils.tutorial_data_creation import create_test_data

# Render floats in pandas output with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

# Fix NumPy's global RNG state (needed to create example data).
np.random.seed(42)

## 1. Create or upload your dataset
In this case we will create a random dataset with a known effect size.  
If you have your own dataset, go to part 2.

In [2]:
# Build the synthetic example dataset.
# NOTE(review): `rs` is presumably the random seed, and `na_step` / `nan_cols`
# presumably control where missing values are injected — confirm against
# `create_test_data`.
data = create_test_data(
    num_users=10000,
    rs=52,
    na_step=10,
    nan_cols=['age', 'gender'],
)
data

Unnamed: 0,user_id,signup_month,treat,pre_spends,post_spends,age,gender,industry
0,0,0,0,488.00,414.44,,M,E-commerce
1,1,8,1,512.50,462.22,26.00,,E-commerce
2,2,7,1,483.00,479.44,25.00,M,Logistics
3,3,0,0,501.50,424.33,39.00,M,E-commerce
4,4,1,1,543.00,514.56,18.00,F,E-commerce
...,...,...,...,...,...,...,...,...
9995,9995,10,1,538.50,450.44,42.00,M,Logistics
9996,9996,0,0,500.50,430.89,26.00,F,Logistics
9997,9997,3,1,473.00,534.11,22.00,F,E-commerce
9998,9998,2,1,495.00,523.22,67.00,F,E-commerce


## 2. AB-test

### 2.0 Data
Let's adjust the data to see how the AB-test works: we add a `group` column that splits the rows into a test group and a control group.

In [3]:
# Work on a copy so the original `data` frame is left untouched.
data_ab = data.copy()

# Split rows by position: the first half is labelled 'test', the rest 'control'.
# The control group is sized as the remainder, so the label list always has
# exactly one entry per row — even when the row count is odd (two equal halves
# would come up one label short and the column assignment would fail).
half_data = len(data_ab) // 2
data_ab['group'] = ['test'] * half_data + ['control'] * (len(data_ab) - half_data)
data_ab.head(3)

Unnamed: 0,user_id,signup_month,treat,pre_spends,post_spends,age,gender,industry,group
0,0,0,0,488.0,414.44,,M,E-commerce,test
1,1,8,1,512.5,462.22,26.0,,E-commerce,test
2,2,7,1,483.0,479.44,25.0,M,Logistics,test


### 2.1 Full AB-test

The full (basic) version of the test calculates all available metrics: "diff in means", "diff in diff" and "cuped".<br>
Note that the "cuped" and "diff in diff" metrics require the target values from before the pilot (`target_field_before`).

In [4]:
# ABTest with its default configuration.
model = ABTest()

# Run the test on the grouped data and keep the returned summary for display.
results = model.execute(
    data=data_ab,
    group_field='group',
    target_field='post_spends',
    target_field_before='pre_spends',  # target measured before the pilot
)
results

{'size': {'test': 5000, 'control': 5000},
 'difference': {'ate': 1.108044444444488,
  'medain_diff': 0.16666666666668561,
  'cuped': 0.897496915890514,
  'diff_in_diff': 0.610344444444479},
 'p-value': {'t-test': 0.15973563889393272,
  'mann_whitney': 0.11494755666097989}}

### 2.2 Simple AB-test
To estimate the effect without pre-pilot target data, use `calc_difference_method='ate'`: the effect will then be estimated with the "diff in means" method.

In [5]:
# Use only the 'ate' difference method; no pre-pilot target column is passed.
model = ABTest(calc_difference_method='ate')
model.execute(
    data=data_ab,
    group_field='group',
    target_field='post_spends',
)

{'size': {'test': 5000, 'control': 5000},
 'difference': {'ate': 1.108044444444488},
 'p-value': {'t-test': 0.15973563889393272,
  'mann_whitney': 0.11494755666097989}}