In [28]:
import pandas as pd
import numpy as np

from main import set_ab_test 
from main import validate_ab_test_data
from main import test_hypothesis_relational
from main import test_hypothesis_continuous
from main import print_statistical_report
from main import save_report_to_html

In [29]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [30]:
# Identifier handed to every A/B-test helper call in this notebook.
EXPERIMENT_NAME = 'ab_price_discount'

# Toy experiment log written one event per row so each record can be read at
# a glance. Missing payments are NaN, which makes `payment` a float column.
exp_df = pd.DataFrame(
    [
        (1, '2021-01-01', 2, np.nan, 0),
        (1, '2021-01-02', 1, 100, 11),
        (2, '2021-01-01', 1, np.nan, 2),
        (3, '2021-01-02', 2, 110, 9),
        (3, '2021-01-03', 2, np.nan, 1),
        (4, '2021-01-02', 2, 50, 4),
        (5, '2021-01-05', 1, np.nan, 0),
        (6, '2021-01-01', 3, 130, 15),
    ],
    columns=['user_id', 'dt', 'group', 'payment', 'clicks'],
)

In [31]:
# Quick look at the numeric columns of the toy data. In the recorded output
# `payment` has count 4 because half of its eight values are NaN.
exp_df.describe()

Unnamed: 0,user_id,group,payment,clicks
count,8.0,8.0,4.0,8.0
mean,3.125,1.75,97.5,5.25
std,1.807722,0.707107,34.034296,5.700877
min,1.0,1.0,50.0,0.0
25%,1.75,1.0,87.5,0.75
50%,3.0,2.0,105.0,3.0
75%,4.25,2.0,115.0,9.5
max,6.0,3.0,130.0,15.0


In [32]:
# Hand the experiment data to the A/B-test helper under EXPERIMENT_NAME and
# tell it which columns hold the group label, the event date and the user id;
# group 1 is declared as the control group.
# Presumably this registers the experiment inside `main`, since the later
# cells refer to it by name only — confirm against `main.set_ab_test`.
# NOTE(review): the value bound to `m` is not used in the cells visible here;
# confirm whether it is needed.
m = set_ab_test(
    ab_test_name=EXPERIMENT_NAME, 
    dataframe=exp_df, 
    group_col='group', 
    date_col='dt', 
    uniq_id_col='user_id',
    control_group_name=1
)

In [33]:
# Run the data validation helper for the experiment, addressed by name only.
# In the recorded run it displayed per-group descriptive statistics followed
# by a small group/user_id table.
validate_ab_test_data(EXPERIMENT_NAME)

Unnamed: 0,group,1,2,3
user_id,count,3.0,4.0,1.0
user_id,mean,2.666667,2.75,6.0
user_id,std,2.081666,1.258306,
user_id,min,1.0,1.0,6.0
user_id,25%,1.5,2.5,6.0
user_id,50%,2.0,3.0,6.0
user_id,75%,3.5,3.25,6.0
user_id,max,5.0,4.0,6.0
clicks,count,3.0,4.0,1.0
clicks,mean,4.333333,3.5,15.0


Unnamed: 0,group,user_id
0,1,5
1,2,1


In [34]:
# Ratio-metric hypothesis: `payment` over `user_id`, shown in the report as
# "conversion to purchase" and evaluated at a 5% significance level.
# NOTE(review): how `uniq_id_rel=True` changes the aggregation of numerator
# and denominator is defined in `main`, not visible here — confirm.
test_hypothesis_relational(
    EXPERIMENT_NAME,
    nominator='payment',
    denominator='user_id',
    stat_test='chisquare',  # original note read "ztest" — presumably the alternative test name; confirm in `main`
    description='conversion to purchase',
    uniq_id_rel=True,
    significance_level=0.05
)

'test_hypothesis_relational'

In [35]:
# Ratio-metric hypothesis: `clicks` over `user_id`, shown in the report as
# "CTR" and evaluated at a 5% significance level, with `uniq_id_rel=False`.
# NOTE(review): the CTR values in the recorded report (1.625, 1.273, 2.5)
# equal sum(clicks) / sum(user_id) per group (13/8, 14/11, 15/6), i.e. the id
# values themselves appear to be summed as the denominator. That is
# presumably not a meaningful click-through rate — confirm the intended
# denominator column.
test_hypothesis_relational(
    EXPERIMENT_NAME,
    nominator='clicks',
    denominator='user_id',
    stat_test='chisquare',  # original note read "ztest" — presumably the alternative test name; confirm in `main`
    description='CTR',
    uniq_id_rel=False,
    significance_level=0.05
)

'test_hypothesis_relational'

In [36]:
# Continuous-metric hypothesis on `payment`, shown in the report as
# "ARPU (USD after fee)".
# In the recorded run the 1-3 comparison was skipped with a "not enough data"
# message; group 3 has a single row in the toy data.
# NOTE(review): 'ttest_welsh' presumably selects Welch's t-test. The spelling
# is a runtime key that must match what `main` accepts — confirm before
# changing it.
test_hypothesis_continuous(
    EXPERIMENT_NAME,
    value='payment',
    stat_test='ttest_welsh',
    description='ARPU (USD after fee)'
)

'test_hypothesis_continuous'

'not enough data to test "ARPU (USD after fee)   payment" hypothesis in groups 1-3'

In [37]:
# Display the combined report for the hypotheses tested above. In the recorded
# run this was a metrics table (per-group values plus significance verdicts
# against group 1), a notice that Holm correction is applied, and a table of
# raw and corrected p-values.
print_statistical_report(EXPERIMENT_NAME)

'print_statistical_report'

Unnamed: 0_level_0,1,2,3,group 1-2 sign.,group 1-3 sign.
metrics,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
conversion to purchase,0.333,0.333 (+0.00%),1.0 (+200.00%),- (H0 accepted),- (H0 accepted)
CTR,1.625,1.273 (-21.68%),2.5 (+53.85%),- (H0 accepted),- (H0 accepted)
ARPU (USD after fee),33.333,40.0 (+20.00%),,- (H0 accepted),


'Holm miltiple testing correction is applied'

Unnamed: 0,pval. 1-2,pval. 1-3,corrected pval. 1-2,corrected pval. 1-3
conversion to purchase,1.0,0.248,1.0,1.0
CTR,1.0,1.0,1.0,1.0
ARPU (USD after fee),0.882,,1.0,


In [38]:
# Export the experiment's report as HTML.
# NOTE(review): the output path and file name are chosen inside `main` and are
# not visible here — confirm where the file is written.
save_report_to_html(EXPERIMENT_NAME)

'save_report_to_html'