# Using the bayes-ab package to calculate Bayesian A/B test results
Exploring whether the bayes-ab package (https://pypi.org/project/bayes-ab/) can be used to calculate Bayesian A/B test results.
* Done: conversions -> `BinaryDataTest`
* TBD: revenue per user (RPU) -> `DeltaLognormalDataTest`

---

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import scipy.stats as scs
from bayes_ab.experiments import BinaryDataTest

In [2]:
class Calculations:
    """Bayesian evaluation of a two-variant conversion test (A = control, B = variant).

    The constructor stores the raw inputs and derives the observed conversion
    rates; ``calculate_probabilities`` runs the ``BinaryDataTest`` simulation
    and projects the result to a revenue figure over a time horizon.

    Attributes set by ``__init__``:
        control_cr / variant_cr: observed conversion rates of A and B.
        relative_difference: ``variant_cr / control_cr - 1``; NaN when the
            control has no conversions (the ratio is undefined).

    Attributes set by ``calculate_probabilities`` (``None`` until it is called):
        prob_A, prob_B, expected_risk, expected_uplift, total_contribution.
    """

    def __init__(
        self,
        visitors_A,
        conversions_A,
        visitors_B,
        conversions_B,
        test_duration,
        percent_traffic_in_test,
        aov,
        min_rev_yield,
    ):
        """Store the test inputs and compute the observed conversion rates.

        Args:
            visitors_A: Number of visitors exposed to A.
            conversions_A: Number of conversions observed in A.
            visitors_B: Number of visitors exposed to B.
            conversions_B: Number of conversions observed in B.
            test_duration: Length of the test, in days (it is scaled against a
                horizon expressed in days).
            percent_traffic_in_test: Share of total traffic included in the
                test, as a percentage (it is divided by 100).
            aov: Revenue multiplier applied per conversion
                (presumably the average order value -- confirm).
            min_rev_yield: Stored as given; not used by any calculation in
                this class.

        Raises:
            ZeroDivisionError: If ``visitors_A`` or ``visitors_B`` is zero.
        """
        self.visitors_A = visitors_A
        self.conversions_A = conversions_A
        self.visitors_B = visitors_B
        self.conversions_B = conversions_B
        self.control_cr = conversions_A / visitors_A
        self.variant_cr = conversions_B / visitors_B
        # A control without conversions has no defined relative lift; report
        # NaN instead of crashing the constructor with ZeroDivisionError.
        if self.control_cr:
            self.relative_difference = self.variant_cr / self.control_cr - 1
        else:
            self.relative_difference = float("nan")
        self.test_duration = test_duration
        self.percent_traffic_in_test = percent_traffic_in_test
        self.min_rev_yield = min_rev_yield
        self.aov = aov

        # Declared up front so the attributes exist before the simulation runs.
        self.prob_A = None
        self.prob_B = None
        self.expected_risk = None
        self.expected_uplift = None
        self.total_contribution = None

    def calculate_probabilities(
        self,
        samples=50000,
        seed=None,
        alpha_prior=1,
        beta_prior=1,
        horizon_days=182.5,
    ):
        """Run the Bayesian simulation and derive the revenue projections.

        Results are stored on the instance; nothing is returned.

        Args:
            samples: Simulation count passed to ``evaluate`` as ``sim_count``.
            seed: Optional seed forwarded to ``evaluate`` to make a run
                reproducible. When ``None`` no seed is passed, which matches
                the previous unseeded behaviour.
            alpha_prior: ``a_prior`` used for both variants.
            beta_prior: ``b_prior`` used for both variants.
            horizon_days: Projection horizon in days (default: six months).

        Raises:
            ZeroDivisionError: If ``percent_traffic_in_test`` or
                ``test_duration`` is zero.
        """
        test = BinaryDataTest()
        test.add_variant_data_agg(
            "A", self.visitors_A, self.conversions_A, a_prior=alpha_prior, b_prior=beta_prior
        )
        test.add_variant_data_agg(
            "B", self.visitors_B, self.conversions_B, a_prior=alpha_prior, b_prior=beta_prior
        )

        # Only forward the seed when the caller supplied one.
        evaluate_kwargs = {"sim_count": samples}
        if seed is not None:
            evaluate_kwargs["seed"] = seed
        test.evaluate(**evaluate_kwargs)

        # Per-variant evaluation metrics, keyed by variant name.
        results = test.data

        self.prob_A = results['A']['chance_to_beat']
        # Complement of A; only meaningful because exactly two variants are registered.
        self.prob_B = 1 - self.prob_A

        # Relative (not absolute) uplift of B over A.
        difference = results['B']['uplift_vs_a']

        # Scale the visitors seen in the test up to 100% of traffic, then from
        # the test duration to the projection horizon.
        visitors_in_horizon = (
            (self.visitors_A + self.visitors_B)
            / (self.percent_traffic_in_test / 100)
            / self.test_duration
            * horizon_days
        )

        self.expected_risk = visitors_in_horizon * self.aov * results['B']['exp_loss']
        # The uplift is relative, so it is converted back to an absolute
        # conversion-rate change through the control rate.
        self.expected_uplift = visitors_in_horizon * self.aov * self.control_cr * difference

        # NOTE(review): if 'exp_loss' is already a posterior expectation,
        # weighting it by prob_A discounts the risk a second time -- confirm
        # that this is the intended business formula.
        self.total_contribution = (
            self.expected_uplift * self.prob_B - self.expected_risk * self.prob_A
        )


In [3]:
# Example run: 5,000 visitors per arm over a 14-day test covering 100% of
# traffic. Keyword arguments spell out the constructor's positional order.
bayes = Calculations(
    visitors_A=5000,
    conversions_A=1500,
    visitors_B=5000,
    conversions_B=1600,
    test_duration=14,
    percent_traffic_in_test=100,
    aov=100,
    min_rev_yield=5000,
)
bayes.calculate_probabilities()

# Observed conversion rates and their relative difference.
print(
    "CR A:", f"{bayes.control_cr:.2%}",
    ", CR B:", f"{bayes.variant_cr:.2%}",
    ", relative difference:", f"{bayes.relative_difference:.2%}",
)
# prob_A is A's chance to beat B, i.e. the chance that B is the worse variant.
print(
    "Chance of being worse", f"{bayes.prob_A:.2%}",
    "chance of being best", f"{bayes.prob_B:.2%}",
)
# Revenue projections derived from the simulation.
print(
    "Expected risk", f"${bayes.expected_risk:,.0f}",
    ", expected uplift", f"${bayes.expected_uplift:,.0f}",
    ", total contribution", f"${bayes.total_contribution:,.0f}",
)

+---------+--------+-----------+-------------+----------------+--------------------+---------------+----------------+------------------+
| Variant | Totals | Positives | Sample rate | Posterior rate | Chance to beat all | Expected loss | Uplift vs. "A" |     95% HDI      |
+---------+--------+-----------+-------------+----------------+--------------------+---------------+----------------+------------------+
|    B    |  5000  |    1600   |    32.00%   |     32.01%     |       98.51%       |     0.00%     |     6.66%      | [30.72%, 33.31%] |
|    A    |  5000  |    1500   |    30.00%   |     30.01%     |       1.49%        |     2.01%     |     0.00%      | [28.75%, 31.29%] |
+---------+--------+-----------+-------------+----------------+--------------------+---------------+----------------+------------------+ 

CR A: 30.00% , CR B: 32.00% , relative difference: 6.67%
Chance of being worse 1.49% chance of being best 98.51%
Expected risk $628 , expected uplift $260,532 , total contribut



## Bayesian calculations for RPU (TBD)

In [5]:
# Load the sample revenue data for the (still TBD) RPU analysis.
# The path is relative to the notebook's working directory.
df = pd.read_csv('output/SampleRevenueData.csv')
# Left as the cell's last bare expression so the notebook renders the summary
# statistics of the loaded columns.
df.describe()

Unnamed: 0,Control,Variant
count,9767.0,9726.0
mean,5.060612,4.063747
std,38.856811,25.932426
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,0.0,0.0
max,1204.0,617.0
