In [None]:
# Dependencies for the Bayesian A/B-test analysis:
#   pymc3      - model definition and MCMC sampling
#   arviz      - posterior plots and summaries
#   matplotlib - histograms of the posterior draws
# NOTE(review): `pandas` is not referenced directly in the visible cells - confirm whether it is still needed.
import pandas
import pymc3 as pm
import warnings
import arviz as az
from matplotlib import pyplot as plt

# Silences every warning for the rest of the session, including deprecation
# warnings raised by the libraries above.
warnings.filterwarnings('ignore')

In [None]:
# Observed data: number of trials and number of conversions for each group.
c_obs = 1300
v_obs = 1500
c_conversions = 273
v_conversions = 570

# Sampler settings. Seeding makes the posterior draws reproducible under
# "Restart Kernel -> Run All"; without it every run yields different samples.
N_DRAWS = 1000
RANDOM_SEED = 42


# Group A (control)
with pm.Model() as control:

    # Prior: Beta(1, 1), i.e. uniform on [0, 1].
    p_A = pm.Beta('Control', alpha=1, beta=1)

    # Likelihood: observed conversions out of c_obs trials.
    obs = pm.Binomial("Observed", n=c_obs, p=p_A, observed=c_conversions)

    trace_control = pm.sample(N_DRAWS, random_seed=RANDOM_SEED)


# Group B (variant)
with pm.Model() as variant:

    # Prior: Beta(1, 1), i.e. uniform on [0, 1].
    p_B = pm.Beta('Variant', alpha=1, beta=1)

    # Likelihood: observed conversions out of v_obs trials.
    obs = pm.Binomial("Observed", n=v_obs, p=p_B, observed=v_conversions)

    # A different seed than the control model, so the two sets of draws do not
    # share a random-number stream; later cells subtract them element-wise.
    trace_variant = pm.sample(N_DRAWS, random_seed=RANDOM_SEED + 1)
    
    
    
    

In [None]:

# Wide, short canvas for the posterior plot of the control trace.
plt.rc('figure', figsize=(20, 4))
az.plot_posterior(data=trace_control)

In [None]:
# Pull the posterior draws of each conversion rate out of its trace.
samples_posterior_control, samples_posterior_variant = (
    trace_control['Control'],
    trace_variant['Variant'],
)



In [None]:
from matplotlib import pyplot as plt

plt.rc('figure', figsize=(20, 4))

# Overlay one normalised histogram per group on the same axes.
for samples, label in (
    (samples_posterior_control, 'posterior of control'),
    (samples_posterior_variant, 'posterior of variant'),
):
    plt.hist(samples, density=True, label=label, bins=40)

plt.title("Posterior distributions of the conversion rates of control and variant")
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
plt.show()

In [None]:
# Element-wise difference between the variant and control posterior draws.
difference = samples_posterior_variant - samples_posterior_control

plt.rc('figure', figsize=(20, 4))
plt.hist(difference, density=True, bins=40)
# Dashed red reference line at a difference of 0.14, drawn from y=0 to y=25.
plt.vlines(x=0.14, ymin=0, ymax=25, linestyle='--', color='red')
plt.title('Posterior distribution of the difference of the two means')
plt.show()

In [None]:
# Difference of the draws expressed as a percentage of the control draws.
absolute_gap = samples_posterior_variant - samples_posterior_control
rel_difference = 100 * absolute_gap / samples_posterior_control

plt.rc('figure', figsize=(20, 4))
plt.hist(rel_difference, density=True, bins=40)
# Dashed red reference line at 60 percent, drawn from y=0 to y=0.04.
plt.vlines(x=60, ymin=0, ymax=0.04, linestyle='--', color='red')
plt.xlabel("percentage")
plt.title('Posterior distribution of the relative difference of the two means')
plt.show()

In [None]:
# Percentage of posterior draws whose relative difference exceeds 50.
draws_above_50 = rel_difference[rel_difference > 50]
100 * len(draws_above_50) / len(rel_difference)

In [None]:

# Fraction of paired posterior draws in which the variant's conversion rate
# exceeds the control's. Uses the arrays extracted from the traces earlier in
# the notebook (the previous names samples_posterior_A/_B were never defined).
print(f'Probability that Variant is better: {(samples_posterior_variant > samples_posterior_control).mean():.1%}.')

In [None]:

# Histogram-style posterior plot of the relative difference, with 0 as the
# reference value and a 95% HDI.
az.plot_posterior(
    rel_difference,
    var_names=None,
    ref_val=0,
    kind='hist',
    hdi_prob=0.95,
)

In [None]:

def check_significance(data, hdi_prob):
    """Print whether the HDI of ``data`` excludes zero, followed by its bounds.

    Parameters
    ----------
    data
        Anything accepted by ``az.summary`` (e.g. an array of posterior draws).
    hdi_prob : float
        Probability mass of the highest-density interval; forwarded unchanged
        to ``az.summary``.

    Prints
    ------
    Three space-separated values: ``True`` if zero lies outside the interval
    (``False`` otherwise), the lower bound, and the upper bound. Only the
    first row of the summary is inspected.

    Raises
    ------
    ValueError
        If the summary does not contain exactly two ``hdi_*`` columns.
    """
    summary = az.summary(data, hdi_prob=hdi_prob)

    # Locate the bound columns by prefix instead of rebuilding their labels
    # from hdi_prob: formatting a float (e.g. 'hdi_3.0%' for hdi_prob=0.94)
    # need not match the label az.summary emits, which previously restricted
    # the function to a handful of hdi_prob values.
    hdi_columns = [col for col in summary.columns if str(col).startswith('hdi_')]
    if len(hdi_columns) != 2:
        raise ValueError(
            'expected exactly two hdi_* columns in the summary, found {}'.format(hdi_columns)
        )
    lower_column, upper_column = hdi_columns

    # Positional access: the summary rows are labelled, so an integer key
    # would depend on pandas' deprecated positional fallback.
    lower = summary[lower_column].iloc[0]
    upper = summary[upper_column].iloc[0]

    print(not lower <= 0 <= upper, lower, upper)

    
    


In [None]:
# `hdi_prob` is only a parameter name inside check_significance, not a
# notebook-level variable, so pass the interval mass explicitly. 0.95 matches
# the HDI used by the other cells in this notebook.
check_significance(rel_difference, hdi_prob=0.95)

In [None]:
summary = az.summary(rel_difference, hdi_prob=0.95)

# True when zero lies inside the 95% HDI. The lower bound (2.5%) must be on
# the left of the chained comparison and the upper bound (97.5%) on the right;
# with the bounds swapped the test is false for any non-degenerate interval.
summary['hdi_2.5%'].iloc[0] <= 0 <= summary['hdi_97.5%'].iloc[0]
    



In [None]:
# Upper bound of the 95% HDI for the first summary row. `.iloc[0]` selects by
# position explicitly; an integer key on a label-indexed Series depends on a
# deprecated positional fallback.
summary['hdi_97.5%'].iloc[0]