# Bayesian A/B Test

In [None]:
import numpy as np
import pandas as pd

## Generate some data

In [None]:
# Ground-truth click probabilities used to simulate each button variant.
blue_button_click_probability = 0.75
red_button_click_probability = 0.70

In [None]:
# Simulate 250 single-trial binomial draws for the blue button and store them as booleans.
blue_button_clicks = np.random.binomial(1, blue_button_click_probability, 250).astype(bool)

In [None]:
# Simulate 250 single-trial binomial draws for the red button and store them as booleans.
red_button_clicks = np.random.binomial(1, red_button_click_probability, 250).astype(bool)

In [None]:
# Observed click rate in the simulated blue-button sample.
np.mean(blue_button_clicks)

In [None]:
# Observed click rate in the simulated red-button sample.
np.mean(red_button_clicks)

### Create DataFrame

In [None]:
# One row per simulated blue-button observation, tagged with its variant colour.
blue_button_data = pd.DataFrame({'button_clicked': blue_button_clicks}).assign(button_color='blue')

In [None]:
# One row per simulated red-button observation, tagged with its variant colour.
red_button_data = pd.DataFrame({'button_clicked': red_button_clicks}).assign(button_color='red')

In [None]:
# Stack both variants into a single frame. ignore_index=True renumbers the rows
# consecutively; without it each input frame keeps its own labels, so every
# index label would occur twice and label-based lookups would return two rows.
data = pd.concat([blue_button_data, red_button_data], ignore_index=True)

### For the red button

#### Value counts

In [None]:
# Count clicked vs. not-clicked observations for the red button.
data.loc[data['button_color'] == 'red', 'button_clicked'].value_counts()

#### Click frequency

In [None]:
# Click frequency for the red button = share of True values in the boolean column.
# Taking the mean avoids indexing value_counts() (whose index holds True/False)
# with the integer keys 0 and 1, avoids recomputing the same filter three
# times, and still works when only one outcome occurs in the sample.
data.loc[data['button_color'] == 'red', 'button_clicked'].mean()

### For the blue button

#### Value counts

In [None]:
# Count clicked vs. not-clicked observations for the blue button.
data.loc[data['button_color'] == 'blue', 'button_clicked'].value_counts()

#### Click frequency

In [None]:
# Click frequency for the blue button = share of True values in the boolean column.
# Taking the mean avoids indexing value_counts() (whose index holds True/False)
# with the integer keys 0 and 1, avoids recomputing the same filter three
# times, and still works when only one outcome occurs in the sample.
data.loc[data['button_color'] == 'blue', 'button_clicked'].mean()

## Bayesian Inference

In [None]:
import pymc3 as pm

### Define random variables

In [None]:
# Model: a Uniform(0, 1) prior on each button's click probability, a derived
# relative-increase quantity, and Bernoulli likelihoods on the observed clicks.
basic_model = pm.Model()

with basic_model as model:
    # Priors on the unknown click probabilities.
    p_click_red_button = pm.Uniform('p_click_red_button', lower=0, upper=1)
    p_click_blue_button = pm.Uniform('p_click_blue_button', lower=0, upper=1)

    # Relative increase of red over blue: ratio of the two probabilities minus one.
    red_to_blue_ratio = p_click_red_button / p_click_blue_button
    relative_increase = pm.Deterministic('relative_increase', red_to_blue_ratio - 1)

    # Likelihoods: each observed click is a Bernoulli trial with the matching probability.
    obs_red_button = pm.Bernoulli('obs_red_button', p=p_click_red_button, observed=red_button_clicks)
    obs_blue_button = pm.Bernoulli('obs_blue_button', p=p_click_blue_button, observed=blue_button_clicks)

## Fit a model

In [None]:
# Sample from the posterior of the model defined above, requesting 20,000 draws.
with model:
    trace = pm.sample(draws=20000)



In [None]:
# Inspect the sampled values recorded for the red button's click probability.
trace.get_values(varname="p_click_red_button")

## Visualize results

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt

### Individual posterior distributions

In [None]:
def plot_individual_posteriors(p_click_red_button_samples, p_click_blue_button_samples):
    """Plot the samples of both click probabilities as two stacked histograms.

    Creates one 14.5 x 6 figure with two rows: the red button's samples on top
    and the blue button's samples below. Both panels use an x-range of 0 to 1.

    Args:
        p_click_red_button_samples: Array-like of sampled values for the red
            button's click probability.
        p_click_blue_button_samples: Array-like of sampled values for the blue
            button's click probability.
    """
    plt.figure(figsize=(14.5, 6))

    # (subplot position, samples, legend label, fill colour) for each panel.
    panels = (
        (211, p_click_red_button_samples, "Posterior of p_click_red_button", "#A60628"),
        (212, p_click_blue_button_samples, "Posterior of p_click_blue_button", "#0a8ecd"),
    )

    for position, samples, label, color in panels:
        plt.subplot(position)
        plt.xlim(0, 1)
        # density=True normalises the histogram to unit area. It replaces the
        # `normed` keyword, which current Matplotlib releases no longer accept.
        plt.hist(samples, histtype='stepfilled', bins=25, alpha=0.85,
                 label=label, color=color, density=True)
        plt.legend(loc="upper left")
        if position == 211:
            # The title goes on the top panel only, so it heads the whole figure.
            plt.title("Posterior Distributions of p_click_red_button, p_click_blue_button")

In [None]:
# Pull the sampled values for each click probability out of the trace.
p_click_red_button_samples = trace.get_values(varname="p_click_red_button")
p_click_blue_button_samples = trace.get_values(varname="p_click_blue_button")

In [None]:
# Draw the two histograms, red on top and blue below.
plot_individual_posteriors(
    p_click_red_button_samples=p_click_red_button_samples,
    p_click_blue_button_samples=p_click_blue_button_samples,
)

### Relative increase distribution

In [None]:
def plot_relative_increase_posterior(relative_increase_samples):
    """Plot a histogram of the relative-increase samples with a reference line at zero.

    Creates one 14.5 x 6 figure containing a 30-bin normalised histogram and a
    dashed vertical line at 0.

    Args:
        relative_increase_samples: Array-like of sampled relative-increase
            values to plot.
    """
    plt.figure(figsize=(14.5, 6))

    # density=True normalises the histogram to unit area. It replaces the
    # `normed` keyword, which current Matplotlib releases no longer accept.
    plt.hist(relative_increase_samples, histtype='stepfilled', bins=30, alpha=0.85,
             label=r"""$\frac{p_{click\_red}}{p_{click\_blue}} - 1$""", color="#7A68A6", density=True)
    # Dashed line at zero: the point where the relative increase changes sign.
    plt.axvline(0, color='k', linestyle='--')
    plt.title("""Relative Increase In Button Clicks, Red Button vs. Blue Button""", size=20)
    plt.legend(loc="upper right", fontsize=35)
    plt.tight_layout()

In [None]:
# Pull the sampled relative-increase values out of the trace and plot them.
relative_increase_samples = trace.get_values(varname="relative_increase")

plot_relative_increase_posterior(relative_increase_samples=relative_increase_samples)

In [None]:
# Fraction of samples whose relative increase is below zero.
np.mean(relative_increase_samples < 0)

In [None]:
# 5th percentile of the relative-increase samples.
fifth_percentile = np.percentile(relative_increase_samples, q=5)

In [None]:
# Display the 5th percentile computed in the previous cell.
fifth_percentile

In [None]:
# 95th percentile of the relative-increase samples.
ninety_fifth_percentile = np.percentile(relative_increase_samples, q=95)

In [None]:
# Display the 95th percentile computed in the previous cell.
ninety_fifth_percentile

In [None]:
# Fraction of samples whose relative increase exceeds 0.05.
np.mean(relative_increase_samples > 0.05)