In [None]:
# Task List 7 - Beta-binomial model
import matplotlib.pyplot as plt
import scipy
from scipy.stats import binom, beta
import numpy as np
import pandas as pd

In [None]:
# Common helpers shared by the exercises below
def norm(val):
    """Rescale a vector of weights so that its entries sum to one.

    Parameters
    ----------
    val : array-like
        Unnormalised weights, e.g. a density evaluated on a grid. Anything
        accepted by ``np.asarray`` works (ndarray, list, tuple, ...).

    Returns
    -------
    numpy.ndarray
        ``val`` divided by the sum of all of its elements. When that sum is
        zero or not finite the usual NumPy division rules apply, so the
        result may contain ``nan``/``inf`` entries.
    """
    weights = np.asarray(val, dtype=float)
    # np.sum reduces over every element in compiled code; the builtin sum()
    # iterates in Python, rejects plain lists in the division below and
    # only collapses the first axis of a multi-dimensional array.
    return weights / np.sum(weights)

def bb_model(theta, nb_positive, nb_negative, a, b):
    """Grid approximation of the beta-binomial model.

    A Beta(a, b) prior and a binomial likelihood are evaluated at every
    point of ``theta`` and multiplied point-wise. Each of the three curves
    is passed through ``norm`` before it is returned.

    Parameters
    ----------
    theta : array-like
        Grid of candidate success probabilities.
    nb_positive, nb_negative : int
        Observed numbers of successes and failures.
    a, b : float
        Shape parameters of the Beta prior.

    Returns
    -------
    tuple
        ``(prior, likelihood, posterior)`` evaluated on ``theta``.
    """
    nb_trials = nb_positive + nb_negative

    prior = norm(beta.pdf(theta, a, b))
    likelihood = norm(binom.pmf(nb_positive, nb_trials, theta))
    posterior = norm(likelihood * prior)

    return prior, likelihood, posterior


def exact_posterior_beta_model(theta, nb_positive, nb_negative, a, b):
    """Closed-form posterior of the beta-binomial model on a grid.

    Evaluates the Beta(a + nb_positive, b + nb_negative) density at every
    point of ``theta`` and passes the result through ``norm``.

    Parameters
    ----------
    theta : array-like
        Grid of candidate success probabilities.
    nb_positive, nb_negative : int
        Observed numbers of successes and failures.
    a, b : float
        Shape parameters of the Beta prior.

    Returns
    -------
    The normalised posterior values on ``theta``.
    """
    posterior_a = nb_positive + a
    posterior_b = nb_negative + b
    density = beta.pdf(theta, posterior_a, posterior_b)
    return norm(density)
    

def calc_map(nb_positive, nb_negative, a, b):
    """Maximum a posteriori estimate (posterior mode) of the success probability.

    The posterior is Beta(a + nb_positive, b + nb_negative). The textbook
    formula ``(alpha - 1) / (alpha + beta - 2)`` is only valid when both
    posterior shape parameters exceed one; outside that region it divides by
    zero or yields values outside [0, 1], so those cases are handled
    explicitly.

    Parameters
    ----------
    nb_positive, nb_negative : int
        Observed numbers of successes and failures (scalars).
    a, b : float
        Shape parameters of the Beta prior (scalars).

    Returns
    -------
    float
        The posterior mode. ``0.0`` or ``1.0`` when the density peaks at a
        boundary, and ``nan`` when there is no unique mode (uniform posterior,
        or both shape parameters below one).
    """
    post_a = a + nb_positive
    post_b = b + nb_negative

    # Interior mode: the only region where the closed-form expression holds.
    if post_a > 1 and post_b > 1:
        return (post_a - 1) / (post_a + post_b - 2)

    # Beta(1, 1) is flat and a U-shaped posterior peaks at both ends, so
    # neither has a single maximiser.
    if (post_a < 1 and post_b < 1) or post_a == post_b:
        return float('nan')

    # Remaining cases are monotone densities peaking at one boundary:
    # towards 0 when the "positive" shape is the smaller one, else towards 1.
    return 0.0 if post_a < post_b else 1.0


In [None]:
# Exercise 1 - Beta-binomial model for coin tossing problem

def visualize_coin_tossing_bb_model(nb_heads, nb_tails, a, b):
    """Plot prior, likelihood and posterior of the coin-tossing model.

    The curves are evaluated on 100 evenly spaced values of theta in
    [0, 1]. The grid posterior from ``bb_model`` is drawn together with the
    closed-form posterior from ``exact_posterior_beta_model``, and a
    vertical line marks the MAP estimate from ``calc_map``. Everything is
    drawn on the current pyplot figure.

    Parameters
    ----------
    nb_heads, nb_tails : int
        Observed numbers of heads and tails.
    a, b : float
        Shape parameters of the Beta prior.
    """
    grid = np.linspace(0, 1, 100)

    grid_prior, grid_likelihood, grid_posterior = bb_model(grid, nb_heads, nb_tails, a, b)
    closed_form_posterior = exact_posterior_beta_model(grid, nb_heads, nb_tails, a, b)
    map_estimate = calc_map(nb_heads, nb_tails, a, b)

    # (values, line style) pairs, listed in drawing order.
    curves = [
        (grid_prior,
         dict(color='green', linestyle=':',
              label='Prior [Beta(%d, %d)]' % (a, b))),
        (grid_likelihood,
         dict(color='orange', linestyle=':',
              label='Likelihood [Binom(%d, %d)]' % (nb_heads, nb_tails))),
        (grid_posterior,
         dict(color='red', linestyle='--',
              label='Posterior = Likelihood * Prior')),
        # Wide, translucent line so the dashed grid posterior stays visible on top of it.
        (closed_form_posterior,
         dict(color='blue', lw=5, alpha=0.5,
              label='Exact Posterior [Beta (%d, %d)]' % (nb_heads + a, nb_tails + b))),
    ]

    plt.xlabel('Theta')
    for values, line_style in curves:
        plt.plot(grid, values, **line_style)

    plt.axvline(map_estimate, color='black', label='MAP = %.3f' % map_estimate)

    # Legend anchored just outside the right edge of the axes.
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


# Example: 10 heads and 1 tail observed, Beta(2, 2) prior.
visualize_coin_tossing_bb_model(nb_heads=10, nb_tails=1, a=2, b=2)

In [None]:
# Exercise 2 - Posterior predictive distribution of a future observation
def posterior_predictive(nb_positive, nb_negative, a, b):
    """Probability that the next observation is positive.

    Parameters
    ----------
    nb_positive, nb_negative : int
        Observed numbers of positive and negative outcomes.
    a, b : float
        Shape parameters of the Beta prior.

    Returns
    -------
    float
        ``(a + nb_positive) / (a + b + nb_positive + nb_negative)``.
    """
    positive_weight = a + nb_positive
    total_weight = a + b + nb_positive + nb_negative
    return positive_weight / total_weight


def predict_future_observation(nb_heads, nb_tails, a, b):
    """Print the predicted outcome of the next coin toss.

    The chance of a head comes from ``posterior_predictive``; both chances
    are printed as percentages together with the more likely side.

    Parameters
    ----------
    nb_heads, nb_tails : int
        Observed numbers of heads and tails.
    a, b : float
        Shape parameters of the Beta prior.
    """
    head_probability = posterior_predictive(nb_heads, nb_tails, a, b)

    head_chance = np.round(100 * head_probability, 2)
    tail_chance = np.round(100 * (1 - head_probability), 2)

    # The decision uses the rounded percentages, so chances that agree to
    # two decimals are reported as a tie.
    if head_chance == tail_chance:
        predicted_outcome = "HEAD/TAIL"
    else:
        predicted_outcome = "HEAD" if head_chance > tail_chance else "TAIL"

    print("Chance for head: %.2f %%, for tail: %.2f %%" % (head_chance, tail_chance))
    print('Therefore predicting "%s"' % predicted_outcome)
    

# Observed tosses: 'H' marks a head, 'T' a tail.
data = list('HHTTHHH')

# Counts of each side, as needed by the beta-binomial update.
nb_heads, nb_tails = data.count('H'), data.count('T')
# (a, b) shape parameters of the Beta prior.
prior_params = (2, 2)

predict_future_observation(nb_heads, nb_tails, prior_params[0], prior_params[1])

In [None]:
# Exercise 3 - Online learning
from matplotlib import animation, rc
from IPython.display import HTML

# Source of the Pima Indians diabetes data (gist URL pinned to one revision).
DIABETES_CSV_URL = 'https://gist.githubusercontent.com/ktisha/c21e73a1bd1700294ef790c56c8aec1f/raw/819b69b5736821ccee93d05b51de0510bea00294/pima-indians-diabetes.csv'
DIABETES_COLUMNS = [
    'Times pregnant', 'Glucose', 'Blood pressure', 'Skin fold',
    'Insulin', 'BMI', 'Pedigree', 'Age', 'Class',
]

# The first nine lines of the file are skipped (presumably a descriptive
# header - verify against the file) and the columns are named explicitly.
diabetes = pd.read_csv(DIABETES_CSV_URL, skiprows=9, names=DIABETES_COLUMNS)


In [None]:
class OnlineStreamLearningAlgoritm(object):
    """Sequential (online) Bayesian update of a Beta prior from a label stream.

    The belief over the success probability is held as a discrete
    distribution on a fixed grid ``theta``. Every label read from
    ``dataset['Class']`` multiplies the current belief by a single-trial
    binomial likelihood; the normalised product becomes the prior for the
    next label.
    """

    def __init__(self, prior_a, prior_b, dataset):
        """Set up the initial belief and the label stream.

        Parameters
        ----------
        prior_a, prior_b : float
            Shape parameters of the initial Beta prior.
        dataset : mapping
            Object whose ``'Class'`` entry is an iterable of labels. A label
            equal to 1 is counted as positive, any other value as negative.
        """
        self.nb_positive = 0
        self.nb_negative = 0
        self.prior_a = prior_a
        self.prior_b = prior_b

        self.theta = np.linspace(0, 1, 1000)

        self.data_stream_iter = iter(dataset['Class'])
        self.prior = norm(beta.pdf(x=self.theta, a=prior_a, b=prior_b))
        self.posterior = None
        self.map_ = None

        # _history[k] is the state after k labels; index 0 is the untouched
        # prior. It lets animation frames be redrawn without consuming the
        # stream again.
        self._history = [self._snapshot()]

    def _snapshot(self):
        """Return the current state as (nb_positive, nb_negative, belief, map_)."""
        # self.prior always holds the latest belief: the initial prior before
        # any update, the most recent posterior afterwards.
        return (self.nb_positive, self.nb_negative, self.prior, self.map_)

    def _state_after(self, step):
        """Return the snapshot after ``step`` labels, reading the stream as needed.

        If the stream ends earlier, the last available snapshot is returned.
        """
        while len(self._history) <= step:
            try:
                sample = next(self.data_stream_iter)
            except StopIteration:
                break
            self.update_posterior(sample)
        return self._history[min(step, len(self._history) - 1)]

    def update_posterior(self, new_sample):
        """Fold one label into the belief and refresh the counts and MAP.

        Parameters
        ----------
        new_sample : int
            The observed label; 1 is counted as positive, anything else as
            negative.
        """
        if new_sample == 1:
            self.nb_positive += 1
        else:
            self.nb_negative += 1

        self.posterior = norm(binom.pmf(k=new_sample, n=1, p=self.theta) * self.prior)
        # Today's posterior is tomorrow's prior.
        self.prior = self.posterior
        self.map_ = calc_map(self.nb_positive, self.nb_negative, self.prior_a, self.prior_b)
        self._history.append(self._snapshot())

    def make_visualization(self, nb_steps):
        """Build an animation of the belief over ``nb_steps`` updates.

        Frame 0 shows the prior and frame ``k`` the posterior after ``k``
        labels, so the animation has ``nb_steps + 1`` frames and consumes up
        to ``nb_steps`` labels from the stream.

        Parameters
        ----------
        nb_steps : int
            Number of labels to fold in.

        Returns
        -------
        matplotlib.animation.FuncAnimation
        """
        fig = plt.figure()

        def animate(i):
            # FuncAnimation draws the first frame an extra time to initialise
            # the figure when no init_func is given, so the drawn state must
            # depend on the frame index only, not on how often this runs.
            nb_positive, nb_negative, belief, map_ = self._state_after(i)

            # Clear and redraw on the animation's own figure rather than on
            # whichever figure happens to be current.
            fig.clf()
            ax = fig.add_subplot(111)
            ax.plot(self.theta, belief,
                    color='red',
                    label='Sick: %d\nHealthy: %d' % (nb_positive, nb_negative))
            # No MAP has been computed before the first label arrives.
            if map_ is not None:
                ax.axvline(map_,
                           linestyle='--',
                           color='blue',
                           label='MAP = %.3f' % map_)
            ax.legend(bbox_to_anchor=(0.70, 1), loc=2, borderaxespad=0.)

        anim = animation.FuncAnimation(fig, animate,
                                       frames=range(nb_steps + 1),
                                       interval=50)
        return anim
    

# Shape parameters of the Beta prior.
prior_a, prior_b = 2, 8
# Stream only the first 20 rows, one animation step per row.
data = diabetes.iloc[:20]
nb_steps = len(data)

learner = OnlineStreamLearningAlgoritm(prior_a, prior_b, data)
anim = learner.make_visualization(nb_steps)
# Last expression of the cell: renders the animation as an interactive HTML widget.
HTML(anim.to_jshtml())