# Probabilistic Graphical Models - HWK 3 

## Paul Dufossé & Matthieu Mazzolini

In [None]:
import numpy as np
import pandas as pd

from scipy.stats import multivariate_normal as mvn

from plotly.offline import init_notebook_mode, iplot

init_notebook_mode()

import plotly.plotly as py
import plotly.graph_objs as go

In [None]:
# Load the training and test sets: whitespace-separated files without a header
# row, whose two columns are named 'x' and 'y'.
# `sep=r'\s+'` is the equivalent of `delim_whitespace=True`, which is
# deprecated in recent pandas releases.
data = pd.read_csv('EMGaussian.data', sep=r'\s+', header=None, names=['x', 'y'])
test = pd.read_csv('EMGaussian.test', sep=r'\s+', header=None, names=['x', 'y'])

1) We implement the alpha & beta recursions

In [None]:
# log(sum(exp)) function
def lse(v):
    """Return log(sum(exp(v))) computed in a numerically stable way.

    The maximum of ``v`` is subtracted before exponentiating so that very
    large or very small log-values neither overflow nor underflow.

    Parameters
    ----------
    v : array_like
        Log-domain values.

    Returns
    -------
    float
        ``log(sum(exp(v)))``; ``-inf`` for an empty input.
    """
    v = np.asarray(v, dtype=float)
    if v.size == 0:
        # An empty sum is 0, whose log is -inf.
        return -np.inf
    m = np.max(v)
    if not np.isfinite(m):
        # All entries are -inf (result -inf), or some entry is +inf / nan:
        # shifting by m would produce nan, so return m directly.
        return m
    return m + np.log(np.exp(v - m).sum())

def compute_cond_proba(X, pi, mu, sigma):
    """Evaluate the Gaussian emission density of every observation per state.

    Parameters
    ----------
    X : array_like, shape (T, p)
        Observations, one row per time step.
    pi : array_like
        Initial state distribution (unused, kept for interface compatibility).
    mu : sequence of length K
        Mean vector of each state.
    sigma : sequence of length K
        Covariance matrix of each state.

    Returns
    -------
    numpy.ndarray, shape (T, K)
        ``P[t, k]`` is the density of ``X[t]`` under the Gaussian of state k.
    """
    X = np.asarray(X)
    # Sizes are derived from the arguments instead of module-level globals.
    n_steps = X.shape[0]
    n_states = len(mu)
    P = np.zeros((n_steps, n_states))
    for k in range(n_states):
        # The density is evaluated for all time steps at once.
        P[:, k] = mvn.pdf(X, mu[k], sigma[k])
    return P

def log_alpha_rec(X, A, P, pi, mu, sigma):
    """Run the forward (alpha) recursion of the HMM in the log domain.

    Parameters
    ----------
    X, mu, sigma :
        Unused here (the emission densities are passed through ``P``); kept
        for interface compatibility.
    A : array_like, shape (K, K)
        Transition matrix, ``A[i, k] = p(q_t = k | q_{t-1} = i)``.
    P : array_like, shape (T, K)
        Emission densities, ``P[t, k] = p(x_t | q_t = k)``.
    pi : array_like, shape (K,)
        Initial state distribution.

    Returns
    -------
    numpy.ndarray, shape (T, K)
        ``alpha[t, k] = log p(x_0, ..., x_t, q_t = k)``. Row t is time t,
        column k is state k.
    """
    log_P = np.log(np.asarray(P, dtype=float))
    # Sizes come from P rather than from module-level globals.
    T, K = log_P.shape
    log_A = np.log(np.asarray(A, dtype=float))

    alpha = np.empty((T, K))
    if T == 0:
        return alpha

    # First message: alpha(q_0) = pi(q_0) * p(x_0 | q_0).
    alpha[0] = log_P[0] + np.log(np.asarray(pi, dtype=float))

    for t in range(1, T):
        # scores[i, k] = log alpha[t-1, i] + log A[i, k]; the sum over the
        # previous state i is done with the max-shift trick for stability.
        scores = alpha[t - 1][:, None] + log_A
        m = scores.max(axis=0)
        # A column that is entirely -inf would give (-inf) - (-inf) = nan.
        m = np.where(np.isfinite(m), m, 0.0)
        alpha[t] = m + log_P[t] + np.log(np.exp(scores - m).sum(axis=0))

    return alpha

def log_beta_rec(X, A, P, pi, mu, sigma):
    """Run the backward (beta) recursion of the HMM in the log domain.

    Parameters
    ----------
    X, pi, mu, sigma :
        Unused here (the emission densities are passed through ``P``); kept
        for interface compatibility.
    A : array_like, shape (K, K)
        Transition matrix, ``A[k, j] = p(q_{t+1} = j | q_t = k)``.
    P : array_like, shape (T, K)
        Emission densities, ``P[t, k] = p(x_t | q_t = k)``.

    Returns
    -------
    numpy.ndarray, shape (T, K)
        ``beta[t, k] = log p(x_{t+1}, ..., x_{T-1} | q_t = k)``.
    """
    log_P = np.log(np.asarray(P, dtype=float))
    # Sizes come from P rather than from module-level globals.
    T, K = log_P.shape
    log_A = np.log(np.asarray(A, dtype=float))

    # The last message is beta(q_{T-1}) = 1, i.e. 0 in the log domain.
    # (It must not contain the emission or the initial distribution.)
    beta = np.zeros((T, K))

    for t in range(T - 2, -1, -1):
        # scores[k, j] = log A[k, j] + log p(x_{t+1} | j) + log beta[t+1, j];
        # the emission term depends on the summation index j, so it stays
        # inside the log-sum-exp.
        scores = log_A + (log_P[t + 1] + beta[t + 1])
        m = scores.max(axis=1)
        # A row that is entirely -inf would give (-inf) - (-inf) = nan.
        m = np.where(np.isfinite(m), m, 0.0)
        beta[t] = m + np.log(np.exp(scores - m[:, None]).sum(axis=1))

    return beta
    

Then we can compute the posterior probabilities gamma(qt) and p_qt_qt1 (where qt1 stands for q(t+1)).

In [None]:
# Observations as a (T, p) array, and the number of hidden states.
X = np.array(data)
(T, p) = X.shape
K = 4

# Uniform initial state distribution over the K states.
pi = np.ones(K) / K

# Transition matrix: 1/2 on the diagonal and 1/6 elsewhere. Float literals
# are used so the values are also correct under Python 2 integer division.
A = np.eye(K) * (1.0 / 2 - 1.0 / 6) + np.ones((K, K)) / 6.0

#we are not sure of the parameters for the previous homework
#so we chose to use the EM estimation from the scikit algorithm
# `sklearn.mixture.gmm.GMM` was deprecated and later removed from
# scikit-learn; prefer `GaussianMixture` when it is available and fall back
# to the legacy class on old installations.
try:
    from sklearn.mixture import GaussianMixture
except ImportError:
    GaussianMixture = None

if GaussianMixture is not None:
    gm = GaussianMixture(n_components=K, covariance_type='full').fit(X)
    mu_ = gm.means_
    sigma_ = gm.covariances_
else:
    from sklearn.mixture import gmm

    gm = gmm.GMM(n_components=K, covariance_type='full').fit(X)
    mu_ = gm.means_
    sigma_ = gm.covars_

print(mu_)
print(sigma_)

In [None]:
def compute_filtering(log_alpha, log_beta):
    """Return the normalised state posteriors from the log alpha/beta messages.

    For every time step t the product alpha_t(k) * beta_t(k) is normalised
    over the states k, which gives gamma_t(k) = p(q_t = k | all observations).

    Parameters
    ----------
    log_alpha : array_like, shape (T, K)
        Log forward messages.
    log_beta : array_like, shape (T, K)
        Log backward messages.

    Returns
    -------
    numpy.ndarray, shape (T, K)
        Posterior probabilities; every row sums to 1.
    """
    log_joint = np.asarray(log_alpha, dtype=float) + np.asarray(log_beta, dtype=float)
    # Shift each row by its maximum before exponentiating to avoid underflow;
    # the shift cancels in the normalisation below.
    weights = np.exp(log_joint - log_joint.max(axis=1, keepdims=True))
    return weights / weights.sum(axis=1, keepdims=True)

In [None]:
#p_qt

# Emission densities of every observation under every state's Gaussian.
P = compute_cond_proba(X=X, pi=pi, mu=mu_, sigma=sigma_)

# Log-domain forward and backward messages, then the normalised posteriors.
log_alpha = log_alpha_rec(X=X, A=A, P=P, pi=pi, mu=mu_, sigma=sigma_)
log_beta = log_beta_rec(X=X, A=A, P=P, pi=pi, mu=mu_, sigma=sigma_)
gamma = compute_filtering(log_alpha=log_alpha, log_beta=log_beta)

#p_qt_qt1

In [None]:

# Plot gamma for the first 100 time steps, one trace per hidden state:
# each state is shown on its own figure, then all states together.
n_points = 100
data_plot = []
for k in range(K):
    # Slice with [:n_points] (not [0:99]) so x and y have the same length.
    y_k = gamma[:n_points, k]
    trace = go.Scatter(
        x=np.arange(len(y_k)),
        y=y_k
    )

    data_plot.append(trace)
    iplot([trace], filename="plot")

iplot(data_plot, filename="plot")
   



From alpha and gamma we compute the ksi variables, which we will use in the EM algorithm below.

In [None]:
def compute_log_ksi(qt, qt1, t, A, pi, mu, sigma):
    """Return log ksi_t(qt, qt1) = log p(q_t = qt, q_{t+1} = qt1 | observations).

    Uses the identity
        ksi_t(i, j) = alpha_t(i) * A[i, j] * p(x_{t+1} | j)
                      * gamma_{t+1}(j) / alpha_{t+1}(j)
    which becomes a sum of logs in the log domain.

    Reads the module-level arrays ``log_alpha``, ``P`` and ``gamma``.

    Parameters
    ----------
    qt, qt1 : int
        State at time t and state at time t + 1.
    t : int
        Time index; ``t + 1`` must be a valid row of the module-level arrays.
    A : numpy.ndarray, shape (K, K)
        Transition matrix, ``A[qt, qt1] = p(q_{t+1} = qt1 | q_t = qt)``.
    pi, mu, sigma :
        Unused; kept for interface compatibility.

    Returns
    -------
    float
        The log pairwise posterior.
    """
    return (log_alpha[t, qt]
            + np.log(A[qt, qt1])
            + np.log(P[t + 1, qt1])
            + np.log(gamma[t + 1, qt1])
            - log_alpha[t + 1, qt1])

3) The estimation equations for the EM algorithm are given in the appendix.

4) We implement now the EM algorithm 

5)

Comments

6) We compute and plot the log-likelihoods

7) A description of the Viterbi algorithm is given in the appendix.

8) Now we implement a new method : the Viterbi decoding algorithm (or max-product algorithm)

In [None]:
def viterbi(X, A, pi, mu, sigma):
    """Decode the most probable hidden state sequence (max-product).

    Parameters
    ----------
    X : array_like, shape (T, p)
        Observations, one row per time step.
    A : array_like, shape (K, K)
        Transition matrix, ``A[i, k] = p(q_t = k | q_{t-1} = i)`` (the same
        convention as the alpha recursion, which sums over ``A[:, k]``).
    pi : array_like, shape (K,)
        Initial state distribution.
    mu : sequence of length K
        Mean vector of each state's Gaussian emission.
    sigma : sequence of length K
        Covariance matrix of each state's Gaussian emission.

    Returns
    -------
    numpy.ndarray of int, shape (T,)
        Index of the most probable state at every time step.
    """
    X = np.asarray(X)
    # Sizes are derived from the arguments instead of module-level globals.
    T = X.shape[0]
    K = len(pi)
    if T == 0:
        return np.zeros(0, dtype=int)

    log_A = np.log(np.asarray(A, dtype=float))

    # Log emission densities, evaluated once for all time steps.
    log_B = np.empty((T, K))
    for k in range(K):
        log_B[:, k] = mvn.logpdf(X, mu[k], sigma[k])

    # T1[t, k]: best log-score of any path ending in state k at time t.
    # T2[t, k]: previous state on that best path (back-pointer).
    T1 = np.empty((T, K))
    T2 = np.zeros((T, K), dtype=int)

    # Initialisation belongs to time 0.
    T1[0] = np.log(np.asarray(pi, dtype=float)) + log_B[0]

    for t in range(1, T):
        # scores[i, k] = T1[t-1, i] + log A[i, k]: maximise over previous i.
        scores = T1[t - 1][:, None] + log_A
        T2[t] = scores.argmax(axis=0)
        T1[t] = scores.max(axis=0) + log_B[t]

    # Backtrack from the best final state using the back-pointers.
    seq = np.zeros(T, dtype=int)
    seq[T - 1] = T1[T - 1].argmax()
    for t in range(T - 2, -1, -1):
        seq[t] = T2[t + 1, seq[t + 1]]

    return seq

In [None]:
# Decode the most probable state sequence.
# NOTE(review): `_mu_k` and `_sigma` are not defined in the visible part of
# the notebook; presumably they are the EM-estimated means and covariances.
# Confirm they are defined before this cell runs.
res = viterbi(X, A, pi, _mu_k, _sigma)

# Plot the first 100 decoded states; slice with [:100] (not [0:99]) so that
# x and y have the same length.
res_head = res[:100]

trace=go.Scatter(
    x=np.arange(len(res_head)),
    y=res_head,
    name="Most probable hidden states"
)

layout=go.Layout(
    title="Most probable hidden states"
)

fig=go.Figure(
    data=[trace],
    layout=layout
)
iplot(fig, filename="plot for state 1")
