In [3]:
import numpy as np
import h5py
import scipy

In [4]:
# Loading the data
with h5py.File('../HAR/preprocessed.hdf5','r') as hf:
    x_train = np.array(hf.get('x_train'))
    y_train = np.array(hf.get('y_train'))
    s_train = np.array(hf.get('s_train'))
    x_test = np.array(hf.get('x_test'))
    y_test = np.array(hf.get('y_test'))
    s_test = np.array(hf.get('s_test'))
    x_train_with_past = np.array(hf.get('x_train_with_past'))
    y_train_with_past = np.array(hf.get('y_train_with_past'))
    x_test_with_past = np.array(hf.get('x_test_with_past'))
    y_test_with_past = np.array(hf.get('y_test_with_past'))

In [8]:
s_train

array([ 1,  1,  1, ..., 30, 30, 30])

In [9]:
y_train

array([5, 5, 5, ..., 2, 2, 2])

In [10]:
np.sum(s_train == 1)

347

In [285]:
np.sum(transition_train, axis=0)

array([ 1.00833789,  0.99222336,  0.99488295,  0.99715828,  1.00285639,
        1.00454112])

In [310]:
# Learning a one component Gaussian over all the features
def compute_transition(y, alpha=0.1):
    '''
    Compute the transition matrice.
    Rows: states to
    cols: states from
    States are indexed starting from 1
    '''
    num_state = np.max(y)
    transition = alpha*np.ones((num_state, num_state))
    for i in xrange(y.shape[0]-1):
        transition[y[i+1]-1, y[i]-1] += 1
    # Normalisation (column should be normalized)
    transition /= np.sum(transition, axis=0)[:, np.newaxis]
    return transition

def compute_emission(x, y):
    '''
    Compute the parameters of the gaussian distribution
    of the emission given each state.
    We assume each emission distribution is independent,
    the covariance matrix is diagonal then.
    States are indexed starting from 1
    '''
    num_state = np.max(y)
    
    sigma_diag = np.zeros((num_state, x.shape[1]))
    mu = np.zeros((num_state, x.shape[1]))
    for s in xrange(num_state):
        x_s = x[(y == s+1), :]
        # Computing mu_s
        mu[s] = np.mean(x_s, axis=0)
        # Computing sigma_s (by column)
        sigma_diag[s] = np.std(x_s, axis=0)

    return mu, sigma_diag

def compute_logscore(data, log_transition, mu, sigma, C):
    y = np.zeros((C, C))
    for j in xrange(C):
        y[j, :] = np.log(compute_logB(data, mu, sigma_diag, j))

    return y + log_transition

def compute_logscore_pymc(data, log_transition, means, cov, C):
    y = np.zeros((C, C))
    for j in xrange(C):
        y[j, :] = scipy.stats.multivariate_normal.logpdf(data, mean=means[j], cov=covs[j])
    return y + log_transition

def viterbi(inputs, init, log_transition, mu, sigma, C):
    '''
    Evaluates the highest scoring sequence
    '''
    y = np.zeros((C, C))
    initial = np.zeros(C)

    initial[init] = 1
    initial = np.log(initial)

    n = inputs.shape[0]
    # To store the maxes
    max_table = np.zeros((n, C))
    backpointer_table = np.zeros((n, C))

    # first timestep
    # the initial most likely paths are the initial state distribution
    state_init = initial + compute_logscore(inputs[0,:], log_transition, mu, sigma, C)
    maxes = np.max(state_init, axis=1)
    backpointers = np.argmax(state_init, axis=1)
    max_table[0, :] = maxes

    for i in xrange(1, n):
        # COmputing the score
        y = compute_logscore(inputs[i, :], log_transition, mu, sigma, C)
        scores = y + np.repeat(maxes.reshape(1, C), C, axis=0)

        # compute new maxes
        maxes = np.max(scores, axis=1)
        backpointers = np.argmax(scores, axis=1)

        max_table[i, :] = maxes
        backpointer_table[i, :] = backpointers

    # follow backpointers to recover max path
    classes = np.zeros(n)
    classes[n-1] = np.argmax(maxes, axis=0)
    for i in xrange(n-1, 0, -1):
        classes[i-1] = backpointer_table[i, classes[i]]

    return classes

def standardize(x):
    '''
    Standardize each column of x
    '''
    x_std = np.std(x, axis=0)
    x_mu = np.mean(x, axis=0)
    
    return (x - x_mu)/x_std[np.newaxis, :]

def compute_accuracy(pred_classes, true_classes):
    '''
    Compute accuracy
    '''
    return np.sum(pred_classes == true_classes) /(1.*len(pred_classes))

def compute_logB(data_point, mu, sigma_diag, j):
    '''
    Compute log(p(x|s_j))
    '''
    return np.sum([scipy.stats.norm.logpdf(d, loc=mu[j, i], scale=sigma_diag[j, i]) for i, d in enumerate(data_point)])

def compute_B(data_point, mu, sigma_diag, j):
    '''
    Compute p(x|s_j)
    '''
    return np.prod([scipy.stats.norm.pdf(d, loc=mu[j, i], scale=sigma_diag[j, i]) for i, d in enumerate(data_point)])

In [176]:
C=6
init = 4
y = np.zeros((C, C))
initial = np.zeros(C)

initial[init] = 1
initial = np.log(initial)
print(initial)

state_init = initial + compute_logscore(x_standard[0,:], log_transition_train, mu, sigma_diag, C)
print(state_init)
print(np.max(state_init, axis=1))

[-inf -inf -inf -inf   0. -inf]
[[        -inf         -inf         -inf         -inf -10.65349168
          -inf]
 [        -inf         -inf         -inf         -inf -14.8395688
          -inf]
 [        -inf         -inf         -inf         -inf -12.82739194
          -inf]
 [        -inf         -inf         -inf         -inf  -7.88384383
          -inf]
 [        -inf         -inf         -inf         -inf  -0.84924053
          -inf]
 [        -inf         -inf         -inf         -inf -20.8956498
          -inf]]
[-10.65349168 -14.8395688  -12.82739194  -7.88384383  -0.84924053
 -20.8956498 ]




## 1) Sequence Prediction

### Subsample + MLE

In [300]:
# We retain 6 features (known to be independent)

x = np.concatenate((x_train[:, :3], x_train[:, 41:44]), axis=1)
x_sub_test = np.concatenate((x_test[:, :3], x_test[:, 41:44]), axis=1)
print(x.shape)

(7352, 6)


In [301]:
# Learning the HMM

# standardization
x_standard = standardize(x)
print(x_standard.shape)

# ### TRANSITION
transition_train = compute_transition(y_train)
log_transition_train = np.log(transition_train)
print(transition_train.shape)

# ### EMISSION
mu, sigma_diag = compute_emission(x_standard, y_train)
print(mu.shape)
print(sigma_diag.shape)

(7352, 6)
(6, 6)
(6, 6)
(6, 6)


In [304]:
%%time
# Sequence prediction
C = 6
sample_size = 3000
seq_pred = viterbi(x_standard[:sample_size,:], 4, log_transition_train, mu, sigma_diag, C)
# Shifting the index of 1
seq_pred += 1
print 'ACCURACY train: {}'.format(compute_accuracy(seq_pred, y_train[:sample_size]))



ACCURACY train: 0.876666666667
CPU times: user 7.27 s, sys: 67.8 ms, total: 7.34 s
Wall time: 7.91 s




In [293]:
%%time
x_sub_test_standard = standardize(x_sub_test)
seq_pred_test = viterbi(x_sub_test_standard[:sample_size,:], 4, log_transition_train, mu, sigma_diag, C)
seq_pred_test += 1
print 'ACCURACY test: {}'.format(compute_accuracy(seq_pred_test, y_test[:sample_size]))



ACCURACY test: 0.768917543264
CPU times: user 7.64 s, sys: 97.1 ms, total: 7.73 s
Wall time: 8.47 s


In [193]:
print seq_pred_test[:100]

[ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.
  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.
  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.
  5.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.
  6.  6.  6.  6.  6.  6.  6.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]


### Subsample + pyMC

In [220]:
# We use here the model fited by PyMc
means = []
covs = []

with h5py.File('../HAR/means_covs.hdf5', "r") as f:
    for k in xrange(6):
        means.append(np.array(f.get('mu_{}'.format(k+1))))
        # Pymc provides tau, cov = (tau)^{-1}
        covs.append(np.linalg.pinv(np.array(f.get('cov_{}'.format(k+1)))))
means = np.array(means)
covs = np.array(covs)

In [221]:
%%time
# Sequence prediction
C = 6
sample_size = 1000
seq_pred = viterbi(x_standard[:sample_size,:], 4, log_transition_train, means, covs, C)
# Shifting the index of 1
seq_pred += 1
print 'ACCURACY train: {}'.format(compute_accuracy(seq_pred, y_train[:sample_size]))



CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 16 µs
ACCURACY train: 0.278


### More features

# Forward Backward algorithm

In [322]:
print(compute_B(x[0], mu, sigma_diag, 4))
print(compute_logB(x[0], mu, sigma_diag, 4))

0.0364018687618
-3.31313516605


In [329]:
def forward(x, init, end, transition, mu, sigma_diag):
    '''
    Compute p(X|lambda) with lambda the HMM parameters
    (here transition, mu, sigma_diag)
    alpha[t, j] = p(x_1, ... x_t, S(t) = s_j|lambda)
    NB:
        alpha[0, :] used only as initialization
    '''
    C = mu.shape[0]
    T = x.shape[0]
    # Initialization
    alpha = np.zeros((T, C))
    alpha[0, init] = 1
    
    # Recursion
    for t in xrange(1, T):
        for j in xrange(C):
            b_t_j = compute_B(x[t], mu, sigma_diag, j)
            alpha[t, j] = b_t_j*np.sum([alpha[t-1, i]*transition[j,i] for i in xrange(C)])
    
    # Termination
    alpha[T, end] = np.sum([alpha[T-1, i]*transition[i,end] for i in xrange(C)])
    
    return alpha

def backward(x, init, end, transition, mu, sigma_diag):
    '''
    Compute the backward probabilities.
    beta[t, j] = p(x_{t+1}, ..., x_{T}| S(t) = s_j, lambda)
    '''
    C = mu.shape[0]
    T = x.shape[0]
    # Initialization
    beta = np.zeros((T, C))
    for i in xrange(C):
        beta[T-1, i] = transition[end, i]
    
    # Recursion
    for t in xrange(T-2, 0, -1):
        for j in xrange(C):
            for i in xrange(C):
                b_t1_i = compute_B(x[t+1], mu, sigma_diag, i)
                beta[t, j] += b_t1_i * beta[t+1, i] * transition[j,i]
    
    # Termination
    for i in xrange(C):
        b_1_i = compute_B(x[0], mu, sigma_diag, i)
        beta[0, init] += b_1_i * beta[1, i] * transition[i,init]
        
    return beta

def compute_state_probability(alpha, beta, end):
    gamma = np.multiply(alpha, beta)
    return gamma/alpha[-1, end]

def compute_state_transition(x, alpha, beta, transition, mu, sigma_diag):
    T = x.shape[0]
    C = mu.shape[0]
    psi = np.zeros((T, C, C))
    for t in xrange(T-1):
        for i in xrange(C):
            for j in xrange(C):
                b_t_j = compute_B(x[t+1], mu, sigma_diag, j)
                psi[t, i, j] = alpha[t, i]*transition[i, j]*beta[t+1, j]*b_t_j
    
    return psi/alpha[-1, end]
    
    
def forward_backward(x, init, end, transition, mu, sigma_diag, n_iterations):
    '''
    EM algorithm to fit an HMM to a sequence of observation.
    Take as argnument an initial HMM and returns a finer one.
    '''
    # E-step: estimate the state occupation probabilities
    alpha = forward(x, init, end, transition, mu, sigma_diag)
    beta = backward(x, init, end, transition, mu, sigma_diag)
    state_probability = compute_state_probability(alpha, beta, end)
    state_transition = compute_state_transition(x, alpha, beta, transition, mu, sigma_diag)
    
    # M-step: re-estimate HMM parameers
    denom = np.sum(gamma, axis=0)[np.newaxis, :]
    mu = np.dot(state_probability.T, x)/denom
    for j in xrange(C):
        diff = x - mu[j]
        sigma_diag[:, j] = np.std(np.multiply(np.sqrt(np.abs(state_probability[:,j])), diff), axis=1)
    sigma_diag /= denom
    for i in xrange(C):
        for j in xrange(C):
            transition[i, j] = np.sum(state_transition[:, i, j])
        transition[i,:] /= np.sum(state_transition[:, i, :])
    
    return transition, mu, sigma_diag
    
    

In [314]:
%%time
alpha = forward(x_standard, 4, 1, transition_train, mu, sigma_diag)

CPU times: user 18.4 s, sys: 150 ms, total: 18.6 s
Wall time: 19.6 s


In [319]:
alpha[1]

array([  2.36182301e-05,   3.59469350e-07,   2.68617598e-06,
         3.76781984e-04,   4.27428492e-01,   8.41655943e-10])

In [330]:
%%time
beta = backward(x_standard, 4, 1, transition_train, mu, sigma_diag)

CPU times: user 1min 48s, sys: 966 ms, total: 1min 49s
Wall time: 1min 54s


In [332]:
beta[-100]

array([  6.63621964e-206,   1.80264607e-208,   1.95977922e-208,
         1.68171074e-207,   1.40660423e-208,   1.87373755e-204])