In [63]:
import numpy as np
import h5py
import scipy
from scipy import misc,optimize
import marshal
import copy

In [79]:
# Loading the data
with h5py.File('../HAR/preprocessed.hdf5','r') as hf:
    x_train = np.array(hf.get('x_train'))
    y_train = np.array(hf.get('y_train'))
    s_train = np.array(hf.get('s_train'))
    x_test = np.array(hf.get('x_test'))
    y_test = np.array(hf.get('y_test'))
    s_test = np.array(hf.get('s_test'))
    x_train_with_past = np.array(hf.get('x_train_with_past'))
    y_train_with_past = np.array(hf.get('y_train_with_past'))
    x_test_with_past = np.array(hf.get('x_test_with_past'))
    y_test_with_past = np.array(hf.get('y_test_with_past'))

# Pretrained scores for use in CRF feature functions
# These scores were trained using the nn.lua script which contains very similar functions to the ones
# in the MEMM iTorch notebook
with h5py.File('crf.features.hdf5','r') as crff:
    # T x C' x C matrix where:
    # T = Number of time steps in the sequence
    # C' = Label of previous step
    # C = Label of current step
    # Note that C' = C = 6
    scores_train = np.array(crff.get('scores_train'))
    scores_test = np.array(crff.get('scores_test'))

In [68]:
print 'scores_train.shape'
print scores_train.shape
print 'scores_test.shape'
print scores_test.shape
print 's_train'
print s_train
print 'x_test.shape'
print x_test.shape
print 'x_train.shape'
print x_train.shape
print 'y_train.shape'
print y_train.shape

scores_train.shape
(7351L, 6L, 6L)
scores_test.shape
(2946L, 6L, 6L)
s_train
[ 1  1  1 ..., 30 30 30]
x_test.shape
(2947L, 561L)
x_train.shape
(7352L, 561L)
y_train.shape
(7352L,)


In [69]:
def convert(T, slice_features = False):
    '''
    Convert data into a list of sequences of length T
    '''
    if slice_features:
        x_trn = np.concatenate((x_train[:, :3], x_train[:, 41:44]), axis=1)
        x_tst = np.concatenate((x_test [:, :3], x_test [:, 41:44]), axis=1)
    else:
        x_trn = x_train
        x_tst = x_test
        
    X_train_list = [x_trn  [i*T:(i+1)*T,:] for i in range(x_trn.shape  [0] / T + 1)]
    Y_train_list = [y_train[i*T:(i+1)*T]-1 for i in range(y_train.shape[0] / T + 1)]

    X_test_list = [x_tst [i*T:(i+1)*T,:] for i in range(x_tst.shape [0] / T + 1)]
    Y_test_list = [y_test[i*T:(i+1)*T]-1 for i in range(y_test.shape[0] / T + 1)]

    print 'X_train_list = {} sequences of size {}'.format(len(X_train_list), X_train_list[0].shape)
    print 'Y_train_list = {} sequences of size {}'.format(len(Y_train_list), Y_train_list[0].shape)
    print 'X_test_list  = {} sequences of size {}'.format(len(X_test_list) , X_test_list[0].shape)
    print 'Y_test_list  = {} sequences of size {}'.format(len(Y_test_list) , Y_test_list[0].shape)
    
    return X_train_list, Y_train_list, X_test_list, Y_test_list

In [80]:
class CRF:
    def __init__(self,C,sigma=10):
        self.v = sigma ** 2
        self.v2 = self.v * 2
        self.C = C # number of classes
        self.theta  = np.random.randn(1)

    def get_features(self, x, ind, training = True):
        '''
        x: 2D feature array of size T x D
        ind: Index of this sequence in the list
        training: Indicates whether in training or testing mode
        return: 4D array of size T x C x C x 1 where dimension:
                0 = T or time or sequence index
                1 = y' or previous label
                2 = y  or current  label
                3 = feature
        '''
        result = np.zeros((len(x), self.C, self.C, 1))
        score_offset = ind * x.shape[0] - 1
        if training:
            scores = scores_train
        else:
            scores = scores_test
        for i in range(len(x)):
            current_features = x[i]
            for j in range(self.C):
                for k in range(self.C):
                    if i == 0 and ind == 0:
                        # first time step so no previous information
                        result[i,j,k] = 0
                    else:
                        # use pre-trained features
                        result[i,j,k] = scores[score_offset + i, j, k]
        return result
    
    def get_features_labels(self, X_list, Y_list):
        '''
        Get features and labels.
        X_list: List of features sequence
        Y_list: List of label sequence
        '''
        observations = [ self.get_features(X_list[i], i) for i in range(len(X_list)) ]
        labels = len(Y_list) * [None]
        for i in range(len(Y_list)):
            # start from end of previous sequence
            start = Y_list[i-1][-1] if i > 0 else Y_list[i][0]
            end = Y_list[i+1][0] if i < len(Y_list) - 1 else Y_list[i][-1]
            labels[i] = np.array([start] + list(Y_list[i]) + [end])
        return (observations, labels)
        
    def regularizer(self,w):
        return np.sum(w ** 2) /self.v2

    def regularizer_deriv(self,w):
        return np.sum(w) / self.v

    def log_dot_vm(self, loga, logM):
        '''
        Add loga to each column of logM then perform logsumexp.
        loga: 1D array of size C
        logM: 2D array of size C x C
        '''
        return misc.logsumexp(loga.reshape(loga.shape + (1,)) + logM, axis=0)
    
    def log_dot_mv(self, logM, logb):
        '''
        Add logb to each row of logM then perform logsumexp.
        logM: 2D array of size C x C
        logb: 1D array of size C
        '''
        return misc.logsumexp(logM + logb.reshape((1,) + logb.shape), axis=1)

    def forward(self, M, start=0):
        '''
        M: 3D matrix of size (T + 1) x C x C
        '''
        alphas = np.NINF * np.ones((M.shape[0],M.shape[1])) # (T + 1) x C
        alpha  = alphas[0] # 1 x C
        alpha[start] = 0
        for i in range(M.shape[0]-1):
            alpha = alphas[i+1] = self.log_dot_vm(alpha, M[i])
        alpha = self.log_dot_vm(alpha,M[-1])
        return (alphas,alpha)

    def backward(self, M, end=-1):
        '''
        M: 3D matrix of size (T + 1) x C x C
        '''
        betas = np.zeros((M.shape[0],M.shape[1])) # (T + 1) x C
        beta  = betas[-1]
        beta[end] = 0
        for i in reversed(range(M.shape[0]-1)):
            beta = betas[i] = self.log_dot_mv(M[i+1], beta)
        beta = self.log_dot_mv(M[0],beta)
        return (betas,beta)

    def neg_likelihood_and_deriv(self, x_vec_list, y_vec_list, theta, debug=False):
        '''
        Compute negative log-likelihood and derivative for use in L-BFGS optimizer.
        '''
        likelihood = 0
        derivative = np.zeros(len(self.theta))
        for x_vec,y_vec in zip(x_vec_list,y_vec_list):
            features        = x_vec # T x C x C x 1
            length          = x_vec.shape[0]
            yp_vec_ids      = y_vec[:-2]
            y_vec_ids       = y_vec[2:]
            log_M           = np.dot(features,theta) # T x C x C 
            log_alphas,last = self.forward(log_M, y_vec[0]) # alphas: T x C
            log_betas, zero = self.backward(log_M, y_vec[-1]) # betas: T x C
            time,state      = log_alphas.shape

            # reshape
            log_alphas1 = log_alphas.reshape(time,state,1)
            log_betas1  = log_betas.reshape(time,1,state)
            log_Z       = misc.logsumexp(last) # partition function
            log_probs   = log_alphas1 + log_M + log_betas1 - log_Z # P(y',y|x)
            #                 TxCx1     TxCxC      Tx1xC      1
            log_probs   = log_probs.reshape(log_probs.shape+(1,)) # T x C x C x 1
            
            """
            log_probs:    T x C x C x 1
            features:     T x C x C x D'
            exp_features: D'
            emp_features: D'
            yp_vec_ids  : T
            y_vec_ids   : T
            """
            # Compute expected value of feature functions under empirical distribution
            exp_features = np.sum( np.exp(log_probs) * features, axis = (0,1,2) )
            # Compute expected value of feature functions under model distribution
            emp_features = np.sum( features[ range(length), yp_vec_ids, y_vec_ids ], axis = 0 )
            
            likelihood += np.sum(log_M[ range(length), yp_vec_ids, y_vec_ids ]) - log_Z
            derivative += emp_features - exp_features
        
        print 'likelihood = {}'.format(likelihood - self.regularizer(theta))
        return (
            - ( likelihood - self.regularizer(theta)), 
            - ( derivative - self.regularizer_deriv(theta))
        )
    
    def predict(self, x_vec, ind, debug=False):
        # small overhead, no copying is done
        """
        features:       len(x_vec+1) x Y' x Y x K
        log_potential:  len(x_vec+1) x Y' x Y
        argmaxes:       len(x_vec+1) x Y'
        """
        features  = self.get_features(x_vec, ind, training = False)
        log_potential = np.dot(features,self.theta)
        return self.viterbi_bp(log_potential,len(x_vec),self.C)
    
    def viterbi_bp(self, log_score, N, K, debug=False):
        '''
        Viterbi with backpointers to find most likely sequence labeling.
        '''
        g0 = log_score[0,0]
        g  = log_score[1:]

        B = np.ones((N,K), dtype=np.int32) * -1
        # compute max-marginals and backtrace matrix
        V = g0
        for t in xrange(1,N):
            U = np.empty(K)
            for y in xrange(K):
                w = V + g[t-1,:,y]
                B[t,y] = b = w.argmax()
                U[y] = w[b]
            V = U
        # extract the best path by brack-tracking
        y = V.argmax()
        trace = []
        for t in reversed(xrange(N)):
            trace.append(y)
            y = B[t, y]
        trace.reverse()
        return trace

    def train(self,x_seq,y_seq,debug=False):
        '''
        x_seq: List of 2D feature array of size T x D
        y_seq: List of 1D label array of size T
        '''
        X,Y = self.get_features_labels(x_seq,y_seq)
        l = lambda theta: self.neg_likelihood_and_deriv(X,Y,theta,debug)
        val = optimize.fmin_l_bfgs_b(l,self.theta)
        if debug: print val
        self.theta,_,_  = val
        return self.theta

Break original time series data into chunks of size $T$

In [81]:
T = 100 # number of time steps in each sequence
X_train_list, Y_train_list, X_test_list, Y_test_list = convert(T)

C = 6 # number of classes
crf = CRF(C)
crf.train(X_train_list, Y_train_list, debug=False)

total_correct = 0
total_count = 0
Y_hat_list = []
for i in range(len(X_test_list)):
    Y_hat = crf.predict(X_test_list[i], i)
    total_correct += np.sum(Y_hat == Y_test_list[i])
    total_count += len(Y_test_list[i])
    Y_hat_list.append(Y_hat)
print 'Accuracy = {0:.2f}%'.format(float(total_correct) * 100 / total_count)

X_train_list = 74 sequences of size (100L, 561L)
Y_train_list = 74 sequences of size (100L,)
X_test_list  = 30 sequences of size (100L, 561L)
Y_test_list  = 30 sequences of size (100L,)
likelihood = -18922.7071363
likelihood = -2555.98463968
likelihood = -2521.5400147
likelihood = -2419.78342679
likelihood = -2406.15088929
likelihood = -2396.33634711
likelihood = -2396.05295878
likelihood = -2396.05004282
likelihood = -2396.05004186
Accuracy = 95.35%
