In [80]:
import numpy as np
import pandas as pd
import sklearn.preprocessing as preprocessing
import theano.tensor as T
from theano import function, printing
import theano

from theano import config
# config.device = 'cpu'
# DebugMode makes Theano self-check compiled graphs; NOTE(review): it is
# documented as very slow, so switch it off for real training runs.
config.mode = 'DebugMode'
# config.gcc.cxxflags = "-D_hypot=hypot"
# Evaluate `tag.test_value`s while the graph is being built and only warn
# (instead of failing) when a variable has no test value attached.
config.compute_test_value = 'warn'


# updates = OrderedDict()

In [2]:
def calculate_AER(S, P, A):
    """Compute the alignment error rate (AER) over a corpus of sentences.

    The score is ``1 - (|A & S| + |A & P| + |A & S|) / (|S| + |A|)`` summed
    over all sentences, i.e. the sure matches are added on top of the
    matches against ``P``.  NOTE(review): this presumes ``P`` holds the
    possible links *without* the sure ones - confirm against the data.

    Empty strings are treated as padding: they never count as a link, neither
    in the lengths nor in the intersections (previously two padded rows
    "matched" on their padding, which inflated the numerator).

    Input
    -----
    S: per-sentence sure links (sequence of sequences, may be ragged)
    P: per-sentence possible links
    A: per-sentence predicted links

    Output
    ------
    The alignment error rate as a float.

    Raises
    ------
    ZeroDivisionError when there is no sure link and no predicted link at all.
    """
    s_a, p_a, len_s, len_a = 0, 0, 0, 0
    # Iterate the raw rows instead of np.array(...) so ragged corpora work too.
    for s, p, a in zip(S, P, A):
        sure_links = [link for link in s if link != ""]
        possible_links = {link for link in p if link != ""}
        predicted_links = [link for link in a if link != ""]
        predicted_set = set(predicted_links)

        s_a += len(set(sure_links) & predicted_set)
        p_a += len(possible_links & predicted_set)
        # Lengths keep duplicates, exactly like the original boolean masks did.
        len_s += len(sure_links)
        len_a += len(predicted_links)
    print ("s_a", s_a)
    p_a += s_a
    print ("p_a", p_a)
    aer = (s_a + p_a) / (len_s + len_a)
    print ("aer", 1.-aer)

    return 1. - aer


def calculate_one_AER(S, P, A):
    """Compute the alignment error rate (AER) of a single sentence pair.

    Same formula as the corpus-level variant:
    ``1 - (|A & S| + |A & P| + |A & S|) / (|S| + |A|)``.
    NOTE(review): this presumes ``P`` holds the possible links *without* the
    sure ones - confirm against the data.

    Empty strings are treated as padding and are ignored everywhere
    (previously padding shared by two inputs was counted as a match).

    Input
    -----
    S: sure links of the sentence
    P: possible links of the sentence
    A: predicted links of the sentence

    Output
    ------
    The alignment error rate as a float.

    Raises
    ------
    ZeroDivisionError when there is neither a sure nor a predicted link.
    """
    sure_links = [link for link in S if link != ""]
    possible_links = {link for link in P if link != ""}
    predicted_links = [link for link in A if link != ""]
    predicted_set = set(predicted_links)

    s_a = len(set(sure_links) & predicted_set)
    print ("s_a", s_a)
    p_a = len(possible_links & predicted_set) + s_a
    print ("p_a", p_a)
    # Lengths keep duplicates, exactly like the original boolean masks did.
    aer = (s_a + p_a) / (len(sure_links) + len(predicted_links))
    print ("aer", 1.-aer)

    return 1. - aer
    
def write_file(strs, file_name):
    """Write every string of ``strs`` to ``file_name``, one per line (UTF-8).

    An existing file is overwritten.  The file is opened in a ``with`` block
    so the handle is closed even when one of the writes raises.

    Input
    -----
    strs: iterable of str, each written followed by a newline
    file_name: path of the file to create
    """
    with open(file_name, "w", encoding='utf8') as alignment_test:
        for s in strs:
            alignment_test.write(s + "\n")

In [3]:
# Scratch check of np.split: cut a 9-element range into 3 equal chunks
# (the same call the commented-out mini-batch loop in train_model relies on).
X = np.arange(9)
np.split(X, 3)

[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8])]

In [11]:
# Scratch check of the one-hot trick used by EmissionModel.train_mini_batch:
# indexing the identity matrix with word ids yields one row per word, and the
# transpose turns that into one one-hot *column* per word.
n_labels = 5
target_vector = [2, 4, 0]
np.eye(n_labels)[target_vector].T

array([[ 0.,  0.,  1.],
       [ 0.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  1.,  0.]])

In [126]:
class EmissionModel:
    """ Simple emission model without CNN
    word embedding layer -> ReLU layer -> softmax layer

    All activations are stored column-wise: one column per target word of the
    sentence, so the network output has shape [vocab_output_size, sentence_len].

    The compiled Theano functions are exposed as attributes:
      * ``test(x, posteriors)`` -> [gradient of w[1], softmax output]
      * ``train_mini_batch_function(x, posteriors)`` -> softmax output, and
        applies one gradient step to the weights/biases
      * ``test_values(x)`` -> softmax output
    """

    def init_weights_bias(self, vocab_input_size, layer_size, output_size, seed=1402):
        """Create uniformly initialised (in [-1, 1)) shared weights and biases.

        Input
        -----
        vocab_input_size: size of the input vocabulary
        layer_size: sizes of the hidden layers, in order
        output_size: size of the output vocabulary
        seed: seed of the private random state

        Output
        ------
        w, b: lists of Theano shared variables; ``w[i]`` has shape
              [size_{i+1}, size_i] and ``b[i]`` has shape [size_{i+1}, 1]
              (broadcastable along the columns).
        """
        random_state = np.random.RandomState(seed)

        size_list = np.concatenate(([vocab_input_size], layer_size, [output_size]), axis=0)
        w = []
        b = []

        # The draw order (w then b, layer by layer) fixes the initial values
        # for a given seed, so keep it stable.
        for i in range(len(size_list) - 1):
            w.append(theano.shared(
                    value=np.asarray(
                        random_state.uniform(low=-1.0, high=1.0, size=(size_list[i+1], size_list[i])),
                        dtype=theano.config.floatX
                    ), borrow=True
            ))
            b.append(theano.shared(
                    value=np.asarray(
                        random_state.uniform(low=-1.0, high=1.0, size=(size_list[i+1], 1)),
                        dtype=theano.config.floatX
                    ),
                    borrow=True,
                    broadcastable=(False,True)
            ))

        return w, b

    #[7,512]
    def __init__(self, vocab_input_size, layer_size, vocab_output_size, baum_welch_model, 
                 epoch=1, batch=1, learning_rate = .01, seed=1412):
        """Build the Theano graph and compile the train/evaluate functions.

        Input
        -----
        vocab_input_size: size of the input (target-side) vocabulary
        layer_size: [embedding_size, relu_size]; the graph below uses exactly
                    three weight matrices, so two hidden sizes are expected
        vocab_output_size: size of the output (source-side) vocabulary
        baum_welch_model: object providing ``calculate_baum_welch_posteriors``;
                          only needed by ``train_mini_batch`` (may be None
                          when just the compiled functions are used)
        epoch, batch, learning_rate, seed: training hyper-parameters
        """
        self.epoch = epoch
        self.batch = batch
        self.learning_rate = learning_rate
        self.seed = seed
        self.posteriors = []
        self.baum_welch_model = baum_welch_model

        self.vocab_input_size = vocab_input_size
        self.d_embedding_size = layer_size[0]

        x_training_input = T.matrix().astype(config.floatX)
        # Test value for compute_test_value: one-hot columns of a 5-word
        # sentence over a 10-word vocabulary.
        # NOTE(review): hard-coded for vocab_input_size == 10.
        x_training_input.tag.test_value = np.asarray([
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  1.,  0.],
            [ 0.,  0.,  1.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  1.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 1.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  1.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.]
        ]).astype(x_training_input.dtype)

        self.w, self.b = self.init_weights_bias(vocab_input_size, layer_size, vocab_output_size, seed)

        # Word embedding layer (no bias: b[0] is created but not used)
        word_embedding_layer = T.dot(self.w[0], x_training_input) # [7, 10] * [10, 5] = [7, 5]

        # ReLU layer
        z_relu_layer = T.dot(self.w[1], word_embedding_layer) + self.b[1] # [512, 7] * [7, 5] = [512, 5]
        z_relu_layer_shape = T.shape(z_relu_layer)
        z_reshaped_relu_layer = T.reshape(z_relu_layer, [z_relu_layer_shape[0]*z_relu_layer_shape[1], 1])
        relu_layer = T.nnet.relu(z_reshaped_relu_layer)
        relu_layer_reshaped = T.reshape(relu_layer, z_relu_layer_shape) # [512, 5]

        # Softmax layer (T.nnet.softmax works row-wise, hence the transposes)
        z_softmax_layer = T.dot(self.w[2], relu_layer_reshaped) + self.b[2] # [12, 512] * [512, 5] = [12, 5]
        softmax_layer = T.transpose(T.nnet.softmax(T.transpose(z_softmax_layer))) # Output: [12, 5]

        # Calculate new gradient
        posteriors = T.matrix().astype(config.floatX)
        # Test value of shape [sentence_len, vocab_output_size] = [5, 12].
        # NOTE(review): hard-coded for vocab_output_size == 12.
        posteriors.tag.test_value = np.asarray([
            [-0.15,  0.04, -0.26, -0.61, -0.93, -0.72, -0.15, -0.62,  0.62, 0.24, 0.71, 0.81],
            [ 0.07,  0.42,  0.11,  0.95, -0.86, -0.17, -0.22, -0.69, -0.55, 0.11, 0.37, 0.18],
            [-0.79,  0.3 ,  0.06, -0.79,  0.71,  0.86, -0.58,  0.38,  0.05, 0.62, 0.17, 0.29],
            [ 0.92, -0.33, -0.63,  0.99,  0.67, -0.79, -0.08,  0.64, -0.51, 0.19, 0.67, 0.52],
            [-0.08, -0.29,  0.87,  0.6 ,  0.31,  0.75,  0.38, -0.42,  0.11, 0.44, 0.37, 0.14]
        ]).astype(posteriors.dtype)

        # NOTE(review): this is the expected log-likelihood, and the updates
        # below *descend* on it. If the goal is to maximise the likelihood the
        # cost presumably needs a leading minus sign - confirm before training.
        cost = T.sum(T.transpose(posteriors) * T.log(softmax_layer))

        # Trainable parameters; b[0] is left out because the embedding layer
        # has no bias term in the graph.
        params = [self.w[0], self.w[1], self.w[2], self.b[1], self.b[2]]
        grads = T.grad(cost=cost, wrt=params)
        dw1 = grads[1]

        # Update w and b
        updates = [
            (param, param - self.learning_rate * grad)
            for param, grad in zip(params, grads)
        ]

        # Compile model
        self.test = theano.function(
            inputs=[x_training_input, posteriors],
            outputs=[dw1, softmax_layer]
        )
        self.train_mini_batch_function = theano.function(
            inputs=[x_training_input, posteriors],
            outputs=softmax_layer,
            updates=updates
        )
        self.test_values = theano.function(
            inputs=[x_training_input],
            outputs=softmax_layer
        )

    def train_mini_batch(self, testing_target, testing_source):
        """Compute the emission posteriors of one sentence pair.

        The network is evaluated on the one-hot encoded target sentence, the
        rows of the observed source words are handed to the Baum-Welch model,
        and the resulting posteriors are scattered back into a
        [target_len, vocab_output_size] matrix (zero for unobserved words).

        Input
        -----
        testing_target: word ids of the target sentence (network input)
        testing_source: word ids of the source sentence (observations)

        Output
        ------
        emission_posterior_vout: posteriors of shape [target_len, vocab_output_size]

        Raises
        ------
        ValueError when the model was built without a Baum-Welch model.
        """
        if self.baum_welch_model is None:
            raise ValueError("train_mini_batch requires a baum_welch_model")

        one_hot_input = np.eye(self.vocab_input_size)[testing_target].T
        one_hot_input = np.asarray(one_hot_input).astype(config.floatX)
        print("one_hot_input", one_hot_input, np.shape(one_hot_input))
        emission_matrix = self.test_values(one_hot_input)
        print("emission_matrix 1", emission_matrix, np.shape(emission_matrix))

        emission_posterior_vout = np.zeros_like(emission_matrix.T) # [V_f_size, e_size]
        # Keep only the rows of the words that actually occur in the source.
        new_emission_matrix = [emission_matrix[indice] for indice in testing_source] # [f_size, e_size]
        print("new_emission_matrix", new_emission_matrix, np.shape(new_emission_matrix))
        # Use the model handed to the constructor, not a notebook global.
        emission_posterior, transition_posterior = \
            self.baum_welch_model.calculate_baum_welch_posteriors(
                len(testing_target), np.transpose(new_emission_matrix))
        print("emission_posterior", emission_posterior, np.shape(emission_posterior))

        # transform emission size to [target_size, v_out]
        for i, indice in enumerate(testing_source):
            emission_posterior_vout[:, indice] = emission_posterior[:, i]
        print("emission_posterior_vout", emission_posterior_vout, np.shape(emission_posterior_vout))

        return emission_posterior_vout
#         return self.train_mini_batch_function(one_hot_input, np.asarray(emission_posterior).astype(config.floatX))

    def train_model(self, inputs):
        """Placeholder for the full training loop (not implemented yet)."""
        pass
#         for i in range(self.epoch):
#             for x_input in np.split(inputs, self.batch):
#                 self.posteriors = 
#                 self.train_mini_batch(x_input, posteriors)
            # TODO: create train_batch function 

# Smoke test of the compiled training function.
# `x` holds one one-hot column per target word: 5 words over a 10-word
# vocabulary (same matrix as the test value attached inside EmissionModel).
x = np.asarray([
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.]
    ]).astype(config.floatX)

# Arbitrary stand-in "posteriors" of shape [5 words, 12 output words]; they
# are not probabilities, they only exercise the graph.
posteriors = np.asarray([
    [-0.15,  0.04, -0.26, -0.61, -0.93, -0.72, -0.15, -0.62,  0.62, 0.24, 0.71, 0.81],
    [ 0.07,  0.42,  0.11,  0.95, -0.86, -0.17, -0.22, -0.69, -0.55, 0.11, 0.37, 0.18],
    [-0.79,  0.3 ,  0.06, -0.79,  0.71,  0.86, -0.58,  0.38,  0.05, 0.62, 0.17, 0.29],
    [ 0.92, -0.33, -0.63,  0.99,  0.67, -0.79, -0.08,  0.64, -0.51, 0.19, 0.67, 0.52],
    [-0.08, -0.29,  0.87,  0.6 ,  0.31,  0.75,  0.38, -0.42,  0.11, 0.44, 0.37, 0.14]
]).astype(config.floatX)

vocab_input_size = np.shape(x)[0]
d_embedding = 7
layer_size = [d_embedding, 512]
vocab_output_size = 12

# No Baum-Welch model is needed here: only the compiled function is called.
model = EmissionModel(vocab_input_size=vocab_input_size, layer_size=layer_size, 
                      vocab_output_size=vocab_output_size, baum_welch_model=None)

# `result` is the softmax output ([vocab_output_size, 5]); each printed row
# shape is therefore (5,).
result = model.train_mini_batch_function(x, posteriors)
print(np.shape(result[0]))
print(np.shape(result[1]))
print(np.shape(result[2]))
# print(np.shape(model.evaluate_model(x)))
# print(model.calculate_gradient(posteriors))

(5,)
(5,)
(5,)


In [None]:
# Emission model variables
vocab_input_size = 10
d_embedding = 7
layer_size = [d_embedding, 512]
vocab_output_size = 12

# One one-hot column per target word (5 words, 10-word vocabulary).
x = np.asarray([
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.]
]).astype(config.floatX)
# `result` is a previous softmax output of shape [vocab_output_size, n_words],
# while the model expects posteriors as [n_words, vocab_output_size] (it
# transposes them inside the cost), so transpose before feeding it back.
# NOTE: this cell relies on `result` left behind by an earlier cell.
posteriors = np.asarray(result).T.astype(config.floatX)

testing_target = [6, 8, 2, 1, 3]
testing_source = [2, 7, 10, 0, 4, 5]

model = EmissionModel(vocab_input_size=vocab_input_size, layer_size=layer_size, vocab_output_size=vocab_output_size, baum_welch_model=None)
result = model.train_mini_batch_function(x, posteriors)
print(result)

In [166]:
# Evaluate the compiled function once (it is expensive under DebugMode) and
# print both outputs: the gradient of w[1] and the softmax output.
dw1_value, softmax_value = model.test(x, posteriors)
print(dw1_value)
print("")
print(softmax_value)

[[ -4.25463021e-01  -1.74731529e+00  -3.40455741e-01 ...,   8.40714455e-01
    1.16684544e+00  -1.74363041e+00]
 [ -1.73396075e+00  -3.52505970e+00  -1.80466461e+00 ...,   1.56047273e+00
    2.06868815e+00  -2.42936778e+00]
 [ -5.50276101e-01   5.80202416e-02  -1.51815784e+00 ...,   9.53709185e-01
    2.68967301e-01   3.69657218e-01]
 ..., 
 [ -9.68661189e-01   5.42034388e-01   1.15126371e+00 ...,  -8.52006793e-01
   -6.70304239e-01   4.77848887e-01]
 [  3.67000699e-01   4.21549737e-01   8.97033513e-03 ...,   2.57458866e-01
    1.96427971e-01   7.65285119e-02]
 [ -4.27518426e-05  -1.21131372e-02   3.77787501e-02 ...,  -8.82423893e-02
    4.54101712e-03   5.87681048e-02]]

[[ -1.84710197e+01  -7.45869827e+01  -6.41599178e+00  -2.60746651e+01
   -1.36473475e+01]
 [ -3.23260193e+01  -8.30433655e+01  -8.10556221e+00  -3.15460014e+01
   -2.47860146e+01]
 [ -4.28678932e+01  -1.31970825e+02  -5.94393015e+00  -4.57771158e+00
   -1.53612356e+01]
 [ -3.80866966e+01  -1.26840454e+02  -1.99077091e

In [167]:
# NOTE(review): these posteriors have 9 columns, while the model built above
# uses vocab_output_size = 12; the recorded output below presumably comes from
# an earlier model configuration - confirm before re-running this cell.
posteriors = np.asarray([
    [ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
    [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
    [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
    [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
    [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]
]).astype(config.floatX)

print(model.test(x, posteriors))

[array([[ -4.25463021e-01,  -1.74731529e+00,  -3.40455741e-01, ...,
          8.40714455e-01,   1.16684544e+00,  -1.74363041e+00],
       [ -1.73396075e+00,  -3.52505970e+00,  -1.80466461e+00, ...,
          1.56047273e+00,   2.06868815e+00,  -2.42936778e+00],
       [ -5.50276101e-01,   5.80202416e-02,  -1.51815784e+00, ...,
          9.53709185e-01,   2.68967301e-01,   3.69657218e-01],
       ..., 
       [ -9.68661189e-01,   5.42034388e-01,   1.15126371e+00, ...,
         -8.52006793e-01,  -6.70304239e-01,   4.77848887e-01],
       [  3.67000699e-01,   4.21549737e-01,   8.97033513e-03, ...,
          2.57458866e-01,   1.96427971e-01,   7.65285119e-02],
       [ -4.27518426e-05,  -1.21131372e-02,   3.77787501e-02, ...,
         -8.82423893e-02,   4.54101712e-03,   5.87681048e-02]], dtype=float32), array([[ -1.84710197e+01,  -7.45869827e+01,  -6.41599178e+00,
         -2.60746651e+01,  -1.36473475e+01],
       [ -3.23260193e+01,  -8.30433655e+01,  -8.10556221e+00,
         -3.15460014

# BaumWelchModel

[[ 63.  69.  76.  58.   0.   0.   0.   0.]
 [ 87.  63.  69.  76.   0.   0.   0.   0.]
 [ 93.  87.  63.  69.   0.   0.   0.   0.]
 [ 53.  93.  87.  63.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.]]


array([[ 0.21283784,  0.22115385,  0.25762712,  0.21804511,  0.3       ,
         0.        ,  0.        ,  0.        ],
       [ 0.29391892,  0.20192308,  0.23389831,  0.28571429,  0.        ,
         0.3       ,  0.        ,  0.        ],
       [ 0.31418919,  0.27884615,  0.21355932,  0.2593985 ,  0.        ,
         0.        ,  0.3       ,  0.        ],
       [ 0.17905405,  0.29807692,  0.29491525,  0.23684211,  0.        ,
         0.        ,  0.        ,  0.3       ],
       [ 0.21283784,  0.22115385,  0.25762712,  0.21804511,  0.3       ,
         0.        ,  0.        ,  0.        ],
       [ 0.29391892,  0.20192308,  0.23389831,  0.28571429,  0.        ,
         0.3       ,  0.        ,  0.        ],
       [ 0.31418919,  0.27884615,  0.21355932,  0.2593985 ,  0.        ,
         0.        ,  0.3       ,  0.        ],
       [ 0.17905405,  0.29807692,  0.29491525,  0.23684211,  0.        ,
         0.        ,  0.        ,  0.3       ]])

Transition matrix and "Baum Welch Algorithm"

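Compute forward messages: alpha <br>
Compute backward messages: beta <br>
Compute posteriors (each normalized to sum to 1): <br>
    gamma: p(z_t | x) ∝ alpha_t * beta_t <br>
    epsilon: p(z_t, z_{t+1} | x) ∝ alpha_t * transition(z_t, z_{t+1}) * emission_{t+1} * beta_{t+1} <br>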

In [62]:
class BaumWelchModel:
    """Forward-backward (Baum-Welch) computations for an HMM alignment model.

    Latent states are the positions of the target sentence; transitions are
    parameterised by the jump distance between positions through
    ``non_negative_set`` (one weight per clipped jump distance).

    Matrix conventions used by every method:
      * emission_matrix / alpha / beta / gamma: [target_len, source_len]
      * transition_matrix: [target_len, target_len], row = previous state
      * epsilon: [source_len, target_len, target_len]
    """

    def __init__(self, max_distance, po=0.3, seed=1402):
        """Draw the random jump weights.

        Input
        -----
        max_distance: largest jump distance that gets its own weight
        po: default weight of the transitions into an empty word
        seed: seed for numpy's *global* random state (note the side effect:
              constructing a model reseeds ``np.random``)
        """
        np.random.seed(seed)
        self.max_distance = max_distance
        # One weight per jump index in [0, 2*max_distance + 2].
        self.non_negative_set = np.random.randint(
                                    low=1, high=100,
                                    size=[max_distance + max_distance + 3]
        )
        self.po = po

    def normalize_matrix(self, x, axis=1, whole_matrix=False):
        """Scale ``x`` so that its entries sum to one (plain sum-normalisation,
        no exponentiation is applied).

        Input
        -----
        x: vector or matrix of non-negative scores
        axis: 1 -> every row sums to one, 0 -> every column sums to one
        whole_matrix: when True (or when ``x`` is a vector) all entries
                      together sum to one

        Output
        ------
        The normalised array. A zero sum yields nan/inf entries (numpy
        division semantics); no special handling is done.
        """
        x = np.asarray(x)
        if x.ndim == 1 or whole_matrix:
            return x / np.sum(x)
        if axis == 0:
            return x / np.sum(x, axis=axis)[None, :]
        return x / np.sum(x, axis=axis)[:, None]

    def _jump_weights(self, sentence_length):
        """Return the unnormalised [sentence_length, sentence_length] matrix of
        jump weights: entry (i, j) is the weight of index i - j + max_distance + 1,
        clipped to the first/last weight when it falls outside the table."""
        positions = np.arange(sentence_length)
        upper = 2 * self.max_distance + 2
        indices = positions[:, None] - positions[None, :] + self.max_distance + 1
        table = np.asarray(self.non_negative_set, dtype=float)
        weights = table[np.clip(indices, 0, upper)]
        # Too-large indices use the last weight of the table.
        weights[indices > upper] = table[-1]
        return weights

    def generate_transition_distant_matrix(self, sentence_length, po=0., nomalized=True):
        """ Generate a transition matrix based on jump distance in the latent sentence.
        We extend the latent sentence for 2*length in which each word has 
        an empty word to represent no-alignment state.
        where [sentence_length:end] elements are empty words considered as 
        latent words having no direct aligment.

        Input
        -----
        sentence_length: the length of latent sentence
                      int value
        po: weight for A->A_empty_word; 0. means "use self.po"
        nomalized: unused, kept for interface compatibility

        Output
        ------
        trans_distant_matrix of shape [2*sentence_length, 2*sentence_length]:
        the left half holds the column-normalised jump weights (stacked
        twice), the right half holds ``po`` on the two diagonals.
        """
        if po==0.:
            po = self.po
        jump_weights = self._jump_weights(sentence_length)

        trans_distant_matrix = np.zeros((2*sentence_length, 2*sentence_length))
        trans_distant_matrix[:sentence_length, :sentence_length] = jump_weights

        print(trans_distant_matrix)

        # Normalise every column of the word->word block and reuse it for the
        # empty-word->word block.
        column_normalized = jump_weights / np.sum(jump_weights, axis=0)[None, :]
        trans_distant_matrix[:sentence_length, :sentence_length] = column_normalized
        trans_distant_matrix[sentence_length:, :sentence_length] = column_normalized

        # word_i -> empty_i and empty_i -> empty_i
        diagonal = np.arange(sentence_length)
        trans_distant_matrix[diagonal, diagonal + sentence_length] = po
        trans_distant_matrix[diagonal + sentence_length, diagonal + sentence_length] = po

        return trans_distant_matrix

    def generate_transition_matrix(self, sentence_length, po=0., nomalized=True):
        """ Generate a transition matrix based on jump distance in the latent sentence.

        Input
        -----
        sentence_length: the length of latent sentence
                      int value
        po: unused here, kept for interface compatibility
        nomalized: when True every row is normalised to sum to one

        Output
        ------
        trans_matrix of shape [sentence_length, sentence_length]
        """
        if po==0.:
            po = self.po
        trans_matrix = self._jump_weights(sentence_length)
        if nomalized:
            return self.normalize_matrix(trans_matrix, axis=1)
        return trans_matrix

    def calc_forward_messages(self, unary_matrix, transition_matrix, emission_matrix):
        """Calculate the forward messages ~ alpha values.

        Input
        -----
        unary_matrix: initial state distribution, size ~ [target_len]
        transition_matrix: size ~ [target_len, target_len]
        emission_matrix: size ~ [target_len, source_len]

        Return
        ------
        alpha, size ~ [target_len, source_len] (unnormalised)
        """
        # TODO: verify matrix length
        emission_matrix = np.asarray(emission_matrix)
        transition_matrix = np.asarray(transition_matrix)
        source_len = emission_matrix.shape[1]

        alpha = np.zeros(emission_matrix.shape)
        print("emission_matrix[:,0]", emission_matrix[:, 0])
        print("unary_matrix", unary_matrix)
        alpha[:, 0] = np.multiply(emission_matrix[:, 0], unary_matrix)
        print("alpha.T[0]", alpha.T[0])

        for t in range(1, source_len):
            # alpha[i, t] = emission[i, t] * sum_j alpha[j, t-1] * transition[j, i]
            alpha[:, t] = emission_matrix[:, t] * np.dot(alpha[:, t - 1], transition_matrix)

        return alpha

    def calc_backward_messages(self, transition_matrix, emission_matrix):
        """Calculate the backward messages ~ beta values.

        Input
        -----
        transition_matrix: size ~ [target_len, target_len]
        emission_matrix: size ~ [target_len, source_len]

        Return
        ------
        beta, size ~ [target_len, source_len] (unnormalised, last column is 1)
        """
        # TODO: verify matrix length
        emission_matrix = np.asarray(emission_matrix)
        transition_matrix = np.asarray(transition_matrix)
        source_len = emission_matrix.shape[1]

        beta = np.zeros(emission_matrix.shape)
        beta[:, -1] = 1

        for t in reversed(range(source_len - 1)):
            # beta[i, t] = sum_j transition[i, j] * emission[j, t+1] * beta[j, t+1]
            beta[:, t] = np.dot(transition_matrix, beta[:, t + 1] * emission_matrix[:, t + 1])

        return beta

    def calc_posterior_matrix(self, alpha, beta, transition_matrix, emission_matrix):
        """Calculate the gamma and epsilon values in order to reproduce 
        better transition and emission matrix.

        gamma: P(e_aj|f_j), every column normalised to sum to one
        epsilon: P(e_aj,e_a(j+1)|f_j), every time step normalised to sum to
                 one; the last time step has no successor and stays zero

        Return
        ------
        unary_matrix, posterior_gamma, posterior_epsilon
        (the new unary matrix is the first column of gamma)
        """
        # TODO: verify matrix length
        alpha = np.asarray(alpha)
        beta = np.asarray(beta)
        transition_matrix = np.asarray(transition_matrix)
        emission_matrix = np.asarray(emission_matrix)
        source_len = alpha.shape[1]
        target_len = alpha.shape[0]

        # Normalization on columns
        gamma = self.normalize_matrix(np.multiply(alpha, beta), axis=0)

        epsilon = np.zeros((source_len, target_len, target_len))
        for t in range(source_len - 1):
            joint = alpha[:, t, None] * transition_matrix \
                    * beta[None, :, t + 1] * emission_matrix[None, :, t + 1]
            # Normalization
            epsilon[t] = self.normalize_matrix(joint, whole_matrix=True)

        # Update unary matrix (gamma is already normalised per column)
        new_unary_matrix = np.copy(gamma[:, 0])

        return new_unary_matrix, gamma, epsilon

    def calculate_baum_welch_posteriors(self, sentence_length, emission_matrix, unary_matrix=None):
        """Run forward-backward for one sentence pair.

        Input
        -----
        sentence_length: number of latent (target) positions
        emission_matrix: size ~ [sentence_length, source_len]
        unary_matrix: initial state distribution; when omitted every state
                      gets 0.01 and the first state receives the remaining mass

        Return
        ------
        emission_posterior (gamma), transition_posterior (epsilon)
        """
        # `is None` instead of `== None`: comparing a numpy array with `==`
        # is element-wise and cannot be used as a condition.
        if unary_matrix is None:
            unary_matrix = [0.01]*sentence_length
            unary_matrix[0] = 1 - np.sum(unary_matrix) + 0.01
        transition_matrix = self.generate_transition_matrix(sentence_length)
        alpha = self.calc_forward_messages(unary_matrix, transition_matrix, emission_matrix)
        beta = self.calc_backward_messages(transition_matrix, emission_matrix)

        new_unary_matrix, emission_posterior, transition_posterior = \
            self.calc_posterior_matrix(alpha, beta, transition_matrix, emission_matrix)
        return emission_posterior, transition_posterior # gamma, epsilon

    def update_non_negative_transition_set(self, emission_posteriors, transition_posteriors):
        """Re-estimate the transition matrix of every sentence (work in progress).

        Input
        -----
        emission_posteriors: iterable of gamma matrices, [target_len, source_len]
        transition_posteriors: iterable of epsilon tensors,
                               [source_len, target_len, target_len]

        Return
        ------
        new_non_negative_set: currently a zero vector of length max_distance;
        folding the per-sentence matrices into jump weights is still TODO.
        """
        # TODO 1: update non-negative set: s[-1] = 
        # TODO 1.1: calculate new transition matrix
        transition_list = []
        for gamma, epsilon in zip(emission_posteriors, transition_posteriors):
            gamma = np.asarray(gamma)
            epsilon = np.asarray(epsilon)

            # Expected number of i -> j transitions, summed over time ...
            expected_transitions = np.sum(epsilon[:-1], axis=0)
            # ... divided by the expected number of visits of i (last
            # position excluded, it has no outgoing transition).
            expected_visits = np.sum(gamma[:, :-1], axis=1)
            new_transition_matrix = expected_transitions / expected_visits[:, None]
            # Normalization
            new_transition_matrix = self.normalize_matrix(new_transition_matrix, axis=1)
            transition_list.append(new_transition_matrix)

        # TODO 1.2: update
        new_non_negative_set = np.zeros(self.max_distance)

        return new_non_negative_set

In [5]:
target_length = 4
max_distance = 2

baum_welch_model = BaumWelchModel(max_distance)
print("non_negative_set", baum_welch_model.non_negative_set)
# Initial state distribution: almost all mass on the first target position.
unary_matrix = [0.97, .01, .01, .01]
# transition_matrix = np.array([
#     [.3, .7], 
#     [.1, .9]
# ])
transition_matrix = baum_welch_model.generate_transition_matrix(target_length, nomalized=True)
print("transition_matrix", transition_matrix)
# print("unnormalized transition", baum_welch_model.generate_transition_matrix(target_length, nomalized=False))
# Toy emission scores [target_len, source_len], normalised per source word.
emission_matrix = baum_welch_model.normalize_matrix(np.array([
    [.4, .5, .7, .1, .4, .8],
    [.6, .5, .4, .2, .7, .4],
    [.2, .3, .8, .9, .2, .3],
    [.6, .2, .7, .1, .4, .5]
]), axis=0)
print("emission_matrix", emission_matrix)

alpha = baum_welch_model.calc_forward_messages(unary_matrix, transition_matrix, emission_matrix)
beta = baum_welch_model.calc_backward_messages(transition_matrix, emission_matrix)

print("alpha", alpha)
print("beta", beta)

# calc_posterior_matrix also needs the transition and emission matrices to
# build epsilon; calling it with only (alpha, beta) raises a TypeError.
new_unary_matrix, emission_posterior, transition_posterior = \
    baum_welch_model.calc_posterior_matrix(alpha, beta, transition_matrix, emission_matrix)
print("new_unary_matrix: ", new_unary_matrix)
print("gamma: ", emission_posterior)
print("epsilon: ", transition_posterior)

non_negative_set [58 48 56 10 59 11 62]
transition_matrix [[ 0.05813953  0.3255814   0.27906977  0.3372093 ]
 [ 0.34104046  0.05780347  0.32369942  0.27745665]
 [ 0.08088235  0.43382353  0.07352941  0.41176471]
 [ 0.43661972  0.07746479  0.41549296  0.07042254]]
emission_matrix [[ 0.22222222  0.33333333  0.26923077  0.07692308  0.23529412  0.4       ]
 [ 0.33333333  0.33333333  0.15384615  0.15384615  0.41176471  0.2       ]
 [ 0.11111111  0.2         0.30769231  0.69230769  0.11764706  0.15      ]
 [ 0.33333333  0.13333333  0.26923077  0.07692308  0.23529412  0.25      ]]
alpha [[  2.15555556e-01   5.07145654e-03   3.69359339e-03   2.08747262e-04
    1.19557985e-04   1.18326645e-04]
 [  3.33333333e-03   2.37045996e-02   1.41985426e-03   5.33144041e-04
    4.69317390e-04   2.29307705e-05]
 [  1.11111111e-03   1.25401425e-02   4.34674510e-03   2.34877976e-03
    6.13782685e-05   4.61472491e-05]
 [  3.33333333e-03   9.90726028e-03   3.80919096e-03   2.84427012e-04
    2.83644837e-04   5.

In [7]:
# Same computation through the convenience wrapper; relies on
# `baum_welch_model`, `target_length`, `emission_matrix` and `unary_matrix`
# defined in the previous cell.
emission_posterior, transition_posterior = \
    baum_welch_model.calculate_baum_welch_posteriors(target_length, emission_matrix, unary_matrix)
print("emission_posterior", emission_posterior)
print("transition_posterior", transition_posterior)

emission_posterior [[ 0.96283366  0.09506816  0.29725378  0.0585225   0.10627471  0.49027076]
 [ 0.01495464  0.47569605  0.12404325  0.11056501  0.51704914  0.09501061]
 [ 0.00518545  0.25614949  0.16733684  0.77624234  0.0592773   0.19120501]
 [ 0.01702626  0.1730863   0.41136613  0.05467015  0.31739885  0.22351363]]
transition_posterior [[[  7.83090416e-02   4.69455540e-01   2.45749714e-01   1.69319363e-01]
  [  7.10339212e-03   1.28886761e-03   4.40800096e-03   2.15437717e-03]
  [  5.61555135e-04   3.22438032e-03   3.33764078e-04   1.06574786e-03]
  [  9.09417329e-03   1.72726413e-03   5.65800626e-03   5.46812281e-04]]

 [[  6.38862283e-03   2.21926062e-02   1.67644584e-02   4.97224747e-02]
  [  1.75162655e-01   1.84163068e-02   9.08905178e-02   1.91226572e-01]
  [  2.19765359e-02   7.31192276e-02   1.09221364e-02   1.50131586e-01]
  [  9.37259617e-02   1.03151116e-02   4.87597274e-02   2.02854996e-02]]

 [[  4.63104877e-03   3.83679394e-02   2.35839239e-01   1.84155492e-02]
  [  1.

# Alignment with Unsupervised neural hidden markov model

In [110]:
# BW model variables
# BW model variables
# max_distance = 8 gives 2*8 + 3 = 19 jump weights (see the printed set).
max_distance = 8
baum_welch_model = BaumWelchModel(max_distance, seed=1111)
print("non_negative_set", baum_welch_model.non_negative_set)

non_negative_set [29 56 82 13 35 53 25 23 21 12 15  9 13 87  9 63 62 52 43]


In [127]:
# One one-hot column per target word (5 words, 10-word vocabulary).
x = np.asarray([
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.]
    ]).astype(config.floatX)

# posteriors = np.asarray([
#     [ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
#     [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
#     [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
#     [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
#     [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]
# ]).astype(config.floatX)

# Emission model variables
vocab_input_size = 10
d_embedding = 7
layer_size = [d_embedding, 512]
vocab_output_size = 12
# This model is wired to the Baum-Welch model created in the previous cell.
emission_model = EmissionModel(vocab_input_size=vocab_input_size, layer_size=layer_size, 
                               vocab_output_size=vocab_output_size, baum_welch_model=baum_welch_model)

In [128]:
# Word ids of one sentence pair: target ids index the 10-word input
# vocabulary, source ids index the 12-word output vocabulary.
testing_target = [6, 8, 2, 1, 3]
testing_source = [2, 10, 7, 0]
# Returns the emission posteriors as [target_len, vocab_output_size].
result = emission_model.train_mini_batch(testing_target, testing_source)

one_hot_input [[ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]] (10, 5)
emission_matrix 1 [[  1.00170076e-02   1.20083610e-10   1.39718434e-15   4.68368556e-17
    7.48935809e-12]
 [  1.22934855e-12   7.03421055e-09   1.19096011e-09   2.06476147e-09
    2.72379169e-04]
 [  2.44000323e-11   1.05879911e-13   3.30180978e-12   3.29048344e-05
    7.82270106e-08]
 [  1.74506021e-10   5.58641628e-16   5.26185981e-14   5.10033259e-13
    8.75874785e-13]
 [  1.14724586e-04   1.34182380e-06   9.58455260e-11   9.28782995e-10
    1.30416092e-05]
 [  8.76298368e-01   6.43898308e-01   2.79844013e-11   4.83316711e-08
    3.32501895e-06]
 [  3.90381893e-10   8.50837387e-06   5.96516020e-06   3.58708603e-05
    2.44137453e-04]
 [  3.38336337e-09   8.17109505e-11   1.03117272e-11   8.37082140e-11
    1.07731218e-07]
 [  1.483

In [None]:
# Display the posteriors returned by the train_mini_batch cell above.
result