In [137]:
import numpy as np
import sklearn.preprocessing as preprocessing
import theano.tensor as T
from theano import function, printing
import theano

from theano import config
config.gcc.cxxflags = "-D_hypot=hypot"
config.compute_test_value = 'raise'

# updates = OrderedDict()

In [None]:
class Evolution:

    def calculate_AER(self, S, P, A):
        s_a, p_a, len_s, len_a = 0, 0, 0, 0
        for s, p, a in zip(S, P, A):
            s_a += len(list(set(s).intersection(a)))
            p_a += len(list(set(p).intersection(a)))
            len_s += len(s)
            len_a += len(a)
        print ("s_a", s_a)
        p_a += s_a
        print ("p_a", p_a)
        aer = (s_a + p_a) / (len_s + len_a)
        print ("aer", 1.-aer)

        return 1. - aer 


    def calculate_one_AER(self, S, P, A):
        s_a = len(list(set(S).intersection(A)))
        print ("s_a", s_a)
        p_a = len(list(set(P).intersection(A))) + s_a
        print ("p_a", p_a)
        aer = (s_a + p_a) / (len(S) + len(A))
        print ("aer", 1.-aer)

        return 1. - aer 

In [192]:
X = np.arange(9)
np.split(X, 3)

[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8])]

In [177]:
class EmissionModel:
    """ Simple emission model without CNN
    word embedding layer -> ReLU layer -> softmax layer
    """
    
    def init_weights_bias(self, input_size, layer_size, output_size, seed=1402):
        random_state = np.random.RandomState(seed)
        
        size_list = np.concatenate(([input_size[0]], layer_size, [output_size]), axis=0)
        w = []
        b = []
        
        for i in range(len(size_list) - 1):
            w.append(
                theano.shared(
                    value=np.asarray(
                        random_state.uniform(low=-1.0, high=1.0, size=(size_list[i+1], size_list[i])), 
                        dtype=theano.config.floatX
                    ), 
                    borrow=True
                )
            )
            b.append(
                theano.shared(
                    value=np.asarray(
                        random_state.uniform(low=-1.0, high=1.0, size=(size_list[i+1], 1)), 
                        dtype=theano.config.floatX
                    ), 
                    borrow=True,
                    broadcastable=(False,True)
                )
            )
        
        return w, b
    
    #[7,512]
    def __init__(self, input_size, layer_size, output_size, epoch=1, batch=1, learning_rate = .01, seed=1412):
        
        self.epoch = epoch
        self.batch = batch
        self.learning_rate = learning_rate
        self.seed = seed
        self.posteriors = []
        
        x_training_input = T.matrix().astype(config.floatX)
        x_training_input.tag.test_value = np.asarray([
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  1.,  0.],
            [ 0.,  0.,  1.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  1.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 1.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  1.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.]
        ]).astype(x_training_input.dtype)
        
        self.w, self.b = self.init_weights_bias(input_size, layer_size, output_size, seed)
        
        # word embedding layer
        word_embedding_layer = T.dot(self.w[0], x_training_input) # [7, 10] * [10, 5] = [7, 5]
        
        # ReLU layer
        z_relu_layer = T.dot(self.w[1], word_embedding_layer) + self.b[1] # [512, 7] * [7, 5] = [512, 5]
        z_relu_layer_shape = T.shape(z_relu_layer)
        z_reshaped_relu_layer = T.reshape(z_relu_layer, [z_relu_layer_shape[0]*z_relu_layer_shape[1], 1])
        relu_layer = T.nnet.relu(z_reshaped_relu_layer)
        relu_layer_reshaped = T.reshape(relu_layer, z_relu_layer_shape) # [512, 5]
        
        # Softmax layer
        z_softmax_layer = T.dot(self.w[2], relu_layer_reshaped) + self.b[2] # [9, 512] * [512, 5] = [9, 5]
        softmax_layer = T.transpose(T.nnet.softmax(T.transpose(z_softmax_layer))) # [9, 5]
        
        # calculate new gradient
        posteriors = T.matrix().astype(config.floatX)
        posteriors.tag.test_value = np.asarray([
            [-0.15,  0.04, -0.26, -0.61, -0.93, -0.72, -0.15, -0.62,  0.62],
            [ 0.07,  0.42,  0.11,  0.95, -0.86, -0.17, -0.22, -0.69, -0.55],
            [-0.79,  0.3 ,  0.06, -0.79,  0.71,  0.86, -0.58,  0.38,  0.05],
            [ 0.92, -0.33, -0.63,  0.99,  0.67, -0.79, -0.08,  0.64, -0.51],
            [-0.08, -0.29,  0.87,  0.6 ,  0.31,  0.75,  0.38, -0.42,  0.11]
        ]).astype(posteriors.dtype)
        
        cost = T.sum(T.transpose(posteriors) * T.log(softmax_layer))
        # TODO: use dw[] and db[] abstractly 
        dw0,dw1,dw2,db1,db2 = T.grad(
            cost=cost, wrt=[self.w[0],self.w[1],self.w[2],self.b[1],self.b[2]]
        )

        # Update w and b
        updates = [
            (self.w[0], self.w[0] - self.learning_rate * dw0), 
            (self.w[1], self.w[1] - self.learning_rate * dw1), 
            (self.b[1], self.b[1] - self.learning_rate * db1),
            (self.w[2], self.w[2] - self.learning_rate * dw2), 
            (self.b[2], self.b[2] - self.learning_rate * db2)
        ]
        
        # Compile model
        self.test = theano.function(
            inputs=[x_training_input, posteriors], 
            outputs=[dw1, softmax_layer]
        ) 
        self.train_mini_batch = theano.function(
            inputs=[x_training_input, posteriors], 
            outputs=[dw2, self.w[2], softmax_layer], 
            updates=updates
        )
        self.test_values = theano.function(
            inputs=[x_training_input], 
            outputs=[softmax_layer]
        )
        
    def train_model(inputs):
        for i in range(self.epoch):
            for x_input in np.split(inputs, self.batch):
                self.posteriors = 
                self.train_mini_batch(x_input, posteriors)
            # TODO: create train_batch function 
    
x = np.asarray([
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.]
]).astype(config.floatX)

posteriors = np.asarray([
    [ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
    [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
    [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
    [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
    [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]
]).astype(config.floatX)

input_size = np.shape(x)
d_embedding = 7
layer_size = [d_embedding, 512]
output_size = 9

model = EmissionModel(input_size=input_size, layer_size=layer_size, output_size=output_size)

result = model.train_mini_batch(x, posteriors)
print(np.shape(result[0]))
print(np.shape(result[1]))
print(np.shape(result[2]))
# print(np.shape(model.evaluate_model(x)))
# print(model.calculate_gradient(posteriors))


(9, 512)
(9, 512)
(9, 5)


In [166]:
print(model.test(x, posteriors)[0])
print("")
print(model.test(x, posteriors)[1])

[[ -4.25463021e-01  -1.74731529e+00  -3.40455741e-01 ...,   8.40714455e-01
    1.16684544e+00  -1.74363041e+00]
 [ -1.73396075e+00  -3.52505970e+00  -1.80466461e+00 ...,   1.56047273e+00
    2.06868815e+00  -2.42936778e+00]
 [ -5.50276101e-01   5.80202416e-02  -1.51815784e+00 ...,   9.53709185e-01
    2.68967301e-01   3.69657218e-01]
 ..., 
 [ -9.68661189e-01   5.42034388e-01   1.15126371e+00 ...,  -8.52006793e-01
   -6.70304239e-01   4.77848887e-01]
 [  3.67000699e-01   4.21549737e-01   8.97033513e-03 ...,   2.57458866e-01
    1.96427971e-01   7.65285119e-02]
 [ -4.27518426e-05  -1.21131372e-02   3.77787501e-02 ...,  -8.82423893e-02
    4.54101712e-03   5.87681048e-02]]

[[ -1.84710197e+01  -7.45869827e+01  -6.41599178e+00  -2.60746651e+01
   -1.36473475e+01]
 [ -3.23260193e+01  -8.30433655e+01  -8.10556221e+00  -3.15460014e+01
   -2.47860146e+01]
 [ -4.28678932e+01  -1.31970825e+02  -5.94393015e+00  -4.57771158e+00
   -1.53612356e+01]
 [ -3.80866966e+01  -1.26840454e+02  -1.99077091e

In [167]:
posteriors = np.asarray([
    [ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
    [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
    [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
    [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
    [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]
]).astype(config.floatX)

print(model.test(x, posteriors))

[array([[ -4.25463021e-01,  -1.74731529e+00,  -3.40455741e-01, ...,
          8.40714455e-01,   1.16684544e+00,  -1.74363041e+00],
       [ -1.73396075e+00,  -3.52505970e+00,  -1.80466461e+00, ...,
          1.56047273e+00,   2.06868815e+00,  -2.42936778e+00],
       [ -5.50276101e-01,   5.80202416e-02,  -1.51815784e+00, ...,
          9.53709185e-01,   2.68967301e-01,   3.69657218e-01],
       ..., 
       [ -9.68661189e-01,   5.42034388e-01,   1.15126371e+00, ...,
         -8.52006793e-01,  -6.70304239e-01,   4.77848887e-01],
       [  3.67000699e-01,   4.21549737e-01,   8.97033513e-03, ...,
          2.57458866e-01,   1.96427971e-01,   7.65285119e-02],
       [ -4.27518426e-05,  -1.21131372e-02,   3.77787501e-02, ...,
         -8.82423893e-02,   4.54101712e-03,   5.87681048e-02]], dtype=float32), array([[ -1.84710197e+01,  -7.45869827e+01,  -6.41599178e+00,
         -2.60746651e+01,  -1.36473475e+01],
       [ -3.23260193e+01,  -8.30433655e+01,  -8.10556221e+00,
         -3.15460014

# BaumWelchModel

[[ 63.  69.  76.  58.   0.   0.   0.   0.]
 [ 87.  63.  69.  76.   0.   0.   0.   0.]
 [ 93.  87.  63.  69.   0.   0.   0.   0.]
 [ 53.  93.  87.  63.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.]]


array([[ 0.21283784,  0.22115385,  0.25762712,  0.21804511,  0.3       ,
         0.        ,  0.        ,  0.        ],
       [ 0.29391892,  0.20192308,  0.23389831,  0.28571429,  0.        ,
         0.3       ,  0.        ,  0.        ],
       [ 0.31418919,  0.27884615,  0.21355932,  0.2593985 ,  0.        ,
         0.        ,  0.3       ,  0.        ],
       [ 0.17905405,  0.29807692,  0.29491525,  0.23684211,  0.        ,
         0.        ,  0.        ,  0.3       ],
       [ 0.21283784,  0.22115385,  0.25762712,  0.21804511,  0.3       ,
         0.        ,  0.        ,  0.        ],
       [ 0.29391892,  0.20192308,  0.23389831,  0.28571429,  0.        ,
         0.3       ,  0.        ,  0.        ],
       [ 0.31418919,  0.27884615,  0.21355932,  0.2593985 ,  0.        ,
         0.        ,  0.3       ,  0.        ],
       [ 0.17905405,  0.29807692,  0.29491525,  0.23684211,  0.        ,
         0.        ,  0.        ,  0.3       ]])

Transition matrix and "Baum Welch Algorithm"

Compute forward messages: alpha <br>
Compute backward messages: beta <br>
Compute posteriors: <br>
    p(z|x) = alpha * beta <br>
    p(z_i, z_i+1 | x) <br>

In [255]:
test = np.asarray([
    [ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
    [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
    [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
    [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
    [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]
])
print(np.max(test, axis=0))
np.subtract(test, np.max(test, axis=0))

[ 0.65  0.86  0.96  0.71  0.08  0.85  0.89  0.43  0.76]


array([[ 0.  , -1.18, -0.52, -0.75, -0.44, -1.66, -0.51, -1.27, -1.69],
       [-1.06, -0.91,  0.  ,  0.  ,  0.  ,  0.  , -0.77,  0.  , -0.84],
       [-1.1 , -0.82, -1.9 , -0.3 , -0.04, -1.15,  0.  , -0.52, -1.18],
       [-0.84, -0.54, -0.96, -0.69, -0.74, -1.26, -0.78, -0.48,  0.  ],
       [-0.97,  0.  , -0.87, -1.12, -0.65, -1.4 , -1.74, -0.52, -1.03]])

In [262]:
x1 = np.arange(9.0).reshape((3, 3))
x2 = np.arange(9.0).reshape((3, 3))
np.multiply(x1, x2)

array([[  0.,   1.,   4.],
       [  9.,  16.,  25.],
       [ 36.,  49.,  64.]])

In [260]:
class BaumWelchModel:
    
    def normalize_matrix(self, x, axis=1, whole_matrix=False):
        """Compute softmax values for each sets of scores in x.
            axis=1: row
            axis=0: column 
        Input
        -----
        
        Output
        ------
        """
        if len(np.shape(x)) == 1 or whole_matrix:
            e_x = np.exp(x - np.max(x))
            return e_x / np.sum(e_x)
        if axis == 0:
            e_x = np.exp( np.subtract(x, np.max(x, axis=axis)[None, :]) )
            return e_x / np.sum(e_x, axis=axis)[None, :]
        else: 
            e_x = np.exp( np.subtract(x, np.max(x, axis=axis)[:, None]) )
            return e_x / np.sum(e_x, axis=axis)[:, None]
        
    def generate_transition_distant_matrix(self, sentence_length, po=0., nomalized=True):
        """ Generate a transition matrix based on jump distance in the latent sentence.
        We extend the latent sentence for 2*length in which each word has 
        an empty word to represent no-alignment state.
        where [sentence_length:end] elements are empty words considered as 
        latent words having no direct aligment.

        Input
        -----
        sentence_length: the length of latent sentence
                      int value
        non_negative_set: random non-negative set as max_distance size
        po: default value for A->A_empty_word

        Output
        ------
        trans_distant_matrix
        """
        if po==0.:
            po = self.po
        trans_distant_matrix = np.zeros((2*sentence_length, 2*sentence_length))

        for i in range(sentence_length):
            for j in range(sentence_length):
                indice = i - j + self.max_distance + 1
                if indice < 0:
                    p_ = self.non_negative_set[0]
                elif (indice > 2*self.max_distance + 2):
                    p_ = self.non_negative_set[-1]
                else:
                    p_ = self.non_negative_set[indice]
                trans_distant_matrix[i][j] = p_

        print(trans_distant_matrix)

        for i in range(sentence_length):
            trans_distant_matrix[i+sentence_length][i+sentence_length] = po
            trans_distant_matrix[i][i+sentence_length] = po

            sum_d = np.sum(trans_distant_matrix[:sentence_length, i])
            trans_distant_matrix[:sentence_length, i] = \
                    np.divide(
                        trans_distant_matrix[:sentence_length, i], 
                        sum_d
                    )
            trans_distant_matrix[sentence_length:, i] = \
                    np.copy(trans_distant_matrix[:sentence_length, i])

        return trans_distant_matrix
    
    def generate_transition_matrix(self, sentence_length, po=0., nomalized=True):
        """ Generate a transition matrix based on jump distance in the latent sentence.

        Input
        -----
        sentence_length: the length of latent sentence
                      int value
        non_negative_set: random non-negative set as max_distance size
        po: default value for A->A_empty_word

        Output
        ------
        trans_matrix
        """
        if po==0.:
            po = self.po
        trans_matrix = np.zeros((sentence_length, sentence_length))

        for i in range(sentence_length):
            for j in range(sentence_length):
                indice = i - j + self.max_distance + 1
                if indice < 0:
                    p_ = self.non_negative_set[0]
                elif (indice > 2*self.max_distance + 2):
                    p_ = self.non_negative_set[-1]
                else:
                    p_ = self.non_negative_set[indice]
                trans_matrix[i][j] = p_
        if nomalized:
            return self.normalize_matrix(trans_matrix, axis=1)
        return trans_matrix
        
    def __init__(self, max_distance, po=0.3, seed=1402):
        np.random.seed(seed)
        self.max_distance = max_distance
        self.non_negative_set = np.random.randint(
                                    low=1, high=100, 
                                    size=[max_distance + max_distance + 3]
        )
        self.po = po
        
    def calc_forward_messages(self, unary_matrix, transition_matrix, emission_matrix):
        """Calcualte the forward messages ~ alpha values.
        
        
        Input
        -----
        unary_matrix: emission posteriors - marginal probabilities ~ initial matrix.
                      size ~ [1, target_len]
        transition_matrix: size ~ [target_len, target_len]
        emission_matrix: size ~ [target_len, source_len]

        Return
        ------
        alpha
        """

        # TODO: verify matrix length
        source_len = np.shape(emission_matrix)[1]
        target_len = np.shape(emission_matrix)[0]

        alpha = np.zeros(np.shape(emission_matrix))
        alpha[:,0] = np.multiply(emission_matrix[:,0], unary_matrix)
        print("first alpha", alpha)
        
        for t in np.arange(1, source_len):
            for i in range(target_len):
                sum_al = 0.0;
#                 print("alpha : ", t, i, " :: ", emission_matrix[i][t])
                for j in range(target_len):
                    sum_al += alpha[j][t-1] * transition_matrix[j][i]
#                     print("   sum_al: ", t, i, j, alpha[j][t-1], transition_matrix[j][i])

                alpha[i][t] = emission_matrix[i][t] * sum_al

        return alpha
    
    
    def calc_backward_messages(self, transition_matrix, emission_matrix):
        """Calcualte the backward messages ~ beta values.

        Return
        ------
        beta
        """
        # TODO: verify matrix length
        source_len = np.shape(emission_matrix)[1]
        target_len = np.shape(emission_matrix)[0]

        beta = np.zeros(np.shape(emission_matrix))
        beta[:,-1] = [1]*target_len

        for t in reversed(range(source_len-1)):
            for i in range(target_len):
    #             print("beta ", t, i)
                for j in range(target_len):
                    beta[i][t] += beta[j][t+1] * transition_matrix[i][j] * emission_matrix[j][t+1]
    #                 print("    ", beta[t+1][j], transition_matrix[i][j], emission_matrix[ observation_sentence[t+1] ][j], beta[t][i])

        return beta

    def calc_posterior_matrix(self, alpha, beta):
        """Calcualte the gama and epsilon values in order to reproduce 
        better transition and emission matrix.
        
        gamma: P(e_aj|f_j)
        epsilon: P(e_aj,e_a(j+1)|f_j)

        Return
        ------
        unary_matrix, posterior_gamma, posterior_epsilon
        """
        # TODO: verify matrix length
        source_len = np.shape(alpha)[1]
        target_len = np.shape(alpha)[0]

        gamma = np.multiply(alpha, beta)
        epsilon = np.zeros((source_len, target_len, target_len))

        # Normalization on columns
        gamma = self.normalize_matrix(gamma, axis=0)

        for t in range(source_len-1):   
            for i in range(target_len):
                for j in range(target_len):
                    epsilon[t][i][j] = alpha[i][t] * transition_matrix[i][j] * \
                                        beta[j][t+1] * emission_matrix[j][t+1]
            # Normalization
            epsilon[t] = self.normalize_matrix(epsilon[t], whole_matrix=True)

        # Update unary matrix
        # Normalization unary
        new_unary_matrix = np.copy(gamma[:,0])#self.normalize_matrix(np.copy(gamma[:,0]), axis=1)

#         new_transition_matrix = np.zeros( (latent_indice_len, latent_indice_len) )
#         new_emission_matrix = np.zeros( (observation_len, latent_indice_len) )
            
#         # Update emission matrix
#         sum_gamma = [np.sum(gamma.T[i]) for i in range(latent_indice_len)]   
#         for i in range(latent_indice_len):
#             new_emission_matrix.T[i] = np.divide(gamma.T[i], sum_gamma[i])

        return new_unary_matrix, gamma, epsilon


    def calculate_baum_welch_posteriors(self, sentence_length, unary_matrix, emission_matrix):
        transition_matrix = self.generate_transition_matrix(sentence_length)
        alpha = self.calc_forward_messages(unary_matrix, transition_matrix, emission_matrix)
        beta = self.calc_backward_messages(transition_matrix, emission_matrix)

        new_unary_matrix, emission_posterior, transition_posterior = self.calc_posterior_matrix(alpha, beta)
        return emission_posterior, transition_posterior # gamma, epsilon
    
    def update_non_negative_transition_set(self, emission_posteriors, transition_posteriors):
        pass
        # TODO 1: update non-negative set: s[-1] = 
        # TODO 1.1: calculate new transition matrix
        transition_list = np.array([])
        for gamma, epsilon in zip(emission_posteriors, transition_posteriors):
            source_len = np.shape(gamma)[1]
            target_len = np.shape(gamma)[0]
            new_transition_matrix = np.zeros((target_len, target_len))

            for i in range(target_len):
                sum_gamma = np.sum(gamma[i][:-1])
                for j in range(target_len):
                    sum_ep = np.sum(epsilon[:-1][i][j])
                    new_transition_matrix[i][j] = sum_ep/sum_gamma
            # Normalization
            new_transition_matrix = self.normalize_matrix(new_transition_matrix, axis=1)
            transition_list.append(new_transition_matrix)
            
        # TODO 1.2: update
        new_non_negative_set = np.zeros(max_distance)
        
        return new_non_negative_set

In [261]:
target_length = 4
max_distance = 2

baum_welch_model = BaumWelchModel(max_distance)
print("non_negative_set", baum_welch_model.non_negative_set)
unary_matrix = [.50, .1, .25, .15]
# transition_matrix = np.array([
#     [.3, .7], 
#     [.1, .9]
# ])
transition_matrix = baum_welch_model.generate_transition_matrix(target_length, nomalized=True)
print("transition_matrix", transition_matrix)
emission_matrix = baum_welch_model.normalize_matrix(np.array([
    [.4, .5, .7, .1, .4, .8],
    [.6, .5, .4, .2, .7, .4],
    [.2, .3, .8, .9, .2, .3],
    [.6, .2, .7, .1, .4, .5]
]), axis=0)
print("emission_matrix", emission_matrix)

alpha = baum_welch_model.calc_forward_messages(unary_matrix, transition_matrix, emission_matrix)
beta = baum_welch_model.calc_backward_messages(transition_matrix, emission_matrix)

print("alpha", alpha)
print("beta", beta)

new_unary_matrix, emission_posterior, transition_posterior = baum_welch_model.calc_posterior_matrix(alpha, beta)
print("new_unary_matrix: ", new_unary_matrix)
print("gamma: ", emission_posterior)
print("epsilon: ", transition_posterior)

non_negative_set [58 48 56 10 59 11 62]
transition_matrix [[  1.25523017e-21   1.19198156e-01   3.99865265e-05   8.80761858e-01]
 [  9.52558972e-01   4.99415778e-22   4.74251187e-02   1.59093549e-05]
 [  1.35757443e-21   9.52574127e-01   4.99423723e-22   4.74258732e-02]
 [  9.52574127e-01   6.75896510e-23   4.74258732e-02   2.48648431e-23]]
emission_matrix [[ 0.23465716  0.28093447  0.26001113  0.18759221  0.23988929  0.33138161]
 [ 0.2866109   0.28093447  0.19262099  0.20732145  0.32381667  0.22213174]
 [ 0.19212103  0.23000969  0.28735674  0.41749413  0.19640474  0.20099311]
 [ 0.2866109   0.20812137  0.26001113  0.18759221  0.23988929  0.24549354]]
first alpha [[ 0.11732858  0.          0.          0.          0.          0.        ]
 [ 0.02866109  0.          0.          0.          0.          0.        ]
 [ 0.04803026  0.          0.          0.          0.          0.        ]
 [ 0.04299164  0.          0.          0.          0.          0.        ]]
alpha [[  1.17328580e-01   

In [243]:
emission_posterior, transition_posterior = \
    baum_welch_model.calculate_baum_welch_posteriors(sentence_length, unary_matrix, emission_matrix)
print("emission_posterior", emission_posterior)
print("transition_posterior", transition_posterior)

first alpha [[ 0.11732858  0.          0.          0.          0.          0.        ]
 [ 0.02866109  0.          0.          0.          0.          0.        ]
 [ 0.04803026  0.          0.          0.          0.          0.        ]
 [ 0.04299164  0.          0.          0.          0.          0.        ]]
emission_posterior [[  3.22930939e-06   8.16988927e-06   5.06318676e-06   7.16789256e-06
    5.07044613e-06   8.25982156e-06]
 [  2.39984566e-06   2.41359773e-06   7.59571907e-07   7.78932507e-07
    1.95912414e-06   9.21736944e-07]
 [  2.23113160e-06   3.38434358e-07   3.97050630e-07   1.34145548e-06
    3.88866479e-07   2.49592902e-07]
 [  6.10829605e-06   3.04666133e-06   7.74877341e-06   4.68030215e-06
    6.55014595e-06   4.53743129e-06]]
transition_posterior [[[ 0.06249991  0.06249997  0.06249991  0.06250042]
  [ 0.06250016  0.06249991  0.06249992  0.06249991]
  [ 0.06249991  0.06250009  0.06249991  0.06249992]
  [ 0.06250028  0.06249991  0.06249993  0.06249991]]

 [[ 0.06

# Alignment with Unsupervised neural hidden markov model

In [None]:
# BW model variables

# Emission model variables
input_size = np.shape(x)
d_embedding = 7
layer_size = [d_embedding, 512]
output_size = 9

emission_model = EmissionModel(input_size=input_size, layer_size=layer_size, output_size=output_size)

result = emission_model.train_mini_batch(x, posteriors)