In [172]:
import numpy as np
import theano.tensor as T
from theano import function, printing
import theano

from theano import config
config.compute_test_value = 'raise'

# updates = OrderedDict()

In [177]:
class EmissionModel:
    """ Simple emission model without CNN
    word embedding layer -> ReLU layer -> softmax layer
    """
    def init_weight_bias(self, input_size, layer_size, output_size, seed=1402):
        rng = np.random.RandomState(seed)
        
        size_list = np.concatenate(([input_size[0]], layer_size, [output_size]), axis=0)
        w = []
        b = []
        
        for i in range(len(size_list) - 1):
            w.append(
                theano.shared(
                    value=np.asarray(
                        rng.uniform(low=-1.0, high=1.0, size=(size_list[i+1], size_list[i])), 
                        dtype=theano.config.floatX
                    ), 
                    borrow=True
                )
            )
            b.append(
                theano.shared(
                    value=np.asarray(
                        rng.uniform(low=-1.0, high=1.0, size=(size_list[i+1], 1)), 
                        dtype=theano.config.floatX
                    ), 
                    borrow=True,
                    broadcastable=(False,True)
                )
            )
        
        return w, b
    
    #[7,512]
    def __init__(self, input_size, layer_size, output_size, epoch=1, batch=1, learning_rate = .01, seed=1412):
        
        self.epoch = 1
        self.learning_rate = learning_rate
        self.seed = seed
        
        x_input = T.matrix().astype(config.floatX)
        x_input.tag.test_value = np.asarray([
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  1.,  0.],
            [ 0.,  0.,  1.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  1.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 1.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  1.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.]
        ]).astype(x_input.dtype)
        
        self.w, self.b = self.init_weight_bias(input_size, layer_size, output_size, seed)
        
        # word embedding layer
        word_embedding_layer = T.dot(self.w[0], x_input) # [7, 10] * [10, 5] = [7, 5]
        
        # ReLU layer
        z_relu_layer = T.dot(self.w[1], word_embedding_layer) + self.b[1] # [512, 7] * [7, 5] = [512, 5]
        z_relu_layer_shape = T.shape(z_relu_layer)
        z_reshaped_relu_layer = T.reshape(z_relu_layer, [z_relu_layer_shape[0]*z_relu_layer_shape[1], 1])
        relu_layer = T.nnet.relu(z_reshaped_relu_layer)
        relu_layer_reshaped = T.reshape(relu_layer, z_relu_layer_shape) # [512, 5]
        
        # Softmax layer
        z_softmax_layer = T.dot(self.w[2], relu_layer_reshaped) + self.b[2] # [9, 512] * [512, 5] = [9, 5]
        softmax_layer = T.transpose(T.nnet.softmax(T.transpose(z_softmax_layer))) # [9, 5]
        
        # calculate new gradient
        posteriors = T.matrix().astype(config.floatX)
        posteriors.tag.test_value = np.asarray([
            [-0.15,  0.04, -0.26, -0.61, -0.93, -0.72, -0.15, -0.62,  0.62],
            [ 0.07,  0.42,  0.11,  0.95, -0.86, -0.17, -0.22, -0.69, -0.55],
            [-0.79,  0.3 ,  0.06, -0.79,  0.71,  0.86, -0.58,  0.38,  0.05],
            [ 0.92, -0.33, -0.63,  0.99,  0.67, -0.79, -0.08,  0.64, -0.51],
            [-0.08, -0.29,  0.87,  0.6 ,  0.31,  0.75,  0.38, -0.42,  0.11]
        ]).astype(posteriors.dtype)
        
        cost = T.sum(T.transpose(posteriors) * T.log(softmax_layer))
        dw0,dw1,dw2,db1,db2 = T.grad(
            cost=cost, wrt=[self.w[0],self.w[1],self.w[2],self.b[1],self.b[2]]
        )

        # Update w and b
        updates = [
            (self.w[0], self.w[0] - self.learning_rate * dw0), 
            (self.w[1], self.w[1] - self.learning_rate * dw1), 
            (self.b[1], self.b[1] - self.learning_rate * db1),
            (self.w[2], self.w[2] - self.learning_rate * dw2), 
            (self.b[2], self.b[2] - self.learning_rate * db2)
        ]
        
        # Compile model
        self.test = theano.function(
            inputs=[x_input, posteriors], 
            outputs=[dw1, softmax_layer]
        ) 
        self.train = theano.function(
            inputs=[x_input, posteriors], 
            outputs=[dw2, self.w[2], softmax_layer], 
            updates=updates
        )

x = np.asarray([
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.]
]).astype(config.floatX)

posteriors = np.asarray([
    [ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
    [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
    [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
    [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
    [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]
]).astype(config.floatX)

input_size = np.shape(x)
d_embedding = 7
layer_size = [d_embedding, 512]
output_size = 9

model = EmissionModel(input_size=input_size, layer_size=layer_size, output_size=output_size)

result = model.train(x, posteriors)
print(np.shape(result[0]))
print(np.shape(result[1]))
print(np.shape(result[2]))
# print(np.shape(model.evaluate_model(x)))
# print(model.calculate_gradient(posteriors))


(9, 512)
(9, 512)
(9, 5)


In [166]:
print(model.test(x, posteriors)[0])
print("")
print(model.test(x, posteriors)[1])

[[ -4.25463021e-01  -1.74731529e+00  -3.40455741e-01 ...,   8.40714455e-01
    1.16684544e+00  -1.74363041e+00]
 [ -1.73396075e+00  -3.52505970e+00  -1.80466461e+00 ...,   1.56047273e+00
    2.06868815e+00  -2.42936778e+00]
 [ -5.50276101e-01   5.80202416e-02  -1.51815784e+00 ...,   9.53709185e-01
    2.68967301e-01   3.69657218e-01]
 ..., 
 [ -9.68661189e-01   5.42034388e-01   1.15126371e+00 ...,  -8.52006793e-01
   -6.70304239e-01   4.77848887e-01]
 [  3.67000699e-01   4.21549737e-01   8.97033513e-03 ...,   2.57458866e-01
    1.96427971e-01   7.65285119e-02]
 [ -4.27518426e-05  -1.21131372e-02   3.77787501e-02 ...,  -8.82423893e-02
    4.54101712e-03   5.87681048e-02]]

[[ -1.84710197e+01  -7.45869827e+01  -6.41599178e+00  -2.60746651e+01
   -1.36473475e+01]
 [ -3.23260193e+01  -8.30433655e+01  -8.10556221e+00  -3.15460014e+01
   -2.47860146e+01]
 [ -4.28678932e+01  -1.31970825e+02  -5.94393015e+00  -4.57771158e+00
   -1.53612356e+01]
 [ -3.80866966e+01  -1.26840454e+02  -1.99077091e

In [167]:
posteriors = np.asarray([
    [ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
    [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
    [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
    [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
    [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]
]).astype(config.floatX)

print(model.test(x, posteriors))

[array([[ -4.25463021e-01,  -1.74731529e+00,  -3.40455741e-01, ...,
          8.40714455e-01,   1.16684544e+00,  -1.74363041e+00],
       [ -1.73396075e+00,  -3.52505970e+00,  -1.80466461e+00, ...,
          1.56047273e+00,   2.06868815e+00,  -2.42936778e+00],
       [ -5.50276101e-01,   5.80202416e-02,  -1.51815784e+00, ...,
          9.53709185e-01,   2.68967301e-01,   3.69657218e-01],
       ..., 
       [ -9.68661189e-01,   5.42034388e-01,   1.15126371e+00, ...,
         -8.52006793e-01,  -6.70304239e-01,   4.77848887e-01],
       [  3.67000699e-01,   4.21549737e-01,   8.97033513e-03, ...,
          2.57458866e-01,   1.96427971e-01,   7.65285119e-02],
       [ -4.27518426e-05,  -1.21131372e-02,   3.77787501e-02, ...,
         -8.82423893e-02,   4.54101712e-03,   5.87681048e-02]], dtype=float32), array([[ -1.84710197e+01,  -7.45869827e+01,  -6.41599178e+00,
         -2.60746651e+01,  -1.36473475e+01],
       [ -3.23260193e+01,  -8.30433655e+01,  -8.10556221e+00,
         -3.15460014

In [108]:
np.random.randint(low=-100, high=100, size=(5, 9))/100

array([[ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
       [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
       [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
       [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
       [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]])