In [None]:
import numpy as np
import theano.tensor as T
from theano import function, printing
import theano

from theano import config
config.compute_test_value = 'raise'

# updates = OrderedDict()

In [154]:
class EmissionModel:
    """ Simple emission model without CNN
    word embedding layer -> ReLU layer -> softmax layer
    """
    def init_weight_bias(self, n_x, n_y, seed=1402):
        rng = np.random.RandomState(seed)
        
        w = theano.shared(
            value=np.asarray(
                rng.uniform(low=-1.0, high=1.0, size=(n_x, n_y)), 
                dtype=theano.config.floatX
            ), 
            borrow=True
        )
        b = theano.shared(
            value=np.asarray(
                rng.uniform(low=-1.0, high=1.0, size=(n_x, 1)), 
                dtype=theano.config.floatX
            ), 
            borrow=True,
            broadcastable=(False,True)
        )
        
        return w, b
    
    #[7,512]
    def __init__(self, input_size, layer_size, output_size, epoch=1, batch=1, learning_rate = .01, seed=1412):
        
        self.epoch = 1
        self.learning_rate = learning_rate
        self.seed = seed
        
        x_input = T.matrix().astype(config.floatX)
        x_input.tag.test_value = np.asarray([
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  1.,  0.],
            [ 0.,  0.,  1.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  1.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 1.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  1.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.]
        ]).astype(x_input.dtype)
        
        # word embedding layer
        self.w1, self.b1 = self.init_weight_bias(layer_size[0], input_size[0], seed) # 7, 10
        word_embedding_layer = T.dot(self.w1, x_input) # [7, 10] * [10, 5] = [7, 5]
        
        # ReLU layer
        self.w2, self.b2 = self.init_weight_bias(layer_size[1], layer_size[0], seed) # [512, 7] 
        z_relu_layer = T.dot(self.w2, word_embedding_layer) + self.b2 # [512, 7] * [7, 5] = [512, 5]
        z_relu_layer_shape = T.shape(z_relu_layer)
        z_reshaped_relu_layer = T.reshape(z_relu_layer, [z_relu_layer_shape[0]*z_relu_layer_shape[1], 1])
        relu_layer = T.nnet.relu(z_reshaped_relu_layer)
        relu_layer_reshaped = T.reshape(relu_layer, z_relu_layer_shape) # [512, 5]
        
        # Softmax layer
        self.w3, self.b3 = self.init_weight_bias(output_size, layer_size[1], seed) # [9, 512], [9, 1]
        z_softmax_layer = T.dot(self.w3, relu_layer_reshaped) + self.b3 # [9, 512] * [512, 5] = [9, 5]
        log_softmax_layer = T.transpose(T.nnet.logsoftmax(T.transpose(z_softmax_layer))) # [9, 5]
        
        # calculate new gradient
        posteriors = T.matrix().astype(config.floatX)
        posteriors.tag.test_value = np.asarray([
            [-0.15,  0.04, -0.26, -0.61, -0.93, -0.72, -0.15, -0.62,  0.62],
            [ 0.07,  0.42,  0.11,  0.95, -0.86, -0.17, -0.22, -0.69, -0.55],
            [-0.79,  0.3 ,  0.06, -0.79,  0.71,  0.86, -0.58,  0.38,  0.05],
            [ 0.92, -0.33, -0.63,  0.99,  0.67, -0.79, -0.08,  0.64, -0.51],
            [-0.08, -0.29,  0.87,  0.6 ,  0.31,  0.75,  0.38, -0.42,  0.11]
        ]).astype(posteriors.dtype)
        
        cost = T.sum(T.transpose(posteriors) * log_softmax_layer)
        dw1,dw2,dw3,db2,db3 = T.grad(cost=cost, wrt=[self.w1,self.w2,self.w3,self.b2,self.b3])

        # Update w and b
        updates = [
            (self.w1, self.w1 - self.learning_rate * dw1), 
            (self.w2, self.w2 - self.learning_rate * dw2), 
#             (self.b2, self.b2 - self.learning_rate * db2),
            (self.w3, self.w3 - self.learning_rate * dw3), 
#             (self.b3, self.b3 - self.learning_rate * db3)
        ]
        
        # Compile model
        self.test = theano.function(inputs=[x_input, posteriors], outputs=[dw2, log_softmax_layer]) 
        self.train = theano.function(inputs=[x_input, posteriors], outputs=[dw3, self.w3, log_softmax_layer], updates=updates)
#         self.update_model = theano.function(inputs=[dw1,dw2,dw3,db2,db3], updates=updates)



x = np.asarray([
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.]
]).astype(config.floatX)

posteriors = np.asarray([
    [ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
    [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
    [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
    [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
    [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]
]).astype(config.floatX)

input_size = np.shape(x)
d_embedding = 7
layer_size = [d_embedding, 512]
output_size = 9

model = EmissionModel(input_size=input_size, layer_size=layer_size, output_size=output_size)
print(np.shape(model.train(x, posteriors)[0]))
print(np.shape(model.train(x, posteriors)[1]))
print(np.shape(model.train(x, posteriors)[2]))
# print(np.shape(model.evaluate_model(x)))
# print(model.calculate_gradient(posteriors))


(9, 512)
(9, 512)
(9, 5)


In [144]:
print(model.test(x, posteriors)[0])
print("")
print(model.test(x, posteriors)[1])

[[ 1.54168916 -1.95611656 -0.80767059 ...,  0.0150075  -1.01582396
   0.24273789]
 [-0.73727387 -0.14424112 -0.14707598 ...,  0.35589412  0.5348444
  -0.73343819]
 [ 1.35415351  0.5465734   0.2863223  ...,  1.95762193  0.72093588
  -1.74118936]
 ..., 
 [ 1.46260095 -0.90012485  0.12521118 ...,  2.85903215 -1.9474256
  -1.77752268]
 [ 0.13362998 -0.61641389 -0.29073629 ...,  0.15156674 -0.06774771
  -0.23446766]
 [ 0.1291711   0.28002194 -0.85524631 ...,  0.3341288   0.66809851
   0.81234461]]

[[ -90.02972412  -96.960289    -55.47723389  -61.02746582  -60.13369751]
 [ -44.75253296  -80.55125427  -57.31509781  -75.90653992  -76.2182312 ]
 [ -82.14437866 -109.63210297  -64.72553253  -94.17676544  -78.32346344]
 [ -81.82365417 -114.93276215  -85.48132324  -87.57543945  -72.5562973 ]
 [ -70.1398468   -75.07691193  -48.06699371  -43.70638275  -48.73448181]
 [ -51.00871277  -90.19257355  -50.91851807  -42.42922592  -47.50081635]
 [ -91.45339966 -107.12472534  -81.9734726   -76.25728607  -60.

In [109]:
posteriors = np.asarray([
    [ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
    [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
    [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
    [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
    [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]
]).astype(config.floatX)

print(model.test(x, posteriors))

[[  0.           5.57754898  -9.18525124   0.7199952    0.           0.
   -7.89429569   0.          -1.46904933   0.        ]
 [  0.           0.64274013 -13.09244156  -2.32017946   0.           0.
   -0.86876476   0.         -11.55151653   0.        ]
 [  0.          -5.76161098  -4.97917032 -24.14969444   0.           0.
  -25.05790901   0.           8.03187561   0.        ]
 [  0.           5.93158436  -5.77795553  10.00747681   0.           0.
  -12.86737061   0.           5.82973385   0.        ]
 [  0.          -9.92446327   6.61135435   1.3710705    0.           0.
   11.70029926   0.           0.85022926   0.        ]
 [  0.           7.5245719   18.20536995  18.03045273   0.           0.
    6.99127102   0.          -6.09848785   0.        ]
 [  0.          14.53976059 -15.50912952 -15.97651482   0.           0.
  -13.53068733   0.           7.26264811   0.        ]]


In [108]:
np.random.randint(low=-100, high=100, size=(5, 9))/100

array([[ 0.65, -0.32,  0.44, -0.04, -0.36, -0.81,  0.38, -0.84, -0.93],
       [-0.41, -0.05,  0.96,  0.71,  0.08,  0.85,  0.12,  0.43, -0.08],
       [-0.45,  0.04, -0.94,  0.41,  0.04, -0.3 ,  0.89, -0.09, -0.42],
       [-0.19,  0.32,  0.  ,  0.02, -0.66, -0.41,  0.11, -0.05,  0.76],
       [-0.32,  0.86,  0.09, -0.41, -0.57, -0.55, -0.85, -0.09, -0.27]])