In [38]:
import numpy as np
import theano.tensor as T
from theano import function, printing
import theano

from theano import config
config.compute_test_value = 'warn'

# updates = OrderedDict()

In [63]:
class EmissionModel:
    """ Simple emission model without CNN
    word embedding layer -> ReLU layer -> softmax layer
    """
    def init_weight_bias(self, n_in, n_out, seed=1402):
        rng = np.random.RandomState(seed)
        return theano.shared(
            value=np.asarray(
                rng.uniform(low=-1.0, high=1.0, size=(n_in, n_out)), 
                dtype=theano.config.floatX
            ), 
            borrow=True
        ), T.addbroadcast(theano.shared(
            value=np.asarray(
                rng.uniform(low=-1.0, high=1.0, size=(n_in, 1)), 
                dtype=theano.config.floatX
            ), 
            borrow=True
        ), 1)
    
    #[7,512]
    def __init__(self, input_size, layer_size, output_size, epoch=1, learning_rate = .01, seed=1412):
        
        self.epoch = 1
        self.learning_rate = learning_rate
        self.seed = seed
        
        x_input = T.matrix().astype(config.floatX)
        x_input.tag.test_value = np.asarray([
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  1.,  0.],
            [ 0.,  0.,  1.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  1.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 1.,  0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.],
            [ 0.,  1.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.,  0.]
        ]).astype(x_input.dtype)
        
        # word embedding layer
        self.w1, self.b1 = self.init_weight_bias(layer_size[0], input_size[0], seed) # 7, 10
        word_embedding_layer = T.dot(self.w1, x_input) # [7, 10] * [10, 5] = [7, 5]
        
        # ReLU layer
        self.w2, self.b2 = self.init_weight_bias(layer_size[1], layer_size[0], seed) # [512, 7] 
        z_relu_layer = T.dot(self.w2, word_embedding_layer) + self.b2 # [512, 7] * [7, 5] = [512, 5]
        z_relu_layer_shape = T.shape(z_relu_layer)
        z_reshaped_relu_layer = T.reshape(z_relu_layer, [z_relu_layer_shape[0]*z_relu_layer_shape[1], 1])
        relu_layer = T.nnet.relu(z_reshaped_relu_layer)
        relu_layer_reshaped = T.reshape(relu_layer, z_relu_layer_shape)
        
        # Softmax layer
        self.w3, self.b3 = self.init_weight_bias(output_size, input_size[1], seed)
        z_softmax_layer = T.dot(self.w3, relu_layer_reshaped) + self.b3
        softmax_layer = T.transpose(T.nnet.softmax(T.transpose(relu_layer)))
        
        # calculate new gradient
        new_emission = T.matrix().astype(config.floatX)
        dw1,dw2,dw3,db2,db3 = T.grad(T.le(softmax_layer),[self.w1,self.w2,self.w3,self.b2,self.b3])
        
        # Update w and b
        updates = [
            (self.w1, self.w1 - self.learning_rate * T.grad(cost, dw1)), 
            (self.w2, self.w2 - self.learning_rate * T.grad(cost, dw2)), 
            (self.b2, self.b2 - self.learning_rate * T.grad(cost, db2)),
            (self.w3, self.w3 - self.learning_rate * T.grad(cost, dw3)), 
            (self.b3, self.b3 - self.learning_rate * T.grad(cost, db3))
        ]
        
        # Compile model
        self.test = theano.function(inputs=[x_input], outputs=word_embedding_layer) 
#         self.evaluate_model = theano.function(inputs=x_input, outputs=softmax_layer)
#         self.calculate_gradient = theano.function(inputs=new_emission, outputs=[dw1,dw2,dw3,db2,db3])
#         self.update_model = theano.function(inputs=[dw1,dw2,dw3,db2,db3], updates=updates)




x = np.asarray([
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.]
    ]).astype(config.floatX)

input_size = np.shape(x)
d_embedding = 7
layer_size = [d_embedding,512]
output_size = 5

model = EmissionModel(input_size, layer_size, output_size)
np.shape(model.test())
# model.evaluate_model(x)

ValueError: shapes (5,5) and (512,5) not aligned: 5 (dim 1) != 512 (dim 0)

In [None]:
# X = theano.shared(value=np.asarray([[1, 0], [0, 0], [0, 1], [1, 1]]), name='X')
# y = theano.shared(value=np.asarray([[1], [0], [1], [0]]), name='y')
# rng = np.random.RandomState(1234)
# LEARNING_RATE = 0.01
 
# def layer(n_in, n_out):
#     return theano.shared(
#         value=np.asarray(
#             rng.uniform(
#                 low=-1.0, high=1.0, size=(n_in, n_out)
#             ), 
#             dtype=theano.config.floatX
#         ), 
#         name='W', borrow=True
#     )
 
# W1 = layer(2, 3)
# W2 = layer(3, 1)
 
# output = T.nnet.sigmoid(T.dot(T.nnet.sigmoid(T.dot(X, W1)), W2))
# cost = T.sum((y - output) ** 2)
# updates = [(W1, W1 - LEARNING_RATE * T.grad(cost, W1)), (W2, W2 - LEARNING_RATE * T.grad(cost, W2))]
 
# train = theano.function(inputs=[], outputs=[], updates=updates)
# test = theano.function(inputs=[], outputs=[output])
 
# for i in range(60000):
#     if (i+1) % 10000 == 0:
#         print(i+1)
#     train()
 
# print(test())