In [1]:
import theano
import theano.tensor as T


def sigmoid(x):
    """Return ``theano.tensor.nnet.sigmoid`` applied to the expression ``x``."""
    return T.nnet.sigmoid(x)


def rectify(x):
    """Rectifier activation: ``T.maximum`` of ``0.0`` and the expression ``x``."""
    floor = 0.0
    return T.maximum(floor, x)


def get_by_name(name):
    """Look up an activation function by its string name.

    Args:
        name: ``'sigmoid'`` or ``'rectify'``.

    Returns:
        The module-level function with that name.

    Raises:
        ValueError: if ``name`` is not one of the supported names.
    """
    if name == 'sigmoid':
        return sigmoid
    if name == 'rectify':
        return rectify
    # Anything else is unsupported.
    raise ValueError('There is no such name:{}'.format(name))

In [2]:
import numpy as np
from numpy import random


# Shared, fixed-seed generator so that weight initialisation is reproducible.
rng = random.RandomState(42)


def get_constant(shape, val=0.0):
    """Create a float64 array of the given shape with every entry set to ``val``.

    Args:
        shape: int or tuple of ints, forwarded to NumPy as the array shape.
        val: scalar fill value (default ``0.0``).

    Returns:
        A new ``numpy.ndarray`` of dtype float64.
    """
    # dtype is pinned so an integer ``val`` still yields a float array.
    return np.full(shape, val, dtype=np.float64)


def get_uniform(shape, init_range):
    """Draw an array from the module-level ``rng`` with bounds ``-init_range`` and ``init_range``.

    Args:
        shape: int or tuple of ints, passed as ``size`` to ``rng.uniform``.
        init_range: magnitude of the symmetric sampling interval.

    Returns:
        The array returned by ``rng.uniform``.
    """
    low, high = -init_range, init_range
    return rng.uniform(low=low, high=high, size=shape)

In [3]:
class Layer(object):
    """A single affine layer followed by an activation function.

    The weights and bias are stored as Theano shared variables named
    ``<name>_W`` and ``<name>_b``.
    """

    def __init__(self, W_init, b_init, activation_fun, name):
        """Create the layer's shared parameters.

        Args:
            W_init: initial value for the weight shared variable.
            b_init: initial value for the bias shared variable.
            activation_fun: callable applied to the pre-activation expression.
            name: layer name; also used as the prefix of the parameter names.
        """
        self.name = name
        self.activation_fun = activation_fun
        # borrow=True is forwarded to theano.shared together with the initial values.
        self.W = theano.shared(value=W_init, name=name + '_W', borrow=True)
        self.b = theano.shared(value=b_init, name=name + '_b', borrow=True)

    def get_forward_pass_expr(self, matrix):
        """Return ``activation_fun(dot(matrix, W) + b)`` as a symbolic expression."""
        return self.activation_fun(T.dot(matrix, self.W) + self.b)

    def get_parameters(self):
        """Return the layer's shared variables as ``[W, b]``."""
        weights, bias = self.W, self.b
        return [weights, bias]

In [4]:
import cPickle
from itertools import izip_longest


class NeuralNetwork(object):
    """A sequential stack of layers with a cross-entropy loss.

    Typical use: create the network, ``add_layer`` each layer in forward
    order, then call ``finalize`` to build the symbolic expressions and
    compile the Theano functions. ``predict`` and
    ``get_cross_entropy_loss`` only work after ``finalize``; layers added
    after ``finalize`` are not part of the compiled functions until
    ``finalize`` is called again.
    """

    def __init__(self):
        self.layers = []
        # Symbolic expressions, populated by finalize().
        self.predict_expr = None
        self.cross_entropy_expr = None
        # Symbolic inputs shared by all expressions built from this network.
        self.batch_matrix = T.matrix('batch_matrix')
        self.true_labels = T.matrix('true_labels')
        # Compiled Theano functions, populated by finalize().
        self.__cross_entropy = None
        self.__predict = None

    def add_layer(self, layer):
        """Append ``layer`` to the end of the forward pass."""
        self.layers.append(layer)

    def get_parameters(self):
        """Return one flat list with the parameters of every layer, in layer order."""
        return [param
                for layer in self.layers
                for param in layer.get_parameters()]

    def finalize(self):
        """Build the prediction and loss expressions and compile them."""
        # Chain the layers: the output of each one feeds the next.
        self.predict_expr = self.batch_matrix
        for layer in self.layers:
            self.predict_expr = layer.get_forward_pass_expr(self.predict_expr)
        # Mean negative log of ``t * p + (1 - t) * (1 - p)``; the 1e-5 term
        # keeps the argument of the log away from zero.
        self.cross_entropy_expr = \
            -T.mean(T.log(self.true_labels * self.predict_expr +
                          (1.0 - self.true_labels) * (1.0 - self.predict_expr) + 1e-5))
        self.__cross_entropy = \
            theano.function(inputs=[self.batch_matrix, self.true_labels],
                            outputs=self.cross_entropy_expr)
        self.__predict = theano.function(inputs=[self.batch_matrix],
                                         outputs=self.predict_expr)

    def get_cross_entropy_loss(self, batch_matrix, true_labels):
        """Evaluate the compiled loss on a batch; requires ``finalize`` first."""
        return self.__cross_entropy(batch_matrix, true_labels)

    def predict(self, features):
        """Evaluate the compiled forward pass; requires ``finalize`` first."""
        return self.__predict(features)

    def save(self, file_path):
        """Pickle the network to ``file_path``.

        The file holds one flat list with four consecutive entries per
        layer: name, activation function name, W value, b value.
        """
        params = []
        for layer in self.layers:
            params.extend([layer.name,
                           layer.activation_fun.__name__,
                           layer.W.get_value(),
                           layer.b.get_value()])
        with open(file_path, 'wb') as f:
            cPickle.dump(params, f)

    @staticmethod
    def load(model_path):
        """Rebuild and finalize a network from a file written by ``save``.

        NOTE(security): unpickling can execute arbitrary code, so only load
        model files from a trusted source.
        """
        # Read in binary mode to match the 'wb' used by save(); text mode can
        # corrupt pickle data on platforms that translate line endings.
        with open(model_path, 'rb') as f:
            params = cPickle.load(f)

        # Walk the flat list four entries at a time (one group per layer).
        per_layer = izip_longest(*([iter(params)] * 4))

        nn = NeuralNetwork()
        for layer_name, act_fun_name, W, b in per_layer:
            act_fun = get_by_name(act_fun_name)
            nn.add_layer(Layer(W, b, act_fun, layer_name))
        nn.finalize()
        return nn

In [11]:
def get_momentum_updates(loss_expr, parameters, learning_rate=0.1, momentum=0.95):
    """Build Theano update pairs for gradient descent with momentum.

    For every parameter a velocity shared variable is created and the
    following updates are emitted, in this order::

        velocity <- momentum * velocity - learning_rate * grad
        param    <- param + (new velocity)

    Args:
        loss_expr: symbolic scalar cost to differentiate.
        parameters: list of shared variables to optimise.
        learning_rate: step size applied to the gradient (default ``0.1``).
        momentum: decay factor applied to the previous velocity
            (default ``0.95``).

    Returns:
        A list of ``(shared_variable, new_value_expr)`` tuples suitable for
        the ``updates`` argument of ``theano.function``.
    """
    grads = T.grad(cost=loss_expr, wrt=parameters)
    updates = []
    for param, grad in zip(parameters, grads):
        # Velocity starts at zero with the same shape and dtype as the parameter.
        velocity = theano.shared(np.zeros_like(param.get_value()))
        new_velocity = momentum * velocity - learning_rate * grad
        updates.append((velocity, new_velocity))
        updates.append((param, param + new_velocity))
    return updates

In [14]:
# Network: 600 inputs -> 300 (rectify) -> 100 (rectify) -> 1 (sigmoid).
nn = NeuralNetwork()

# Hidden layers start from small uniform random weights and biases.
# Keyword arguments are evaluated left to right, so the weights are drawn
# from the shared rng before the biases, layer by layer.
hidden_1 = Layer(W_init=get_uniform(shape=(600, 300), init_range=0.1),
                 b_init=get_uniform(shape=300, init_range=0.05),
                 activation_fun=rectify,
                 name='layer_1_rectify')
nn.add_layer(hidden_1)

hidden_2 = Layer(W_init=get_uniform(shape=(300, 100), init_range=0.1),
                 b_init=get_uniform(shape=100, init_range=0.05),
                 activation_fun=rectify,
                 name='layer_2_rectify')
nn.add_layer(hidden_2)

# The output layer starts from all-zero weights and bias.
output_layer = Layer(W_init=get_constant(shape=(100, 1)),
                     b_init=get_constant(shape=1),
                     activation_fun=sigmoid,
                     name='layer_3_sigmoid')
nn.add_layer(output_layer)

nn.finalize()

In [None]:
from itertools import count
from utils.DataIterator import DataIterator

# Keep the dataset object and the iterator derived from it under separate names.
train_data = DataIterator("data/t.txt", batch_size=128)
validation_data_iterator = DataIterator("data/v.txt", batch_size=500)

train_size = len(train_data.adjective_noun_phrases)
print('train_dataset_size: {}'.format(train_size))
valid_size = len(validation_data_iterator.adjective_noun_phrases)
print('valid_dataset_size: {}'.format(valid_size))

# Training consumes batches from this iterator one at a time
# (presumably endless, per the method name -- confirm in utils.DataIterator).
train_data_iterator = train_data.get_infinite_iterator()

In [None]:
from itertools import count

momentum_updates = get_momentum_updates(loss_expr=nn.cross_entropy_expr,
                                        parameters=nn.get_parameters())

# One call = one optimisation step: returns the batch loss and applies the
# momentum updates to the network parameters.
update_nn_params = theano.function(inputs=[nn.batch_matrix, nn.true_labels],
                                   outputs=nn.cross_entropy_expr,
                                   updates=momentum_updates)

save_freq = 1000   # write a checkpoint every this many iterations
valid_freq = 500   # report the validation loss every this many iterations
max_iters = 5000   # stop (and checkpoint) once this iteration is reached
train_cross_entropy = []
for iter_num in count():
    if iter_num % valid_freq == 0:
        # Mean loss over one full pass of the validation iterator.
        print('iter_num: {}'.format(iter_num))
        losses = [nn.get_cross_entropy_loss(batch_matrix, true_labels)
                  for batch_matrix, true_labels
                  in validation_data_iterator.get_iterator()]
        print('valid_loss: {:1.10f}'.format(np.mean(losses)))

    batch_matrix, true_labels = next(train_data_iterator)
    loss = update_nn_params(batch_matrix, true_labels)
    train_cross_entropy.append(loss)

    reached_limit = iter_num >= max_iters
    periodic_save = iter_num % save_freq == 0 and iter_num != 0
    # Save once per iteration even when the final iteration is also a
    # periodic checkpoint (previously the same file was written twice).
    if reached_limit or periodic_save:
        print('iter_num: {}'.format(iter_num))
        if reached_limit:
            print('max iters limit!!!')
        print('saving model...')
        nn.save('data/models/{}.mdl'.format(iter_num))
        print('saved')

    if reached_limit:
        break

In [18]:
# Restore the checkpoint that the training loop writes at iteration 5000.
nn = NeuralNetwork.load(model_path='data/models/5000.mdl')

In [None]:
def p(adj, noun):
    """Print the network's output for one adjective-noun pair.

    Features come from ``DataIterator.get_features(adj, noun)``; the first
    element of the first row of ``nn.predict`` is printed next to the pair.
    """
    features = DataIterator.get_features(adj, noun)
    score = nn.predict(features)[0][0]
    # Call-style print matches the other cells and gives identical output.
    print('%20s %-20s %0.5f' % (adj, noun, score))

# Spot-check the loaded model on a handful of adjective-noun pairs.
example_pairs = [
    ('colourless', 'ideas'),
    ('interesting', 'ideas'),
    ('monthly', 'democracy'),
    ('reproducible', 'experiment'),
    ('fragrant', 'stench'),
    ('direct', 'democracy'),
    ('reproducible', 'accelerator'),
    ('unbiased', 'neighborhood'),
    ('unbiased', 'opinion'),
    ('noisy', 'semiconductor'),
]
for adjective, head_noun in example_pairs:
    p(adjective, head_noun)