RNN
pchavanne committed Jan 31, 2017
1 parent 75fdeda commit 981804b
Showing 3 changed files with 90 additions and 124 deletions.
91 changes: 42 additions & 49 deletions examples/lstm_example.py
@@ -5,83 +5,76 @@
This example shows how to train an LSTM for text generation.
"""
import os
import numpy as np
import yadll

import logging

logging.basicConfig(level=logging.DEBUG, format='%(message)s')

# load the data
datafile = 'nietzsche.txt'
if not os.path.isfile(datafile):
import urllib
origin = 'https://s3.amazonaws.com/text-datasets/nietzsche.txt'
print 'Downloading data from %s' % origin
urllib.urlretrieve(origin, datafile)
data = yadll.data.Data(datafile)
# Create the data
alphabet = 'abcdefghijklmnopqrstuvwxyz'
number_of_chars = len(alphabet)
sequence_length = 2
sentences = [alphabet[i: i + sequence_length] for i in range(len(alphabet) - sequence_length)]
next_chars = [alphabet[i + sequence_length] for i in range(len(alphabet) - sequence_length)]

# Transform sequences and labels into 'one-hot' encoding
X = np.zeros((len(sentences), sequence_length, number_of_chars), dtype=np.bool)
y = np.zeros((len(sentences), number_of_chars), dtype=np.bool)
for i, sentence in enumerate(sentences):
for t, char in enumerate(sentence):
X[i, t, ord(char) - ord('a')] = 1
y[i, ord(next_chars[i]) - ord('a')] = 1
data = yadll.data.Data(data=[(X, y), (X, y), (X, y)])

# create the model
model = yadll.model.Model(name='lstm', data=data)

# Hyperparameters
hp = yadll.hyperparameters.Hyperparameters()
hp('batch_size', 128)
hp('n_epochs', 1000)
hp('learning_rate', 0.9)
hp('momentum', 0.5)
hp('l1_reg', 0.00)
hp('l2_reg', 0.0000)
hp('patience', 10000)
hp('batch_size', 1)
hp('n_epochs', 60)

# add the hyperparameters to the model
model.hp = hp

# Create connected layers
# Input layer
l_in = yadll.layers.InputLayer(input_shape=(hp.batch_size, 28 * 28), name='Input')
# Dropout Layer 1
l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.4, name='Dropout 1')
# Dense Layer 1
l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, n_units=100, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 1')
# Dropout Layer 2
l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.2, name='Dropout 2')
# Dense Layer 2
l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, n_units=100, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 2')
l_in = yadll.layers.InputLayer(input_shape=(hp.batch_size, sequence_length, number_of_chars))
# LSTM 1
l_lstm1 = yadll.layers.LSTM(incoming=l_in, n_units=16, last_only=False)
# LSTM 2
l_lstm2 = yadll.layers.LSTM(incoming=l_lstm1, n_units=16)
# Logistic regression Layer
l_out = yadll.layers.LogisticRegression(incoming=l_hid2, n_class=10, l1=hp.l1_reg,
l2=hp.l2_reg, name='Logistic regression')
l_out = yadll.layers.LogisticRegression(incoming=l_lstm2, n_class=number_of_chars)

# Create network and add layers
net = yadll.network.Network('2 layers mlp with dropout')
net = yadll.network.Network('stacked lstm')
net.add(l_in)
net.add(l_dro1)
net.add(l_hid1)
net.add(l_dro2)
net.add(l_hid2)
net.add(l_lstm1)
net.add(l_lstm2)
net.add(l_out)

# add the network to the model
model.network = net

# updates method
model.updates = yadll.updates.newton
model.updates = yadll.updates.adam

# train the model and save it to file at each best
model.train()

# saving network paramters
net.save_params('net_params.yp')

# make prediction
# We can test it on some examples from test
test_set_x = data.test_set_x.get_value()
test_set_y = data.test_set_y.eval()

predicted_values = model.predict(test_set_x[:30])
# prime the model with 'ab' sequence and let it generate the learned alphabet
sentence = alphabet[:sequence_length]
generated = sentence
for iteration in range(number_of_chars - sequence_length):
x = np.zeros((1, sequence_length, number_of_chars))
for t, char in enumerate(sentence):
x[0, t, ord(char) - ord('a')] = 1.
preds = model.predict(x)[0]
next_char = chr(np.argmax(preds) + ord('a'))
generated += next_char
sentence = sentence[1:] + next_char

print ("Model 1, predicted values for the first 30 examples in test set:")
print predicted_values
print test_set_y[:30]
# check that it did generate the alphabet correctly
assert(generated == alphabet)
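
For readers unfamiliar with the encoding and decoding used above, the following standalone NumPy sketch mirrors the example's one-hot layout and greedy generation loop. It replaces model.predict with a hypothetical predict_fn stub (a simple table lookup into the training windows), so it runs without yadll or a trained network.

import numpy as np

alphabet = 'abcdefghijklmnopqrstuvwxyz'
number_of_chars = len(alphabet)
sequence_length = 2

# One-hot encode every two-character window and the character that follows it,
# exactly as in the example above.
sentences = [alphabet[i: i + sequence_length] for i in range(len(alphabet) - sequence_length)]
next_chars = [alphabet[i + sequence_length] for i in range(len(alphabet) - sequence_length)]
X = np.zeros((len(sentences), sequence_length, number_of_chars), dtype=bool)
y = np.zeros((len(sentences), number_of_chars), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, ord(char) - ord('a')] = 1
    y[i, ord(next_chars[i]) - ord('a')] = 1
assert X.shape == (24, 2, 26) and y.shape == (24, 26)

# Hypothetical stand-in for model.predict: look the window up in the training
# data instead of running an LSTM forward pass.
def predict_fn(x):
    idx = int(np.argmax((X == x.astype(bool)).all(axis=(1, 2))))
    return y[idx:idx + 1].astype(float)

# Greedy generation, as in the example: prime with 'ab', then repeatedly feed
# back the most probable next character.
sentence = alphabet[:sequence_length]
generated = sentence
for _ in range(number_of_chars - sequence_length):
    x = np.zeros((1, sequence_length, number_of_chars))
    for t, char in enumerate(sentence):
        x[0, t, ord(char) - ord('a')] = 1.
    preds = predict_fn(x)[0]
    next_char = chr(int(np.argmax(preds)) + ord('a'))
    generated += next_char
    sentence = sentence[1:] + next_char
assert generated == alphabet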
6 changes: 4 additions & 2 deletions yadll/model.py
@@ -79,12 +79,13 @@ class Model(object):
"""
def __init__(self, network=None, data=None, hyperparameters=None, name='model',
updates=sgd, file=None):
updates=sgd, objective=categorical_crossentropy, file=None):
self.network = network
self.data = data # data [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
self.name = name
self.hp = hyperparameters
self.updates = updates
self.objective = objective
self.file = file
self.save_mode = None # None, 'end' or 'each'
self.index = T.iscalar() # index to a [mini]batch
@@ -170,7 +171,8 @@ def train(self, unsupervised_training=True, save_mode=None):
n_valid_batches = self.data.valid_set_x.get_value(borrow=True).shape[0] / self.hp.batch_size
n_test_batches = self.data.test_set_x.get_value(borrow=True).shape[0] / self.hp.batch_size

cost = -T.mean(T.log(self.network.get_output(stochastic=True))[T.arange(self.y.shape[0]), self.y])
#cost = -T.mean(T.log(self.network.get_output(stochastic=True))[T.arange(self.y.shape[0]), self.y])
cost = - self.objective(prediction=self.network.get_output(stochastic=True), target=self.y)
# add regularisation
cost += self.network.reguls
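
The new objective argument (defaulting to categorical cross-entropy) turns the loss into something callers inject, instead of the negative log-likelihood formerly hard-coded in train(). Below is a minimal, framework-free sketch of the same pattern in plain NumPy, using hypothetical names (ToyModel, mse) rather than yadll's actual classes.

import numpy as np

def mse(prediction, target):
    # Same formula as mean_squared_error in yadll/objectives.py, in NumPy.
    return np.mean(np.square(prediction - target), axis=-1)

class ToyModel(object):
    # Hypothetical stand-in illustrating the pluggable-objective pattern.
    def __init__(self, objective=mse):
        self.objective = objective

    def cost(self, prediction, target):
        # The training loop only ever calls self.objective, so swapping the
        # loss never requires editing the loop itself.
        return self.objective(prediction=prediction, target=target).mean()

prediction = np.array([[0.8, 0.1, 0.1]])
target = np.array([[1.0, 0.0, 0.0]])
print(ToyModel().cost(prediction, target))                # default objective
print(ToyModel(objective=mse).cost(prediction, target))   # explicitly injected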

117 changes: 44 additions & 73 deletions yadll/objectives.py
@@ -10,122 +10,93 @@

def mean_squared_error(prediction, target):
r"""
Mean Squared Error
Mean Squared Error: MSE
.. math:: MSE_i = \frac{1}{n} \sum_{j}{(target_{i,j} - prediction_{i,j})^2}
.. math:: MSE_i = \frac{1}{n} \sum_{j}{(prediction_{i,j} - target_{i,j})^2}
Parameters
----------
prediction : Theano tensor
The predicted values
target : Theano tensor
The target values
Returns
-------
MSE
"""
return T.mean(T.square(prediction - target), axis=-1)


def root_mean_squared_error(prediction, target):
r"""
Root Mean Squared Error
Root Mean Squared Error: RMSE
.. math:: RMSE_i = \sqrt{\frac{1}{n} \sum_{j}{(target_{i,j} - prediction_{i,j})^2}}
Parameters
----------
prediction : Theano tensor
The predicted values
target : Theano tensor
The target values
Returns
-------
RMSE
"""
return T.sqrt(T.mean(T.square(prediction - target), axis=-1))


def mean_absolute_error(prediction, target):
r"""
Mean Absolute Error
Mean Absolute Error: MAE
.. math:: MAE_i = \frac{1}{n} \sum_{j}{\big|{target_{i,j} - prediction_{i,j}}}
.. math:: MAE_i = \frac{1}{n} \sum_{j}{\big|target_{i,j} - prediction_{i,j}\big|}
Parameters
----------
prediction : Theano tensor
The predicted values
target : Theano tensor
The target values
Returns
-------
MAE
"""
return T.mean(T.abs_(prediction - target), axis=-1)


def hinge(prediction, target):
def binary_hinge_error(prediction, target):
r"""
Hinge Error
Binary Hinge Error: BHE
.. math:: hinge_i = \frac{1}{n} \sum_{j}{\max(1. - target_{i,j} * prediction_{i,j}, 0.)}
"""
return T.mean(T.maximum(1. - target * prediction, 0.), axis=-1)

.. math:: MAE_i = \frac{1}{n} \sum_{j}{\max(1. - target_{i,j} * prediction_{i,j}, 0.)}

Parameters
----------
prediction : Theano tensor
The predicted values
target : Theano tensor
The target values
def categorical_hinge_error(prediction, target):
r"""
Categorical Hinge Error: CHE
.. math:: hinge_i = \frac{1}{n} \sum_{j}{\max(1. - target_{i,j} * prediction_{i,j}, 0.)}
Returns
-------
Hinge
"""
return T.mean(T.maximum(1. - target * prediction, 0.), axis=-1)


def binary_crossentropy(prediction, target):
def binary_crossentropy_error(prediction, target):
r"""
Binary Crossentropy Error
.. math:: MAE_i = \frac{1}{n} \sum_{j}{\big|{target_{i,j} - prediction_{i,j}}}
Binary Cross-entropy Error: BCE
Parameters
----------
prediction : Theano tensor
The predicted values
target : Theano tensor
The target values
.. math:: BCE_i = \frac{1}{n} \sum_{j}{-(target_{i,j} * \log(prediction_{i,j})
+ (1 - target_{i,j}) * \log(1 - prediction_{i,j}))}
Returns
-------
Binary crossentropy
"""
clip_pred = T.clip(prediction, EPSILON, 1 - EPSILON)
return T.mean(T.nnet.binary_crossentropy(clip_pred, target), axis=-1)


def categorical_crossentropy(prediction, target):
def categorical_crossentropy_error(prediction, target):
r"""
Categorical Crossentropy Error
Categorical Cross-entropy Error: CCE
.. math:: CCE_i = - \sum_{j}{target_{i,j} \log(prediction_{i,j})}
"""
prediction /= prediction.sum(axis=-1, keepdims=True)
prediction = T.clip(prediction, EPSILON, 1 - EPSILON)
return T.mean(T.nnet.categorical_crossentropy(prediction, target), axis=-1)


def kullback_leibler_divergence(prediction, target):
r"""
Kullback Leibler Divergence: KLD
Parameters
----------
prediction : Theano tensor
The predicted values
target : Theano tensor
The target values
.. math:: KLD_i = \sum_{j}{target_{i,j} \log\left(\frac{target_{i,j}}{prediction_{i,j}}\right)}
Returns
-------
Kullback-Leibler divergence
"""
prediction /= prediction.sum(axis=-1, keepdims=True)
prediction = T.clip(prediction, EPSILON, 1 - EPSILON)
return T.mean(T.nnet.categorical_crossentropy(prediction, target), axis=-1)
return T.mean(T.nnet.categorical_crossentropy(prediction, target), axis=-1)

# Aliases
mse = MSE = mean_squared_error
rmse = RMSE = root_mean_squared_error
mae = MAE = mean_absolute_error
bhe = BHE = binary_hinge_error
che = CHE = categorical_hinge_error
bce = BCE = binary_crossentropy_error
cce = CCE = categorical_crossentropy_error
kld = KLD = kullback_leibler_divergence
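
As a quick numeric illustration of the formulas documented above, here is a NumPy re-implementation of two of the losses on a toy batch. This is not the library's Theano code, only a sanity check of the docstring math; the EPSILON value is assumed.

import numpy as np

EPSILON = 1e-7  # assumed clipping constant; yadll defines its own

def mean_squared_error(prediction, target):
    # MSE_i = (1/n) * sum_j (prediction_ij - target_ij)^2
    return np.mean(np.square(prediction - target), axis=-1)

def binary_crossentropy_error(prediction, target):
    # BCE_i = (1/n) * sum_j -(t*log(p) + (1-t)*log(1-p)), with p clipped
    p = np.clip(prediction, EPSILON, 1 - EPSILON)
    return np.mean(-(target * np.log(p) + (1 - target) * np.log(1 - p)), axis=-1)

prediction = np.array([[0.9, 0.1], [0.2, 0.8]])
target = np.array([[1.0, 0.0], [0.0, 1.0]])
print(mean_squared_error(prediction, target))         # ~[0.01, 0.04]
print(binary_crossentropy_error(prediction, target))  # ~[0.105, 0.223]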
