In [2]:
import numpy as np
import scipy

In [3]:
import keras
from keras import backend as K
from keras.utils.data_utils import get_file
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Input, Embedding
from keras.layers.core import Reshape, Flatten, Dense, Dropout, Lambda
from keras.optimizers import SGD, RMSprop

Using Theano backend.


In [4]:
# Mini-batch size: passed to model.fit and used as the row count of the
# random inputs in the shape checks.
batch_size = 100

# Network dimensions.
vocab_size = 250        # first argument (vocabulary size) of the Embedding layer
n_output = vocab_size   # width of the softmax output layer
n_words = 3             # because we use 3-grams as inputs
n_embed = 50            # length of each embedded word vector
n_hidden = 200          # units in the sigmoid hidden layer

In [14]:
# Optimizer switch (same convention as the DEBUG / TEST flags):
#   False -> SGD with explicit learning rate and momentum, to be consistent
#            with G Hinton Octave code
#   True  -> RMSprop with Keras' default parameters
USE_RMSPROP = False

model = Sequential()

# Tricky part : needs to turn words into embedded vectors
#    Documentation : https://keras.io/layers/embeddings/
#    The model takes as input an integer matrix of shape (batch_size, n_words).
#    Embedding's first argument is the vocabulary size, i.e. largest index + 1,
#    so every word index must lie in [0, vocab_size - 1].
#    After this layer model.output_shape == (None, n_words, n_embed),
#    where None is the batch dimension.
model.add(Embedding(vocab_size, n_embed, input_length=n_words))
# Join the n_words embeddings of each sample into a single vector:
#    (None, n_words, n_embed) -> (None, n_words*n_embed)
# NOTE(review): Reshape is expected to fill in row-major order (all of word 0's
# embedding, then word 1's, ...), in which case no dimension permutation is
# needed -- confirm for the backend in use.
model.add(Reshape((n_words*n_embed, )))

# Standard neural-network layers.
# Only the first layer of a Sequential model needs an input shape; the Dense
# layers below infer theirs from the output of the preceding layer.
model.add(Dense(n_hidden, activation='sigmoid')) # embed_to_hidden
model.add(Dense(n_output, activation='softmax')) # hidden_to_output

if USE_RMSPROP:
    # optimizer with default parameters
    optimizer = 'rmsprop'
else:
    # specify LearningRate and Momentum to be consistent with G Hinton Octave code
    sgd = SGD(lr=0.1, momentum=0.9, nesterov=False)
    optimizer = sgd
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [15]:
# Notebook switches.
TEST = True    # run the per-layer shape tests and the trial fits guarded by `if TEST:`
DEBUG = True   # run the output-shape check guarded by `if DEBUG:`

In [16]:
if DEBUG:
    # Shape smoke test: push one batch of random word indices, drawn from
    # [0, vocab_size), through the model and check that it returns one row
    # of vocab_size outputs per input row.
    input_array = np.random.randint(0, vocab_size, size=(batch_size, n_words))
    output_array = model.predict(input_array)
    expected_shape = (batch_size, vocab_size)
    assert output_array.shape == expected_shape

In [17]:
if TEST:
    # Check the Embedding layer on its own.
    #    Documentation : https://keras.io/layers/embeddings/
    #    Input  : integer matrix of shape (batch_size, n_words); the random
    #             indices generated below lie in [0, vocab_size - 1].
    #    Output : (None, n_words, n_embed), where None is the batch dimension.
    modelE = Sequential()
    modelE.add(Embedding(input_dim=vocab_size, output_dim=n_embed, input_length=n_words))
    # The model is only used for predict(); optimizer and loss are placeholders.
    modelE.compile(optimizer='rmsprop', loss='mse')

    input_array = np.random.randint(0, vocab_size, size=(batch_size, n_words))
    output_array = modelE.predict(input_array)
    print(output_array.shape)
    expected_shape = (batch_size, n_words, n_embed)
    assert output_array.shape == expected_shape

(100, 3, 50)


In [18]:
if TEST:
    # Check Embedding followed by Reshape: the n_words embedded vectors of
    # each sample are merged into one vector of length n_words*n_embed.
    flat_size = n_words * n_embed
    modelER = Sequential()
    modelER.add(Embedding(input_dim=vocab_size, output_dim=n_embed, input_length=n_words))
    modelER.add(Reshape((flat_size, )))
    # now: model.output_shape == (None, n_words*n_embed)
    # note: `None` is the batch dimension
    # The model is only used for predict(); optimizer and loss are placeholders.
    modelER.compile(optimizer='rmsprop', loss='mse')

    input_array = np.random.randint(0, vocab_size, size=(batch_size, n_words))
    output_array = modelER.predict(input_array)
    print(output_array.shape)
    assert output_array.shape == (batch_size, flat_size)

(100, 150)


In [19]:
if TEST:
    # Trial fit on random data, just to check that training runs end to end.
    n_test_samples = 300
    # random n-gram inputs: integer word indices in [0, vocab_size)
    data = np.random.randint(0, vocab_size, size=(n_test_samples, n_words))
    # random integer targets, one per sample, converted to a binary matrix of
    # size (n_test_samples, vocab_size) for use with categorical_crossentropy
    labels = to_categorical(
        np.random.randint(0, vocab_size, size=(n_test_samples, 1)),
        vocab_size)
    # train the model
    model.fit(data, labels, nb_epoch=5, batch_size=batch_size)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [20]:
from utilities import load_data

In [21]:
''' SMALL SCALE VERSION TO CHECK IT RUNS FINE '''
if TEST:
    # Load the train / validation / test splits and the vocabulary.
    # NOTE(review): the argument of load_data looks like the number of samples
    # per batch (the shape printed below has 1500 on its second axis) -- confirm
    # against utilities.load_data.
    (train_input, train_target,
     valid_input, valid_target,
     test_input, test_target,
     vocab) = load_data(1500)
    print(train_input.shape)
    # Keep only index 0 of the last axis and transpose, so that the middle
    # axis of the loaded arrays becomes the row axis; keras.fit takes care
    # of batching.
    data = train_input[:, :, 0].T
    target = train_target[:, :, 0].T
    # binary matrix with vocab_size columns for categorical_crossentropy
    labels = to_categorical(target, vocab_size)
    # train the model for a single epoch
    model.fit(data, labels, nb_epoch=1, batch_size=batch_size)

(3, 1500, 248)
Epoch 1/1


In [22]:
# Full-size run: load the data in very large batches, since keras.fit takes
# care of the mini-batching itself.
(train_input, train_target,
 valid_input, valid_target,
 test_input, test_target,
 vocab) = load_data(150000)
# the last axis of train_input must have length exactly 2
assert train_input.shape[-1] == 2
# NOTE(review): only index 0 of that last axis is selected below, so the
# second slice required by the assert is never used for training -- confirm
# this is intended.
data = train_input[:, :, 0].T
target = train_target[:, :, 0].T
# binary matrix with vocab_size columns for categorical_crossentropy
labels = to_categorical(target, vocab_size)
# some debug information: shapes of the loaded and the derived arrays
for debug_array in (train_input, data, target, labels):
    print(debug_array.shape)
# train the model
print('FIT THE MODEL')
model.fit(data, labels, nb_epoch=10, batch_size=batch_size)

(3, 150000, 2)
(150000, 3)
(150000, 1)
(150000, 250)
FIT THE MODEL
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f01197bb490>