In [None]:
import numpy as np
import scipy

In [None]:
import keras
from keras import backend as K
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers import Input, Embedding
from keras.layers.core import Reshape, Flatten, Dense, Dropout, Lambda
#from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, RMSprop
from keras.preprocessing import image

In [None]:
# --- Hyper-parameters of the n-gram language model ---
vocab_size = 250       # number of distinct word indices; valid indices are 0 .. vocab_size-1
n_words = 3            # because we use 3-grams as inputs
n_embed = 50           # length of the embedding vector learned for each word
n_hidden = 200         # number of units in the hidden layer
n_output = vocab_size  # one output unit per vocabulary word

batch_size = 100

model = Sequential()

# Tricky part : needs to turn words into embedded vectors
#    Documentation : https://keras.io/layers/embeddings/
#    The model takes as input an integer matrix of shape (batch_size, n_words).
#    Every integer (i.e. word index) must lie in [0, vocab_size): the first Embedding
#    argument is "maximum index + 1", so an index equal to vocab_size is out of range.
#    After this layer model.output_shape == (None, n_words, n_embed), where None is the
#    batch dimension.
model.add(Embedding(vocab_size, n_embed, input_length=n_words))

# Join the n_words embedding vectors of each sample into a single vector of length
# n_words*n_embed.
# NOTE(review): presumably Reshape keeps row-major order, so no axis permutation (as is
# sometimes needed with raw TF) should be required -- TODO confirm.
model.add(Reshape((n_words * n_embed,)))

# Standard neural-network layers.
# Only the first layer of a Sequential model needs an explicit input shape; each Dense
# layer takes its input size from the layer added before it, so no `input_shape` is passed.
model.add(Dense(n_hidden, activation='sigmoid'))  # embed_to_hidden
model.add(Dense(n_output, activation='softmax'))  # hidden_to_output

# categorical_crossentropy expects one-hot targets, i.e. shape (batch_size, n_output).
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Switch: set to False to skip the smoke test of the full model.
DEBUG = True

if DEBUG:
    # Push one batch of random word indices, drawn from [0, vocab_size), through the
    # model and check that it returns one row of vocab_size values per sample.
    input_array = np.random.randint(low=0, high=vocab_size, size=(batch_size, n_words))
    output_array = model.predict(input_array)
    assert (batch_size, vocab_size) == output_array.shape

In [None]:
# Switch: set to False to skip the layer-by-layer shape checks.
TEST = True

if TEST:
    # Shape check of the Embedding layer on its own.
    #    Documentation : https://keras.io/layers/embeddings/
    #    Input  : integer matrix of shape (batch_size, n_words), indices in [0, vocab_size).
    #    Output : modelE.output_shape == (None, n_words, n_embed); None is the batch dimension.
    modelE = Sequential([
        Embedding(vocab_size, n_embed, input_length=n_words),
    ])
    # No training is done in this cell; only predict() is called afterwards.
    modelE.compile(optimizer='rmsprop', loss='mse')

    # One batch of random word indices in, one embedding vector per word out.
    input_array = np.random.randint(low=0, high=vocab_size, size=(batch_size, n_words))
    output_array = modelE.predict(input_array)
    print(output_array.shape)
    assert (batch_size, n_words, n_embed) == output_array.shape

if TEST:
    # Shape check of Embedding followed by Reshape: the n_words embedding vectors of
    # each sample are joined into one vector.
    #    Output : modelER.output_shape == (None, n_words*n_embed)
    #    note: `None` is the batch dimension
    modelER = Sequential([
        Embedding(vocab_size, n_embed, input_length=n_words),
        Reshape((n_words * n_embed,)),
    ])
    # No training is done in this cell; only predict() is called afterwards.
    modelER.compile(optimizer='rmsprop', loss='mse')

    input_array = np.random.randint(low=0, high=vocab_size, size=(batch_size, n_words))
    output_array = modelER.predict(input_array)
    print(output_array.shape)
    assert (batch_size, n_words * n_embed) == output_array.shape