In [19]:
"""Constants shared by the model definition and the training script."""

# Vocabulary indices reserved for padding (`empty`) and end-of-sentence (`eos`).
empty, eos = 0, 1

# Layout of one training example:
#   * Input (X): up to `maxlend` description words, then `eos`, then the
#     headline words, then another `eos`.
#       - A description shorter than `maxlend` is left-padded with `empty`.
#       - A sequence longer than `maxlen` is clipped; a shorter one is
#         right-padded with `empty`.
#   * Labels (Y): the headline words followed by `eos`, clipped or padded to
#     `maxlenh`.
# Put differently, the input has a `maxlend`-long first half (left-padded
# description) and a `maxlenh`-long second half (`eos`, headline, and a closing
# `eos` if there is room). The labels line up with the second half only: the
# first label corresponds to the `eos` that opens the second half.
maxlend, maxlenh = 100, 15
maxlen = maxlend + maxlenh
# The attention ("activation") slice only exists when there is a description half.
if maxlend:
    activation_rnn_size = 40
else:
    activation_rnn_size = 0
nb_unknown_words = 10

# File names (without extension)
FN0 = 'vocabulary-embedding'  # vocabulary embeddings
FN1 = 'train'  # model weights

# Training settings
seed = 42
optimizer = 'adam'
# Dropout probabilities and weight decay are all switched off.
p_W = p_U = p_dense = p_emb = weight_decay = 0
regularizer = None


In [20]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.wrappers import TimeDistributed
from keras.layers.recurrent import LSTM
from keras.layers.embeddings import Embedding
from keras.layers.core import Lambda
import keras.backend as K
import numpy as np

#from utils import str_shape
#from constants import maxlend, maxlenh, maxlen, activation_rnn_size, optimizer, p_W, p_U, p_dense, p_emb, regularizer


def inspect_model(model):
    """Print the structure of Keras `model`.

    For every layer in `model.layers` this prints three lines: the layer
    index with its class name and `name`, the shapes of the arrays returned
    by `get_weights()` (each formatted like ``2x3`` and followed by a space),
    and a blank separator line. A layer without weights prints an empty
    shape line.

    Args:
        model: object exposing a `layers` iterable whose items provide
            `name` and `get_weights()`.
    """
    for index, layer in enumerate(model.layers):
        print(index, 'cls={} name={}'.format(type(layer).__name__, layer.name))
        # Shapes are formatted inline so the function needs no external helper.
        shape_summary = ''.join(
            'x'.join(str(dim) for dim in weight.shape) + ' '
            for weight in layer.get_weights()
        )
        print(shape_summary)
        print()


class SimpleContext(Lambda):
    """Keras `Lambda` layer wrapping a context function such as `simple_context`.

    On top of `Lambda`, it declares mask support, trims the incoming mask to
    the columns from `maxlend` onward, and reports an output shape of
    `maxlenh` time steps.
    """

    def __init__(self, fn, rnn_size, **kwargs):
        """Remember `rnn_size`, pass `fn` and `kwargs` to `Lambda`, enable masking."""
        self.rnn_size = rnn_size
        super(SimpleContext, self).__init__(fn, **kwargs)
        self.supports_masking = True

    def compute_mask(self, input, input_mask=None):
        """Return `input_mask` with its first `maxlend` columns dropped."""
        trailing_mask = input_mask[:, maxlend:]
        return trailing_mask

    def get_output_shape_for(self, input_shape):
        """Return (samples, maxlenh, 2 * (rnn_size - activation_rnn_size))."""
        # Width left in each RNN output once the activation slice is removed.
        word_width = self.rnn_size - activation_rnn_size
        return (input_shape[0], maxlenh, 2 * word_width)


def create_model(vocab_size, embedding_size, LR, rnn_layers, rnn_size, embedding=None):
    """Construct and compile the stacked-LSTM model.

    The network is a `Sequential` model made of, in order: a masked
    `Embedding` over `maxlen` time steps, `rnn_layers` LSTM layers (each
    followed by `Dropout`), an optional `SimpleContext` attention layer (added
    only when `activation_rnn_size` is non-zero), a time-distributed `Dense`
    projection to `vocab_size`, and a softmax activation.

    Args:
        vocab_size: number of rows in the embedding and width of the output layer.
        embedding_size: width of each embedding vector.
        LR: learning rate written into the compiled optimizer.
        rnn_layers: number of LSTM + Dropout pairs to stack.
        rnn_size: number of units in every LSTM layer.
        embedding: optional initial weights for the embedding layer; when
            given, it is wrapped in a list and passed as `weights`.

    Returns:
        The compiled Keras model.
    """
    # create a standard stacked LSTM
    # Keras expects `weights` to be a list of arrays, hence the wrapping.
    if embedding is not None:
        embedding = [embedding]
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_size,
                        input_length=maxlen,
                        W_regularizer=regularizer, dropout=p_emb, weights=embedding, mask_zero=True,
                        name='embedding_1'))
    # Layer names are 1-based: lstm_1/dropout_1, lstm_2/dropout_2, ...
    for i in range(rnn_layers):
        lstm = LSTM(rnn_size, return_sequences=True,
                    W_regularizer=regularizer, U_regularizer=regularizer,
                    b_regularizer=regularizer, dropout_W=p_W, dropout_U=p_U,
                    name='lstm_{}'.format(i + 1))
        model.add(lstm)
        model.add(Dropout(p_dense, name='dropout_{}'.format(i + 1)))

    def simple_context(X, mask, n=activation_rnn_size):
        """Reduce the input to its headline part (second half) plus description context.

        For each word in the headline part, it concatenates the output of the
        previous layer (RNN) with a weighted average of the outputs of the
        description part.
        Only the features from index `n` onward of each output are used as
        word representations; the first `n` features are used to compute the
        weights for the averaging.

        Args:
            X: output of the previous layer; sliced at `maxlend` along axis 1
                and at `n` along the last axis
                (assumes shape (batch, maxlen, rnn_size) — TODO confirm).
            mask: mask for X; only its first `maxlend` columns are read.
            n: number of leading features used as attention activations.

        Returns:
            Concatenation of the weighted description average with the
            headline word features.
        """
        # Split time steps into description / headline, then split features
        # into attention activations (first n) and word features (the rest).
        desc, head = X[:, :maxlend, :], X[:, maxlend:, :]
        head_activations, head_words = head[:, :, :n], head[:, :, n:]
        desc_activations, desc_words = desc[:, :, :n], desc[:, :, n:]

        # RTFM http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.batched_tensordot
        # activation for every head word and every desc word
        activation_energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2))
        # make sure we don't use description words that are masked out:
        # adding -1e20 where the mask is 0 drives their softmax weight to ~0
        activation_energies = activation_energies + -1e20 * K.expand_dims(
            1. - K.cast(mask[:, :maxlend], 'float32'), 1)

        # for every head word compute weights for every desc word
        # (flattened to rows of length maxlend for the softmax, then restored)
        activation_energies = K.reshape(activation_energies, (-1, maxlend))
        activation_weights = K.softmax(activation_energies)
        activation_weights = K.reshape(activation_weights, (-1, maxlenh, maxlend))

        # for every head word compute weighted average of desc words
        desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1))
        return K.concatenate((desc_avg_word, head_words))

    # The attention layer is skipped entirely when activation_rnn_size is 0.
    if activation_rnn_size:
        model.add(SimpleContext(simple_context, rnn_size, name='simplecontext_1'))

    model.add(TimeDistributed(Dense(
        vocab_size,
        W_regularizer=regularizer,
        b_regularizer=regularizer,
        name='timedistributed_1')))
    model.add(Activation('softmax', name='activation_1'))

    # The optimizer is built from the module-level `optimizer` setting, so the
    # learning rate is assigned after compilation rather than via e.g. Adam(lr=LR).
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    K.set_value(model.optimizer.lr, np.float32(LR))
    return model

In [None]:
# %load constants.py
"""Constants for the data layout, file names and training settings."""

# Reserved vocabulary indices: padding and end-of-sentence.
empty, eos = 0, 1

# How a sample is laid out:
#   X = description (at most `maxlend` words) + eos + headline words + eos.
#       A short description is left-padded with `empty`; the whole sequence
#       is clipped to `maxlen`, or right-padded with `empty` when shorter.
#   Y = headline words + eos, clipped or padded to `maxlenh`.
# So X consists of a `maxlend` half holding the left-padded description and a
# `maxlenh` half holding eos, the headline and, space permitting, a final eos.
# Y covers the second half only, and its first entry matches the eos that
# starts that half (right after the description).
maxlend, maxlenh = 100, 15
maxlen = maxlend + maxlenh
# No description half means no attention slice.
if maxlend:
    activation_rnn_size = 40
else:
    activation_rnn_size = 0
nb_unknown_words = 10

# File names (without extension)
FN0 = 'vocabulary-embedding'  # vocabulary embeddings
FN1 = 'train'  # model weights

# Training settings
seed = 42
optimizer = 'adam'
# All dropout probabilities and the weight decay are disabled.
p_W = p_U = p_dense = p_emb = weight_decay = 0
regularizer = None


In [27]:
import os
import time
import random
import argparse
import json


import numpy as np
from keras.callbacks import TensorBoard

import config
from sample_gen import gensamples
from utils import prt, load_embedding, process_vocab, load_split_data
from model import create_model, inspect_model
from generate import gen
#from constants import FN1, seed, nb_unknown_words

# Training script: parse options, load data, build the model, optionally
# warm-start from saved weights, train, save weights, and print generated
# samples before and after training.

# parse arguments
parser = argparse.ArgumentParser()
parser.add_argument('--batch-size', type=int, default=32, help='input batch size')
parser.add_argument('--epochs', type=int, default=10, help='number of epochs')
parser.add_argument('--rnn-size', type=int, default=512, help='size of RNN layers')
parser.add_argument('--rnn-layers', type=int, default=3, help='number of RNN layers')
parser.add_argument('--nsamples', type=int, default=640, help='number of samples per epoch')
parser.add_argument('--nflips', type=int, default=0, help='number of flips')
parser.add_argument('--temperature', type=float, default=.8, help='RNN temperature')
parser.add_argument('--lr', type=float, default=0.0001, help='learning rate, default=0.0001')
parser.add_argument('--warm-start', action='store_true')
# parse_known_args() tolerates the extra arguments a Jupyter kernel launcher
# puts on sys.argv (e.g. `-f <connection file>`), which make parse_args() exit.
args, _unknown_args = parser.parse_known_args()
batch_size = args.batch_size

# set sample sizes: round nsamples down to a whole number of batches
# (integer floor division; `np.int` no longer exists in current NumPy)
nb_train_samples = (args.nsamples // batch_size) * batch_size  # num training samples
nb_val_samples = nb_train_samples  # num validation samples

# seed weight initialization
random.seed(seed)
np.random.seed(seed)

embedding, idx2word, word2idx, glove_idx2idx = load_embedding(nb_unknown_words)
vocab_size, embedding_size = embedding.shape
# The last nb_unknown_words vocabulary slots start at index oov0.
oov0 = vocab_size - nb_unknown_words
idx2word = process_vocab(idx2word, vocab_size, oov0, nb_unknown_words)
X_train, X_test, Y_train, Y_test = load_split_data(nb_val_samples, seed)

# Vocabulary-related keyword arguments shared by every gen()/gensamples() call.
vocab_kwargs = dict(
    idx2word=idx2word,
    oov0=oov0,
    glove_idx2idx=glove_idx2idx,
    vocab_size=vocab_size,
    nb_unknown_words=nb_unknown_words,
)

# print a sample recipe to make sure everything looks right
print('Random head, description:')
i = 811
prt('H', Y_train[i], idx2word)
prt('D', X_train[i], idx2word)

# save model initialization parameters
model_params = dict(
    vocab_size=vocab_size,
    embedding_size=embedding_size,
    LR=args.lr,
    rnn_layers=args.rnn_layers,
    rnn_size=args.rnn_size,
)
with open(os.path.join(config.path_models, 'model_params.json'), 'w') as f:
    json.dump(model_params, f)


model = create_model(
    vocab_size=vocab_size,
    embedding_size=embedding_size,
    LR=args.lr,
    embedding=embedding,
    rnn_layers=args.rnn_layers,
    rnn_size=args.rnn_size,
)
inspect_model(model)

# load pre-trained model weights
FN1_filename = os.path.join(config.path_models, '{}.hdf5'.format(FN1))
if args.warm_start and FN1 and os.path.exists(FN1_filename):
    model.load_weights(FN1_filename)
    print('Model weights loaded from {}'.format(FN1_filename))


def _print_samples():
    """Generate and print samples from the test split using the current `model`."""
    gensamples(
        skips=2,
        k=10,
        batch_size=batch_size,
        short=False,
        temperature=args.temperature,
        use_unk=True,
        model=model,
        data=(X_test, Y_test),
        **vocab_kwargs
    )


# print samples before training
_print_samples()

# get train and validation generators
# NOTE(review): `r` is not used below; presumably a one-batch smoke test of the generator.
r = next(gen(X_train, Y_train, batch_size=batch_size, nb_batches=None, nflips=None, model=None, debug=False, **vocab_kwargs))
traingen = gen(X_train, Y_train, batch_size=batch_size, nb_batches=None, nflips=args.nflips, model=model, debug=False, **vocab_kwargs)
valgen = gen(X_test, Y_test, batch_size=batch_size, nb_batches=nb_val_samples // batch_size, nflips=None, model=None, debug=False, **vocab_kwargs)

# define callbacks for training; each run logs to its own timestamped directory
callbacks = [TensorBoard(
    log_dir=os.path.join(config.path_logs, str(time.time())),
    histogram_freq=2, write_graph=False, write_images=False)]

# train model and save weights
h = model.fit_generator(
    traingen, samples_per_epoch=nb_train_samples,
    nb_epoch=args.epochs, validation_data=valgen, nb_val_samples=nb_val_samples,
    callbacks=callbacks,
)
model.save_weights(FN1_filename, overwrite=True)

# print samples after training
_print_samples()

ImportError: cannot import name 'empty'