# Seq2seq Language Translation - Keras

In [0]:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
print('TensorFlow Version: {}'.format(tf.__version__))

Found GPU at: /device:GPU:0
TensorFlow Version: 1.10.1


In [0]:
import os
import copy
import pickle
import numpy as np
import pandas as pd

## Downloading the dataset

In [0]:
# from google.colab import drive
# drive.mount('/content/gdrive')

DIRNAME = 'gdrive/My Drive/Colab Notebooks/'

## Loading the data and preprocessing

In [0]:
# Utility functions
CODES = {'<PAD>': 0, '<EOS>': 1, '<UNK>': 2, '<GO>': 3 }


def load_data(path):
    """
    Load Dataset from File
    """
    input_file = os.path.join(path)
    with open(input_file, 'r', encoding='utf-8') as f:
        return f.read()


def preprocess_and_save_data(source_path, target_path, text_to_ids):
    """
    Preprocess Text Data.  Save to to file.
    """
    # Preprocess
    source_text = load_data(source_path)
    target_text = load_data(target_path)

    source_text = source_text.lower()
    target_text = target_text.lower()

    source_vocab_to_int, source_int_to_vocab = create_lookup_tables(source_text)
    target_vocab_to_int, target_int_to_vocab = create_lookup_tables(target_text)

    source_text, target_text = text_to_ids(source_text, target_text, source_vocab_to_int, target_vocab_to_int)

    # Save Data
    with open('preprocess.p', 'wb') as out_file:
        pickle.dump((
            (source_text, target_text),
            (source_vocab_to_int, target_vocab_to_int),
            (source_int_to_vocab, target_int_to_vocab)), out_file)


def load_preprocess():
    """
    Load the Preprocessed Training data and return them in batches of <batch_size> or less
    """
    with open('preprocess.p', mode='rb') as in_file:
        return pickle.load(in_file)


def create_lookup_tables(text):
    """
    Create lookup tables for vocabulary
    """
    vocab = set(text.split())
    vocab_to_int = copy.copy(CODES)

    for v_i, v in enumerate(vocab, len(CODES)):
        vocab_to_int[v] = v_i

    int_to_vocab = {v_i: v for v, v_i in vocab_to_int.items()}

    return vocab_to_int, int_to_vocab


def save_params(params):
    """
    Save parameters to file
    """
    with open('params.p', 'wb') as out_file:
        pickle.dump(params, out_file)


def load_params():
    """
    Load parameters from file
    """
    with open('params.p', mode='rb') as in_file:
        return pickle.load(in_file)


def batch_data(source, target, batch_size):
    """
    Batch source and target together
    """
    for batch_i in range(0, len(source)//batch_size):
        start_i = batch_i * batch_size
        source_batch = source[start_i:start_i + batch_size]
        target_batch = target[start_i:start_i + batch_size]
        yield np.array(pad_sentence_batch(source_batch)), np.array(pad_sentence_batch(target_batch))


def pad_sentence_batch(sentence_batch):
    """
    Pad sentence with <PAD> id
    """
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [CODES['<PAD>']] * (max_sentence - len(sentence))
            for sentence in sentence_batch]

In [0]:
source_path = DIRNAME + 'data/small_vocab_en.txt'
target_path = DIRNAME + 'data/small_vocab_fr.txt'
source_text = load_data(source_path)
target_text = load_data(target_path)

In [0]:
view_sentence_range = (0, 10)

print('Approximate number of unique words: {}'.format(len({word: None for word in source_text.split()})))

sentences = source_text.split('\n')
word_counts = [len(sentence.split()) for sentence in sentences]
print('Number of sentences: {}'.format(len(sentences)))
print('Average number of words in a sentence: {}'.format(np.average(word_counts)))

print()
print('English sentences {} to {}:'.format(*view_sentence_range))
print('\n'.join(source_text.split('\n')[view_sentence_range[0]:view_sentence_range[1]]))
print()
print('French sentences {} to {}:'.format(*view_sentence_range))
print('\n'.join(target_text.split('\n')[view_sentence_range[0]:view_sentence_range[1]]))

Approximate number of unique words: 227
Number of sentences: 137861
Average number of words in a sentence: 13.225277634719028

English sentences 0 to 10:
new jersey is sometimes quiet during autumn , and it is snowy in april .
the united states is usually chilly during july , and it is usually freezing in november .
california is usually quiet during march , and it is usually hot in june .
the united states is sometimes mild during june , and it is cold in september .
your least liked fruit is the grape , but my least liked is the apple .
his favorite fruit is the orange , but my favorite is the grape .
paris is relaxing during december , but it is usually chilly in july .
new jersey is busy during spring , and it is never hot in march .
our least liked fruit is the lemon , but my least liked is the grape .
the united states is sometimes busy during january , and it is sometimes warm in november .

French sentences 0 to 10:
new jersey est parfois calme pendant l' automne , et il est ne

In [0]:
def text_to_ids(source_text, target_text, source_vocab_to_int, target_vocab_to_int):
    """
    Convert source and target text to proper word ids. The <EOS> word id added at the end of target_text to help
    the neural network to predict when the sentence should end.
    :param source_text: String that contains all the source text.
    :param target_text: String that contains all the target text.
    :param source_vocab_to_int: Dictionary to go from the source words to an id
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :return: A tuple of lists (source_id_text, target_id_text)
    """
    source_sentences = source_text.lower().split('\n')
    source_text_split = [sentence.split() for sentence in source_sentences]
    source_ids = [[source_vocab_to_int[word] for word in sentence] for sentence in source_text_split]
    target_sentences = target_text.lower().split('\n')
    target_text_split = [sentence.split() + ['<EOS>'] for sentence in target_sentences]
    target_ids = [[target_vocab_to_int[word] for word in sentence] for sentence in target_text_split]
    return source_ids, target_ids

preprocess_and_save_data(source_path, target_path, text_to_ids)

## Checkpoint

In [0]:
(source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = load_preprocess()

In [0]:
len(source_int_text), len(target_int_text)

(137861, 137861)

## Creating network

In [0]:
def model_inputs():
    """
    Create TF Placeholders for input, targets, learning rate, and lengths of source and target sequences.
    :return: Tuple (input, targets, learning rate, keep probability, target sequence length,
    max target sequence length, source sequence length)
    """
    inputs = tf.placeholder(tf.int32, [None, None], name='input')
    targets = tf.placeholder(tf.int32, [None, None], name='targets')
    learning_rate = tf.placeholder(tf.float32, [], name='learning_rate')
    keep_prob = tf.placeholder(tf.float32, [], name='keep_prob')
    target_sequence_length = tf.placeholder(tf.int32, [None], name='target_sequence_length')
    max_target_len = tf.reduce_max(target_sequence_length)
    source_sequence_length = tf.placeholder(tf.int32, [None], name='source_sequence_length')
    return inputs, targets, learning_rate, keep_prob, target_sequence_length, max_target_len, source_sequence_length


def process_decoder_input(target_data, target_vocab_to_int, batch_size):
    """
    Preprocess target data for encoding by removing the last word id from each batch in target_data 
    and concat the GO ID to the beginning of each batch.
    :param target_data: Target Placeholder
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :param batch_size: Batch Size
    :return: Preprocessed target data
    """
    go_id = tf.constant(target_vocab_to_int['<GO>'], shape=(batch_size,1), dtype=tf.int32)
    processed_data = tf.concat([go_id, target_data[:,:-1]], axis=1)
    return processed_data

In [0]:
def encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob, 
                   source_sequence_length, source_vocab_size, 
                   encoding_embedding_size):
    """
    Create encoding layer
    :param rnn_inputs: Inputs for the RNN
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param keep_prob: Dropout keep probability
    :param source_sequence_length: a list of the lengths of each sequence in the batch
    :param source_vocab_size: vocabulary size of source data
    :param encoding_embedding_size: embedding size of source data
    :return: tuple (RNN output, RNN state)
    """
    # Embed the encoder input
    embed = tf.contrib.layers.embed_sequence(rnn_inputs, vocab_size=source_vocab_size,
                                             embed_dim=encoding_embedding_size)

    # RNN cell
    def make_cell(rnn_size, keep_prob):
        # LSTM cell
        lstm = tf.contrib.rnn.LSTMCell(rnn_size)
        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        return drop
    
    # Stack up multiple LSTM layers, for deep learning
    cell = tf.contrib.rnn.MultiRNNCell([make_cell(rnn_size, keep_prob) for _ in range(num_layers)])

    # Pass cell and embedded input to tf.nn.dynamic.rnn
    output, final_state = tf.nn.dynamic_rnn(cell, embed, sequence_length=source_sequence_length, dtype=tf.float32)
    return output, final_state

In [0]:
def decoding_layer_train(encoder_state, dec_cell, dec_embed_input, 
                         target_sequence_length, max_summary_length, 
                         output_layer, keep_prob):
    """
    Create a decoding layer for training
    :param encoder_state: Encoder State
    :param dec_cell: Decoder RNN Cell
    :param dec_embed_input: Decoder embedded input
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_summary_length: The length of the longest sequence in the batch
    :param output_layer: Function to apply the output layer
    :param keep_prob: Dropout keep probability
    :return: BasicDecoderOutput containing training logits and sample_id
    """
    training_helper = tf.contrib.seq2seq.TrainingHelper(dec_embed_input, target_sequence_length)
    basic_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, training_helper, encoder_state, output_layer)
    basic_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder, 
                                                                   maximum_iterations=max_summary_length)
    return basic_decoder_output

In [0]:
def decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                         end_of_sequence_id, max_target_sequence_length,
                         vocab_size, output_layer, batch_size, keep_prob):
    """
    Create a decoding layer for inference
    :param encoder_state: Encoder state
    :param dec_cell: Decoder RNN Cell
    :param dec_embeddings: Decoder embeddings
    :param start_of_sequence_id: GO ID
    :param end_of_sequence_id: EOS ID
    :param max_target_sequence_length: Maximum length of target sequences
    :param vocab_size: Size of decoder/target vocabulary
    :param decoding_scope: TensorFlow Variable Scope for decoding
    :param output_layer: Function to apply the output layer
    :param batch_size: Batch size
    :param keep_prob: Dropout keep probability
    :return: BasicDecoderOutput containing inference logits and sample_id
    """
    start_tokens = tf.tile(tf.constant([start_of_sequence_id], dtype=tf.int32), 
                           [batch_size], name='start_tokens')
    training_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(dec_embeddings, start_tokens, end_of_sequence_id)
    inference_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, training_helper, encoder_state, output_layer)
    inference_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(inference_decoder, 
                                                                       maximum_iterations=max_target_sequence_length)
    return inference_decoder_output

In [0]:
def decoding_layer(dec_input, encoder_state,
                   target_sequence_length, max_target_sequence_length,
                   rnn_size,
                   num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size):
    """
    Create decoding layer
    :param dec_input: Decoder input
    :param encoder_state: Encoder state
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_target_sequence_length: Maximum length of target sequences
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :param target_vocab_size: Size of target vocabulary
    :param batch_size: The size of the batch
    :param keep_prob: Dropout keep probability
    :param decoding_embedding_size: Decoding embedding size
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    # 1. Decoder Embedding
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    # 2. Construct the decoder cell
    def make_cell(rnn_size, keep_prob):
        # LSTM cell
        lstm = tf.contrib.rnn.LSTMCell(rnn_size)
        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        return drop
    
    # Stack up multiple LSTM layers, for deep learning
    dec_cell = tf.contrib.rnn.MultiRNNCell([make_cell(rnn_size, keep_prob) for _ in range(num_layers)])

    # 3. Dense layer to translate the decoder's output at each time 
    # step into a choice from the target vocabulary
    output_layer = tf.layers.Dense(
        target_vocab_size, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))
    
    with tf.variable_scope('decode'):
        tr_decoder_output = decoding_layer_train(encoder_state, dec_cell, dec_embed_input, 
                                                 target_sequence_length, max_target_sequence_length, 
                                                 output_layer, keep_prob)
        start_of_sequence_id = target_vocab_to_int['<GO>']
        end_of_sequence_id = target_vocab_to_int['<EOS>']
        inf_decoder_output = decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                                                  end_of_sequence_id, max_target_sequence_length,
                                                  target_vocab_size, output_layer, batch_size, keep_prob)
    return tr_decoder_output, inf_decoder_output

## Model

In [0]:
def seq2seq_model(input_data, target_data, keep_prob, batch_size,
                  source_sequence_length, target_sequence_length,
                  max_target_sentence_length,
                  source_vocab_size, target_vocab_size,
                  enc_embedding_size, dec_embedding_size,
                  rnn_size, num_layers, target_vocab_to_int):
    """
    Build the Sequence-to-Sequence part of the neural network
    :param input_data: Input placeholder
    :param target_data: Target placeholder
    :param keep_prob: Dropout keep probability placeholder
    :param batch_size: Batch Size
    :param source_sequence_length: Sequence Lengths of source sequences in the batch
    :param target_sequence_length: Sequence Lengths of target sequences in the batch
    :param source_vocab_size: Source vocabulary size
    :param target_vocab_size: Target vocabulary size
    :param enc_embedding_size: Decoder embedding size
    :param dec_embedding_size: Encoder embedding size
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    # Pass the input data through the encoder. We'll ignore the encoder output, but use the state
    _, enc_state = encoding_layer(input_data, rnn_size, num_layers, keep_prob,  
                                  source_sequence_length, source_vocab_size, 
                                  enc_embedding_size)
    
    # Prepare the target sequences we'll feed to the decoder in training mode
    dec_input = process_decoder_input(target_data, target_vocab_to_int, batch_size)
    
    # Pass encoder state and decoder inputs to the decoders
    tr_decoder_output, inf_decoder_output = decoding_layer(dec_input, enc_state, target_sequence_length, 
                                                           max_target_sentence_length, rnn_size, num_layers, 
                                                           target_vocab_to_int, target_vocab_size, 
                                                           batch_size, keep_prob, 
                                                           dec_embedding_size)
    
    return tr_decoder_output, inf_decoder_output

## Train

In [0]:
# Number of Epochs
epochs = 10
# Batch Size
batch_size = 512
# RNN Size
rnn_size = 128
# Number of Layers
num_layers = 2
# Embedding Size
encoding_embedding_size = 128
decoding_embedding_size = 128
# Learning Rate
learning_rate = 0.001
# Dropout Keep Probability
keep_probability = 0.55
display_step = True

In [0]:
# %mkdir checkpoints/

In [0]:
# save_path = 'checkpoints/dev'
(source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = load_preprocess()
max_target_sentence_length = max([len(sentence) for sentence in source_int_text])

train_graph = tf.Graph()
with train_graph.as_default():
    input_data, targets, lr, keep_prob, target_sequence_length, max_target_sequence_length, source_sequence_length = model_inputs()

    #sequence_length = tf.placeholder_with_default(max_target_sentence_length, None, name='sequence_length')
    input_shape = tf.shape(input_data)

    train_logits, inference_logits = seq2seq_model(tf.reverse(input_data, [-1]),
                                                   targets,
                                                   keep_prob,
                                                   batch_size,
                                                   source_sequence_length,
                                                   target_sequence_length,
                                                   max_target_sequence_length,
                                                   len(source_vocab_to_int),
                                                   len(target_vocab_to_int),
                                                   encoding_embedding_size,
                                                   decoding_embedding_size,
                                                   rnn_size,
                                                   num_layers,
                                                   target_vocab_to_int)


    training_logits = tf.identity(train_logits.rnn_output, name='logits')
    inference_logits = tf.identity(inference_logits.sample_id, name='predictions')

    masks = tf.sequence_mask(target_sequence_length, max_target_sequence_length, dtype=tf.float32, name='masks')

    with tf.name_scope("optimization"):
        # Loss function
        cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks)

        # Optimizer
        optimizer = tf.train.AdamOptimizer(lr)

        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

In [0]:
def pad_sentence_batch(sentence_batch, pad_int):
    """Pad sentences with <PAD> so that each sentence of a batch has the same length"""
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [pad_int] * (max_sentence - len(sentence)) for sentence in sentence_batch]


def get_batches(sources, targets, batch_size, source_pad_int, target_pad_int):
    """Batch targets, sources, and the lengths of their sentences together"""
    for batch_i in range(0, len(sources)//batch_size):
        start_i = batch_i * batch_size

        # Slice the right amount for the batch
        sources_batch = sources[start_i:start_i + batch_size]
        targets_batch = targets[start_i:start_i + batch_size]

        # Pad
        pad_sources_batch = np.array(pad_sentence_batch(sources_batch, source_pad_int))
        pad_targets_batch = np.array(pad_sentence_batch(targets_batch, target_pad_int))

        # Need the lengths for the _lengths parameters
        pad_targets_lengths = []
        for target in pad_targets_batch:
            pad_targets_lengths.append(len(target))

        pad_source_lengths = []
        for source in pad_sources_batch:
            pad_source_lengths.append(len(source))

        yield pad_sources_batch, pad_targets_batch, pad_source_lengths, pad_targets_lengths

In [0]:
train_source = source_int_text[batch_size:]
train_target = target_int_text[batch_size:]
valid_source = source_int_text[:batch_size]
valid_target = target_int_text[:batch_size]
(valid_sources_batch, valid_targets_batch, valid_sources_lengths, valid_targets_lengths) = \
    next(get_batches(valid_source,
                     valid_target,
                     batch_size,
                     source_vocab_to_int['<PAD>'],
                     target_vocab_to_int['<PAD>']))

In [0]:
def get_accuracy(target, logits):
    """
    Calculate accuracy
    """
    max_seq = max(target.shape[1], logits.shape[1])
    if max_seq - target.shape[1]:
        target = np.pad(
            target,
            [(0,0),(0,max_seq - target.shape[1])],
            'constant')
    if max_seq - logits.shape[1]:
        logits = np.pad(
            logits,
            [(0,0),(0,max_seq - logits.shape[1])],
            'constant')

    return np.mean(np.equal(target, logits))


with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())

    for epoch_i in range(epochs):
        for batch_i, (source_batch, target_batch, sources_lengths, targets_lengths) in enumerate(
                get_batches(train_source, train_target, batch_size,
                            source_vocab_to_int['<PAD>'],
                            target_vocab_to_int['<PAD>'])):

            _, loss = sess.run(
                [train_op, cost],
                {input_data: source_batch,
                 targets: target_batch,
                 lr: learning_rate,
                 target_sequence_length: targets_lengths,
                 source_sequence_length: sources_lengths,
                 keep_prob: keep_probability})

            if batch_i % display_step == 0 and batch_i > 0:
                
                batch_train_logits = sess.run(
                    inference_logits,
                    {input_data: source_batch,
                     source_sequence_length: sources_lengths,
                     target_sequence_length: targets_lengths,
                     keep_prob: 1.0})

                batch_valid_logits = sess.run(
                    inference_logits,
                    {input_data: valid_sources_batch,
                     source_sequence_length: valid_sources_lengths,
                     target_sequence_length: valid_targets_lengths,
                     keep_prob: 1.0})

                train_acc = get_accuracy(target_batch, batch_train_logits)

                valid_acc = get_accuracy(valid_targets_batch, batch_valid_logits)

                print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.4f}, Validation Accuracy: {:>6.4f}, Loss: {:>6.4f}'
                      .format(epoch_i, batch_i, len(source_int_text) // batch_size, train_acc, valid_acc, loss))

    # Save Model
    saver = tf.train.Saver()
    saver.save(sess, save_path)
    print('Model Trained and Saved')

Epoch   0 Batch    1/269 - Train Accuracy: 0.2329, Validation Accuracy: 0.3097, Loss: 5.7307
Epoch   0 Batch    2/269 - Train Accuracy: 0.2655, Validation Accuracy: 0.3096, Loss: 5.5308
Epoch   0 Batch    3/269 - Train Accuracy: 0.2444, Validation Accuracy: 0.3096, Loss: 5.3812
Epoch   0 Batch    4/269 - Train Accuracy: 0.2317, Validation Accuracy: 0.3096, Loss: 5.2445
Epoch   0 Batch    5/269 - Train Accuracy: 0.2324, Validation Accuracy: 0.3096, Loss: 5.0761
Epoch   0 Batch    6/269 - Train Accuracy: 0.2785, Validation Accuracy: 0.3096, Loss: 4.7712
Epoch   0 Batch    7/269 - Train Accuracy: 0.2765, Validation Accuracy: 0.3096, Loss: 4.6169
Epoch   0 Batch    8/269 - Train Accuracy: 0.2531, Validation Accuracy: 0.3212, Loss: 4.6467
Epoch   0 Batch    9/269 - Train Accuracy: 0.2827, Validation Accuracy: 0.3248, Loss: 4.4119
Epoch   0 Batch   10/269 - Train Accuracy: 0.2664, Validation Accuracy: 0.3404, Loss: 4.4268
Epoch   0 Batch   11/269 - Train Accuracy: 0.3055, Validation Accuracy

Epoch   0 Batch   91/269 - Train Accuracy: 0.4642, Validation Accuracy: 0.4877, Loss: 2.4165
Epoch   0 Batch   92/269 - Train Accuracy: 0.4452, Validation Accuracy: 0.4711, Loss: 2.4022
Epoch   0 Batch   93/269 - Train Accuracy: 0.4801, Validation Accuracy: 0.4881, Loss: 2.3428
Epoch   0 Batch   94/269 - Train Accuracy: 0.4619, Validation Accuracy: 0.4766, Loss: 2.3927
Epoch   0 Batch   95/269 - Train Accuracy: 0.4681, Validation Accuracy: 0.4871, Loss: 2.3975
Epoch   0 Batch   96/269 - Train Accuracy: 0.4488, Validation Accuracy: 0.4786, Loss: 2.3740
Epoch   0 Batch   97/269 - Train Accuracy: 0.4594, Validation Accuracy: 0.4842, Loss: 2.3805
Epoch   0 Batch   98/269 - Train Accuracy: 0.4799, Validation Accuracy: 0.4861, Loss: 2.3209
Epoch   0 Batch   99/269 - Train Accuracy: 0.4396, Validation Accuracy: 0.4933, Loss: 2.4860
Epoch   0 Batch  100/269 - Train Accuracy: 0.4657, Validation Accuracy: 0.4756, Loss: 2.2885
Epoch   0 Batch  101/269 - Train Accuracy: 0.4299, Validation Accuracy

Epoch   0 Batch  181/269 - Train Accuracy: 0.4638, Validation Accuracy: 0.4912, Loss: 1.7331
Epoch   0 Batch  182/269 - Train Accuracy: 0.4735, Validation Accuracy: 0.5004, Loss: 1.7368
Epoch   0 Batch  183/269 - Train Accuracy: 0.5379, Validation Accuracy: 0.4996, Loss: 1.5001
Epoch   0 Batch  184/269 - Train Accuracy: 0.4315, Validation Accuracy: 0.4878, Loss: 1.7926
Epoch   0 Batch  185/269 - Train Accuracy: 0.4865, Validation Accuracy: 0.5024, Loss: 1.6939
Epoch   0 Batch  186/269 - Train Accuracy: 0.4434, Validation Accuracy: 0.4978, Loss: 1.7774
Epoch   0 Batch  187/269 - Train Accuracy: 0.4721, Validation Accuracy: 0.4930, Loss: 1.6635
Epoch   0 Batch  188/269 - Train Accuracy: 0.4771, Validation Accuracy: 0.4972, Loss: 1.6387
Epoch   0 Batch  189/269 - Train Accuracy: 0.4730, Validation Accuracy: 0.4915, Loss: 1.6727
Epoch   0 Batch  190/269 - Train Accuracy: 0.4650, Validation Accuracy: 0.4949, Loss: 1.6691
Epoch   0 Batch  191/269 - Train Accuracy: 0.4694, Validation Accuracy

Epoch   1 Batch    4/269 - Train Accuracy: 0.4011, Validation Accuracy: 0.4687, Loss: 1.4176
Epoch   1 Batch    5/269 - Train Accuracy: 0.4021, Validation Accuracy: 0.4705, Loss: 1.4386
Epoch   1 Batch    6/269 - Train Accuracy: 0.4550, Validation Accuracy: 0.4681, Loss: 1.3124
Epoch   1 Batch    7/269 - Train Accuracy: 0.4459, Validation Accuracy: 0.4675, Loss: 1.3567
Epoch   1 Batch    8/269 - Train Accuracy: 0.4335, Validation Accuracy: 0.4894, Loss: 1.4114
Epoch   1 Batch    9/269 - Train Accuracy: 0.4229, Validation Accuracy: 0.4694, Loss: 1.3611
Epoch   1 Batch   10/269 - Train Accuracy: 0.4194, Validation Accuracy: 0.4711, Loss: 1.3800
Epoch   1 Batch   11/269 - Train Accuracy: 0.4437, Validation Accuracy: 0.4729, Loss: 1.3598
Epoch   1 Batch   12/269 - Train Accuracy: 0.4073, Validation Accuracy: 0.4643, Loss: 1.4076
Epoch   1 Batch   13/269 - Train Accuracy: 0.4941, Validation Accuracy: 0.4835, Loss: 1.2558
Epoch   1 Batch   14/269 - Train Accuracy: 0.4263, Validation Accuracy

Epoch   1 Batch   94/269 - Train Accuracy: 0.5218, Validation Accuracy: 0.5305, Loss: 1.1412
Epoch   1 Batch   95/269 - Train Accuracy: 0.5121, Validation Accuracy: 0.5382, Loss: 1.1212
Epoch   1 Batch   96/269 - Train Accuracy: 0.5243, Validation Accuracy: 0.5431, Loss: 1.1055
Epoch   1 Batch   97/269 - Train Accuracy: 0.5166, Validation Accuracy: 0.5389, Loss: 1.1188
Epoch   1 Batch   98/269 - Train Accuracy: 0.5406, Validation Accuracy: 0.5380, Loss: 1.0988
Epoch   1 Batch   99/269 - Train Accuracy: 0.4985, Validation Accuracy: 0.5365, Loss: 1.1737
Epoch   1 Batch  100/269 - Train Accuracy: 0.5382, Validation Accuracy: 0.5431, Loss: 1.0833
Epoch   1 Batch  101/269 - Train Accuracy: 0.5006, Validation Accuracy: 0.5455, Loss: 1.1705
Epoch   1 Batch  102/269 - Train Accuracy: 0.5310, Validation Accuracy: 0.5467, Loss: 1.0958
Epoch   1 Batch  103/269 - Train Accuracy: 0.5289, Validation Accuracy: 0.5492, Loss: 1.1016
Epoch   1 Batch  104/269 - Train Accuracy: 0.5238, Validation Accuracy

Epoch   1 Batch  184/269 - Train Accuracy: 0.5319, Validation Accuracy: 0.5588, Loss: 0.9786
Epoch   1 Batch  185/269 - Train Accuracy: 0.5607, Validation Accuracy: 0.5576, Loss: 0.9454
Epoch   1 Batch  186/269 - Train Accuracy: 0.5174, Validation Accuracy: 0.5591, Loss: 0.9777
Epoch   1 Batch  187/269 - Train Accuracy: 0.5570, Validation Accuracy: 0.5626, Loss: 0.9202
Epoch   1 Batch  188/269 - Train Accuracy: 0.5565, Validation Accuracy: 0.5628, Loss: 0.9183
Epoch   1 Batch  189/269 - Train Accuracy: 0.5547, Validation Accuracy: 0.5646, Loss: 0.9328
Epoch   1 Batch  190/269 - Train Accuracy: 0.5421, Validation Accuracy: 0.5599, Loss: 0.9252
Epoch   1 Batch  191/269 - Train Accuracy: 0.5379, Validation Accuracy: 0.5594, Loss: 0.9360
Epoch   1 Batch  192/269 - Train Accuracy: 0.5516, Validation Accuracy: 0.5603, Loss: 0.9345
Epoch   1 Batch  193/269 - Train Accuracy: 0.5464, Validation Accuracy: 0.5631, Loss: 0.9346
Epoch   1 Batch  194/269 - Train Accuracy: 0.5546, Validation Accuracy

Epoch   2 Batch    7/269 - Train Accuracy: 0.5621, Validation Accuracy: 0.5721, Loss: 0.8265
Epoch   2 Batch    8/269 - Train Accuracy: 0.5397, Validation Accuracy: 0.5716, Loss: 0.8736
Epoch   2 Batch    9/269 - Train Accuracy: 0.5429, Validation Accuracy: 0.5679, Loss: 0.8479
Epoch   2 Batch   10/269 - Train Accuracy: 0.5428, Validation Accuracy: 0.5673, Loss: 0.8506
Epoch   2 Batch   11/269 - Train Accuracy: 0.5459, Validation Accuracy: 0.5715, Loss: 0.8466
Epoch   2 Batch   12/269 - Train Accuracy: 0.5229, Validation Accuracy: 0.5709, Loss: 0.8711
Epoch   2 Batch   13/269 - Train Accuracy: 0.5863, Validation Accuracy: 0.5718, Loss: 0.7766
Epoch   2 Batch   14/269 - Train Accuracy: 0.5632, Validation Accuracy: 0.5740, Loss: 0.8271
Epoch   2 Batch   15/269 - Train Accuracy: 0.5495, Validation Accuracy: 0.5785, Loss: 0.8209
Epoch   2 Batch   16/269 - Train Accuracy: 0.5738, Validation Accuracy: 0.5787, Loss: 0.8299
Epoch   2 Batch   17/269 - Train Accuracy: 0.5595, Validation Accuracy

Epoch   2 Batch   97/269 - Train Accuracy: 0.5870, Validation Accuracy: 0.5914, Loss: 0.7514
Epoch   2 Batch   98/269 - Train Accuracy: 0.6049, Validation Accuracy: 0.5941, Loss: 0.7503
Epoch   2 Batch   99/269 - Train Accuracy: 0.5809, Validation Accuracy: 0.6001, Loss: 0.7855
Epoch   2 Batch  100/269 - Train Accuracy: 0.6164, Validation Accuracy: 0.6012, Loss: 0.7461
Epoch   2 Batch  101/269 - Train Accuracy: 0.5670, Validation Accuracy: 0.5971, Loss: 0.7973
Epoch   2 Batch  102/269 - Train Accuracy: 0.6003, Validation Accuracy: 0.5959, Loss: 0.7445
Epoch   2 Batch  103/269 - Train Accuracy: 0.5926, Validation Accuracy: 0.5961, Loss: 0.7495
Epoch   2 Batch  104/269 - Train Accuracy: 0.5762, Validation Accuracy: 0.5915, Loss: 0.7484
Epoch   2 Batch  105/269 - Train Accuracy: 0.5960, Validation Accuracy: 0.5962, Loss: 0.7625
Epoch   2 Batch  106/269 - Train Accuracy: 0.5995, Validation Accuracy: 0.5945, Loss: 0.7470
Epoch   2 Batch  107/269 - Train Accuracy: 0.5688, Validation Accuracy

Epoch   2 Batch  187/269 - Train Accuracy: 0.6145, Validation Accuracy: 0.6163, Loss: 0.6850
Epoch   2 Batch  188/269 - Train Accuracy: 0.6174, Validation Accuracy: 0.6146, Loss: 0.6827
Epoch   2 Batch  189/269 - Train Accuracy: 0.6124, Validation Accuracy: 0.6194, Loss: 0.6831
Epoch   2 Batch  190/269 - Train Accuracy: 0.6174, Validation Accuracy: 0.6180, Loss: 0.6888
Epoch   2 Batch  191/269 - Train Accuracy: 0.6180, Validation Accuracy: 0.6171, Loss: 0.6940
Epoch   2 Batch  192/269 - Train Accuracy: 0.6223, Validation Accuracy: 0.6146, Loss: 0.7000
Epoch   2 Batch  193/269 - Train Accuracy: 0.6021, Validation Accuracy: 0.6126, Loss: 0.7028
Epoch   2 Batch  194/269 - Train Accuracy: 0.6221, Validation Accuracy: 0.6159, Loss: 0.7085
Epoch   2 Batch  195/269 - Train Accuracy: 0.6003, Validation Accuracy: 0.6137, Loss: 0.7069
Epoch   2 Batch  196/269 - Train Accuracy: 0.5954, Validation Accuracy: 0.6153, Loss: 0.6995
Epoch   2 Batch  197/269 - Train Accuracy: 0.5800, Validation Accuracy

Epoch   3 Batch   10/269 - Train Accuracy: 0.6170, Validation Accuracy: 0.6364, Loss: 0.6742
Epoch   3 Batch   11/269 - Train Accuracy: 0.6194, Validation Accuracy: 0.6322, Loss: 0.6669
Epoch   3 Batch   12/269 - Train Accuracy: 0.6022, Validation Accuracy: 0.6327, Loss: 0.6911
Epoch   3 Batch   13/269 - Train Accuracy: 0.6481, Validation Accuracy: 0.6318, Loss: 0.6114
Epoch   3 Batch   14/269 - Train Accuracy: 0.6187, Validation Accuracy: 0.6336, Loss: 0.6459
Epoch   3 Batch   15/269 - Train Accuracy: 0.6123, Validation Accuracy: 0.6314, Loss: 0.6441
Epoch   3 Batch   16/269 - Train Accuracy: 0.6376, Validation Accuracy: 0.6357, Loss: 0.6552
Epoch   3 Batch   17/269 - Train Accuracy: 0.6248, Validation Accuracy: 0.6330, Loss: 0.6453
Epoch   3 Batch   18/269 - Train Accuracy: 0.6068, Validation Accuracy: 0.6319, Loss: 0.6675
Epoch   3 Batch   19/269 - Train Accuracy: 0.6546, Validation Accuracy: 0.6341, Loss: 0.6160
Epoch   3 Batch   20/269 - Train Accuracy: 0.6128, Validation Accuracy

Epoch   3 Batch  100/269 - Train Accuracy: 0.6656, Validation Accuracy: 0.6516, Loss: 0.6004
Epoch   3 Batch  101/269 - Train Accuracy: 0.6070, Validation Accuracy: 0.6559, Loss: 0.6460
Epoch   3 Batch  102/269 - Train Accuracy: 0.6399, Validation Accuracy: 0.6531, Loss: 0.6049
Epoch   3 Batch  103/269 - Train Accuracy: 0.6443, Validation Accuracy: 0.6499, Loss: 0.6003
Epoch   3 Batch  104/269 - Train Accuracy: 0.6334, Validation Accuracy: 0.6474, Loss: 0.6033
Epoch   3 Batch  105/269 - Train Accuracy: 0.6322, Validation Accuracy: 0.6507, Loss: 0.6191
Epoch   3 Batch  106/269 - Train Accuracy: 0.6319, Validation Accuracy: 0.6482, Loss: 0.6071
Epoch   3 Batch  107/269 - Train Accuracy: 0.6028, Validation Accuracy: 0.6486, Loss: 0.6390
Epoch   3 Batch  108/269 - Train Accuracy: 0.6299, Validation Accuracy: 0.6452, Loss: 0.6081
Epoch   3 Batch  109/269 - Train Accuracy: 0.6195, Validation Accuracy: 0.6438, Loss: 0.6096
Epoch   3 Batch  110/269 - Train Accuracy: 0.6310, Validation Accuracy

Epoch   3 Batch  190/269 - Train Accuracy: 0.6511, Validation Accuracy: 0.6572, Loss: 0.5589
Epoch   3 Batch  191/269 - Train Accuracy: 0.6724, Validation Accuracy: 0.6601, Loss: 0.5609
Epoch   3 Batch  192/269 - Train Accuracy: 0.6509, Validation Accuracy: 0.6574, Loss: 0.5612
Epoch   3 Batch  193/269 - Train Accuracy: 0.6541, Validation Accuracy: 0.6588, Loss: 0.5625
Epoch   3 Batch  194/269 - Train Accuracy: 0.6717, Validation Accuracy: 0.6596, Loss: 0.5673
Epoch   3 Batch  195/269 - Train Accuracy: 0.6492, Validation Accuracy: 0.6543, Loss: 0.5730
Epoch   3 Batch  196/269 - Train Accuracy: 0.6377, Validation Accuracy: 0.6548, Loss: 0.5618
Epoch   3 Batch  197/269 - Train Accuracy: 0.6285, Validation Accuracy: 0.6562, Loss: 0.5919
Epoch   3 Batch  198/269 - Train Accuracy: 0.6235, Validation Accuracy: 0.6548, Loss: 0.5961
Epoch   3 Batch  199/269 - Train Accuracy: 0.6328, Validation Accuracy: 0.6561, Loss: 0.5811
Epoch   3 Batch  200/269 - Train Accuracy: 0.6457, Validation Accuracy

Epoch   4 Batch   13/269 - Train Accuracy: 0.6864, Validation Accuracy: 0.6635, Loss: 0.4902
Epoch   4 Batch   14/269 - Train Accuracy: 0.6633, Validation Accuracy: 0.6644, Loss: 0.5261
Epoch   4 Batch   15/269 - Train Accuracy: 0.6510, Validation Accuracy: 0.6638, Loss: 0.5158
Epoch   4 Batch   16/269 - Train Accuracy: 0.6795, Validation Accuracy: 0.6602, Loss: 0.5249
Epoch   4 Batch   17/269 - Train Accuracy: 0.6574, Validation Accuracy: 0.6616, Loss: 0.5119
Epoch   4 Batch   18/269 - Train Accuracy: 0.6402, Validation Accuracy: 0.6628, Loss: 0.5425
Epoch   4 Batch   19/269 - Train Accuracy: 0.6796, Validation Accuracy: 0.6634, Loss: 0.4884
Epoch   4 Batch   20/269 - Train Accuracy: 0.6582, Validation Accuracy: 0.6596, Loss: 0.5419
Epoch   4 Batch   21/269 - Train Accuracy: 0.6480, Validation Accuracy: 0.6589, Loss: 0.5598
Epoch   4 Batch   22/269 - Train Accuracy: 0.6721, Validation Accuracy: 0.6581, Loss: 0.5072
Epoch   4 Batch   23/269 - Train Accuracy: 0.6710, Validation Accuracy

Epoch   4 Batch  103/269 - Train Accuracy: 0.6897, Validation Accuracy: 0.6786, Loss: 0.5038
Epoch   4 Batch  104/269 - Train Accuracy: 0.6612, Validation Accuracy: 0.6825, Loss: 0.4806
Epoch   4 Batch  105/269 - Train Accuracy: 0.6629, Validation Accuracy: 0.6740, Loss: 0.5045
Epoch   4 Batch  106/269 - Train Accuracy: 0.6616, Validation Accuracy: 0.6722, Loss: 0.4902
Epoch   4 Batch  107/269 - Train Accuracy: 0.6598, Validation Accuracy: 0.6906, Loss: 0.5201
Epoch   4 Batch  108/269 - Train Accuracy: 0.6764, Validation Accuracy: 0.6779, Loss: 0.4962
Epoch   4 Batch  109/269 - Train Accuracy: 0.6665, Validation Accuracy: 0.6712, Loss: 0.4877
Epoch   4 Batch  110/269 - Train Accuracy: 0.6615, Validation Accuracy: 0.6730, Loss: 0.4844
Epoch   4 Batch  111/269 - Train Accuracy: 0.6610, Validation Accuracy: 0.6769, Loss: 0.5246
Epoch   4 Batch  112/269 - Train Accuracy: 0.6841, Validation Accuracy: 0.6761, Loss: 0.4881
Epoch   4 Batch  113/269 - Train Accuracy: 0.6735, Validation Accuracy

Epoch   4 Batch  193/269 - Train Accuracy: 0.7227, Validation Accuracy: 0.6998, Loss: 0.4424
Epoch   4 Batch  194/269 - Train Accuracy: 0.7072, Validation Accuracy: 0.6937, Loss: 0.4519
Epoch   4 Batch  195/269 - Train Accuracy: 0.6870, Validation Accuracy: 0.6868, Loss: 0.4515
Epoch   4 Batch  196/269 - Train Accuracy: 0.6907, Validation Accuracy: 0.7006, Loss: 0.4494
Epoch   4 Batch  197/269 - Train Accuracy: 0.6971, Validation Accuracy: 0.6978, Loss: 0.4747
Epoch   4 Batch  198/269 - Train Accuracy: 0.6893, Validation Accuracy: 0.7138, Loss: 0.4788
Epoch   4 Batch  199/269 - Train Accuracy: 0.7026, Validation Accuracy: 0.7094, Loss: 0.4537
Epoch   4 Batch  200/269 - Train Accuracy: 0.6968, Validation Accuracy: 0.7048, Loss: 0.4603
Epoch   4 Batch  201/269 - Train Accuracy: 0.7125, Validation Accuracy: 0.6999, Loss: 0.4461
Epoch   4 Batch  202/269 - Train Accuracy: 0.6980, Validation Accuracy: 0.7002, Loss: 0.4428
Epoch   4 Batch  203/269 - Train Accuracy: 0.6946, Validation Accuracy

Epoch   5 Batch   16/269 - Train Accuracy: 0.7378, Validation Accuracy: 0.7264, Loss: 0.4235
Epoch   5 Batch   17/269 - Train Accuracy: 0.7407, Validation Accuracy: 0.7212, Loss: 0.4098
Epoch   5 Batch   18/269 - Train Accuracy: 0.7101, Validation Accuracy: 0.7232, Loss: 0.4316
Epoch   5 Batch   19/269 - Train Accuracy: 0.7473, Validation Accuracy: 0.7336, Loss: 0.3958
Epoch   5 Batch   20/269 - Train Accuracy: 0.7336, Validation Accuracy: 0.7317, Loss: 0.4271
Epoch   5 Batch   21/269 - Train Accuracy: 0.6995, Validation Accuracy: 0.7199, Loss: 0.4444
Epoch   5 Batch   22/269 - Train Accuracy: 0.7334, Validation Accuracy: 0.7250, Loss: 0.4103
Epoch   5 Batch   23/269 - Train Accuracy: 0.7297, Validation Accuracy: 0.7320, Loss: 0.4131
Epoch   5 Batch   24/269 - Train Accuracy: 0.7271, Validation Accuracy: 0.7256, Loss: 0.4307
Epoch   5 Batch   25/269 - Train Accuracy: 0.7205, Validation Accuracy: 0.7217, Loss: 0.4396
Epoch   5 Batch   26/269 - Train Accuracy: 0.7418, Validation Accuracy

Epoch   5 Batch  106/269 - Train Accuracy: 0.7519, Validation Accuracy: 0.7546, Loss: 0.3869
Epoch   5 Batch  107/269 - Train Accuracy: 0.7424, Validation Accuracy: 0.7538, Loss: 0.4028
Epoch   5 Batch  108/269 - Train Accuracy: 0.7588, Validation Accuracy: 0.7451, Loss: 0.3875
Epoch   5 Batch  109/269 - Train Accuracy: 0.7356, Validation Accuracy: 0.7436, Loss: 0.3918
Epoch   5 Batch  110/269 - Train Accuracy: 0.7519, Validation Accuracy: 0.7472, Loss: 0.3883
Epoch   5 Batch  111/269 - Train Accuracy: 0.7348, Validation Accuracy: 0.7506, Loss: 0.4129
Epoch   5 Batch  112/269 - Train Accuracy: 0.7701, Validation Accuracy: 0.7531, Loss: 0.3853
Epoch   5 Batch  113/269 - Train Accuracy: 0.7428, Validation Accuracy: 0.7532, Loss: 0.3769
Epoch   5 Batch  114/269 - Train Accuracy: 0.7533, Validation Accuracy: 0.7449, Loss: 0.3849
Epoch   5 Batch  115/269 - Train Accuracy: 0.7377, Validation Accuracy: 0.7471, Loss: 0.3988
Epoch   5 Batch  116/269 - Train Accuracy: 0.7680, Validation Accuracy

Epoch   5 Batch  196/269 - Train Accuracy: 0.7646, Validation Accuracy: 0.7725, Loss: 0.3540
Epoch   5 Batch  197/269 - Train Accuracy: 0.7755, Validation Accuracy: 0.7654, Loss: 0.3727
Epoch   5 Batch  198/269 - Train Accuracy: 0.7662, Validation Accuracy: 0.7646, Loss: 0.3856
Epoch   5 Batch  199/269 - Train Accuracy: 0.7688, Validation Accuracy: 0.7671, Loss: 0.3757
Epoch   5 Batch  200/269 - Train Accuracy: 0.7782, Validation Accuracy: 0.7712, Loss: 0.3685
Epoch   5 Batch  201/269 - Train Accuracy: 0.7826, Validation Accuracy: 0.7686, Loss: 0.3549
Epoch   5 Batch  202/269 - Train Accuracy: 0.7656, Validation Accuracy: 0.7752, Loss: 0.3616
Epoch   5 Batch  203/269 - Train Accuracy: 0.7620, Validation Accuracy: 0.7685, Loss: 0.3870
Epoch   5 Batch  204/269 - Train Accuracy: 0.7736, Validation Accuracy: 0.7601, Loss: 0.3842
Epoch   5 Batch  205/269 - Train Accuracy: 0.7967, Validation Accuracy: 0.7637, Loss: 0.3548
Epoch   5 Batch  206/269 - Train Accuracy: 0.7745, Validation Accuracy

Epoch   6 Batch   19/269 - Train Accuracy: 0.8080, Validation Accuracy: 0.7972, Loss: 0.3064
Epoch   6 Batch   20/269 - Train Accuracy: 0.7979, Validation Accuracy: 0.7935, Loss: 0.3435
Epoch   6 Batch   21/269 - Train Accuracy: 0.7679, Validation Accuracy: 0.7962, Loss: 0.3645
Epoch   6 Batch   22/269 - Train Accuracy: 0.8114, Validation Accuracy: 0.7914, Loss: 0.3216
Epoch   6 Batch   23/269 - Train Accuracy: 0.7976, Validation Accuracy: 0.7903, Loss: 0.3276
Epoch   6 Batch   24/269 - Train Accuracy: 0.8133, Validation Accuracy: 0.7954, Loss: 0.3398
Epoch   6 Batch   25/269 - Train Accuracy: 0.7937, Validation Accuracy: 0.7917, Loss: 0.3531
Epoch   6 Batch   26/269 - Train Accuracy: 0.8077, Validation Accuracy: 0.7939, Loss: 0.3091
Epoch   6 Batch   27/269 - Train Accuracy: 0.7911, Validation Accuracy: 0.7888, Loss: 0.3285
Epoch   6 Batch   28/269 - Train Accuracy: 0.7723, Validation Accuracy: 0.7955, Loss: 0.3527
Epoch   6 Batch   29/269 - Train Accuracy: 0.8012, Validation Accuracy

Epoch   6 Batch  109/269 - Train Accuracy: 0.7919, Validation Accuracy: 0.7951, Loss: 0.3113
Epoch   6 Batch  110/269 - Train Accuracy: 0.8086, Validation Accuracy: 0.8006, Loss: 0.3021
Epoch   6 Batch  111/269 - Train Accuracy: 0.8013, Validation Accuracy: 0.8033, Loss: 0.3350
Epoch   6 Batch  112/269 - Train Accuracy: 0.8225, Validation Accuracy: 0.8127, Loss: 0.3029
Epoch   6 Batch  113/269 - Train Accuracy: 0.8031, Validation Accuracy: 0.8112, Loss: 0.2979
Epoch   6 Batch  114/269 - Train Accuracy: 0.8211, Validation Accuracy: 0.8139, Loss: 0.3117
Epoch   6 Batch  115/269 - Train Accuracy: 0.7886, Validation Accuracy: 0.8093, Loss: 0.3212
Epoch   6 Batch  116/269 - Train Accuracy: 0.8256, Validation Accuracy: 0.8115, Loss: 0.3171
Epoch   6 Batch  117/269 - Train Accuracy: 0.8113, Validation Accuracy: 0.8072, Loss: 0.3081
Epoch   6 Batch  118/269 - Train Accuracy: 0.8280, Validation Accuracy: 0.8098, Loss: 0.2985
Epoch   6 Batch  119/269 - Train Accuracy: 0.8043, Validation Accuracy

Epoch   6 Batch  199/269 - Train Accuracy: 0.8313, Validation Accuracy: 0.8238, Loss: 0.2915
Epoch   6 Batch  200/269 - Train Accuracy: 0.8152, Validation Accuracy: 0.8298, Loss: 0.3017
Epoch   6 Batch  201/269 - Train Accuracy: 0.8265, Validation Accuracy: 0.8290, Loss: 0.2915
Epoch   6 Batch  202/269 - Train Accuracy: 0.8049, Validation Accuracy: 0.8294, Loss: 0.2897
Epoch   6 Batch  203/269 - Train Accuracy: 0.8241, Validation Accuracy: 0.8244, Loss: 0.3118
Epoch   6 Batch  204/269 - Train Accuracy: 0.8218, Validation Accuracy: 0.8321, Loss: 0.3058
Epoch   6 Batch  205/269 - Train Accuracy: 0.8410, Validation Accuracy: 0.8232, Loss: 0.2894
Epoch   6 Batch  206/269 - Train Accuracy: 0.8106, Validation Accuracy: 0.8357, Loss: 0.3061
Epoch   6 Batch  207/269 - Train Accuracy: 0.8352, Validation Accuracy: 0.8306, Loss: 0.2772
Epoch   6 Batch  208/269 - Train Accuracy: 0.8369, Validation Accuracy: 0.8283, Loss: 0.2986
Epoch   6 Batch  209/269 - Train Accuracy: 0.8487, Validation Accuracy

Epoch   7 Batch   22/269 - Train Accuracy: 0.8623, Validation Accuracy: 0.8557, Loss: 0.2602
Epoch   7 Batch   23/269 - Train Accuracy: 0.8498, Validation Accuracy: 0.8574, Loss: 0.2644
Epoch   7 Batch   24/269 - Train Accuracy: 0.8570, Validation Accuracy: 0.8533, Loss: 0.2733
Epoch   7 Batch   25/269 - Train Accuracy: 0.8447, Validation Accuracy: 0.8518, Loss: 0.2826
Epoch   7 Batch   26/269 - Train Accuracy: 0.8515, Validation Accuracy: 0.8522, Loss: 0.2424
Epoch   7 Batch   27/269 - Train Accuracy: 0.8372, Validation Accuracy: 0.8540, Loss: 0.2608
Epoch   7 Batch   28/269 - Train Accuracy: 0.8024, Validation Accuracy: 0.8520, Loss: 0.2888
Epoch   7 Batch   29/269 - Train Accuracy: 0.8587, Validation Accuracy: 0.8622, Loss: 0.2808
Epoch   7 Batch   30/269 - Train Accuracy: 0.8473, Validation Accuracy: 0.8521, Loss: 0.2617
Epoch   7 Batch   31/269 - Train Accuracy: 0.8544, Validation Accuracy: 0.8517, Loss: 0.2550
Epoch   7 Batch   32/269 - Train Accuracy: 0.8488, Validation Accuracy

Epoch   7 Batch  112/269 - Train Accuracy: 0.8656, Validation Accuracy: 0.8568, Loss: 0.2451
Epoch   7 Batch  113/269 - Train Accuracy: 0.8512, Validation Accuracy: 0.8574, Loss: 0.2401
Epoch   7 Batch  114/269 - Train Accuracy: 0.8639, Validation Accuracy: 0.8574, Loss: 0.2475
Epoch   7 Batch  115/269 - Train Accuracy: 0.8429, Validation Accuracy: 0.8601, Loss: 0.2587
Epoch   7 Batch  116/269 - Train Accuracy: 0.8671, Validation Accuracy: 0.8543, Loss: 0.2554
Epoch   7 Batch  117/269 - Train Accuracy: 0.8485, Validation Accuracy: 0.8604, Loss: 0.2496
Epoch   7 Batch  118/269 - Train Accuracy: 0.8705, Validation Accuracy: 0.8577, Loss: 0.2347
Epoch   7 Batch  119/269 - Train Accuracy: 0.8477, Validation Accuracy: 0.8568, Loss: 0.2627
Epoch   7 Batch  120/269 - Train Accuracy: 0.8649, Validation Accuracy: 0.8559, Loss: 0.2486
Epoch   7 Batch  121/269 - Train Accuracy: 0.8590, Validation Accuracy: 0.8583, Loss: 0.2425
Epoch   7 Batch  122/269 - Train Accuracy: 0.8570, Validation Accuracy

Epoch   7 Batch  202/269 - Train Accuracy: 0.8542, Validation Accuracy: 0.8723, Loss: 0.2384
Epoch   7 Batch  203/269 - Train Accuracy: 0.8572, Validation Accuracy: 0.8634, Loss: 0.2422
Epoch   7 Batch  204/269 - Train Accuracy: 0.8695, Validation Accuracy: 0.8721, Loss: 0.2457
Epoch   7 Batch  205/269 - Train Accuracy: 0.8751, Validation Accuracy: 0.8706, Loss: 0.2200
Epoch   7 Batch  206/269 - Train Accuracy: 0.8512, Validation Accuracy: 0.8706, Loss: 0.2498
Epoch   7 Batch  207/269 - Train Accuracy: 0.8608, Validation Accuracy: 0.8651, Loss: 0.2149
Epoch   7 Batch  208/269 - Train Accuracy: 0.8689, Validation Accuracy: 0.8726, Loss: 0.2406
Epoch   7 Batch  209/269 - Train Accuracy: 0.8832, Validation Accuracy: 0.8707, Loss: 0.2266
Epoch   7 Batch  210/269 - Train Accuracy: 0.8725, Validation Accuracy: 0.8706, Loss: 0.2226
Epoch   7 Batch  211/269 - Train Accuracy: 0.8677, Validation Accuracy: 0.8574, Loss: 0.2267
Epoch   7 Batch  212/269 - Train Accuracy: 0.8704, Validation Accuracy

Epoch   8 Batch   25/269 - Train Accuracy: 0.8723, Validation Accuracy: 0.8865, Loss: 0.2251
Epoch   8 Batch   26/269 - Train Accuracy: 0.8873, Validation Accuracy: 0.8824, Loss: 0.1903
Epoch   8 Batch   27/269 - Train Accuracy: 0.8783, Validation Accuracy: 0.8898, Loss: 0.2042
Epoch   8 Batch   28/269 - Train Accuracy: 0.8403, Validation Accuracy: 0.8799, Loss: 0.2208
Epoch   8 Batch   29/269 - Train Accuracy: 0.8908, Validation Accuracy: 0.8857, Loss: 0.2210
Epoch   8 Batch   30/269 - Train Accuracy: 0.8703, Validation Accuracy: 0.8823, Loss: 0.2033
Epoch   8 Batch   31/269 - Train Accuracy: 0.8744, Validation Accuracy: 0.8814, Loss: 0.1931
Epoch   8 Batch   32/269 - Train Accuracy: 0.8810, Validation Accuracy: 0.8844, Loss: 0.1990
Epoch   8 Batch   33/269 - Train Accuracy: 0.8717, Validation Accuracy: 0.8771, Loss: 0.1913
Epoch   8 Batch   34/269 - Train Accuracy: 0.8708, Validation Accuracy: 0.8817, Loss: 0.2055
Epoch   8 Batch   35/269 - Train Accuracy: 0.8749, Validation Accuracy

Epoch   8 Batch  115/269 - Train Accuracy: 0.8674, Validation Accuracy: 0.8729, Loss: 0.1950
Epoch   8 Batch  116/269 - Train Accuracy: 0.8863, Validation Accuracy: 0.8771, Loss: 0.1961
Epoch   8 Batch  117/269 - Train Accuracy: 0.8687, Validation Accuracy: 0.8843, Loss: 0.1897
Epoch   8 Batch  118/269 - Train Accuracy: 0.8936, Validation Accuracy: 0.8878, Loss: 0.1810
Epoch   8 Batch  119/269 - Train Accuracy: 0.8643, Validation Accuracy: 0.8819, Loss: 0.2042
Epoch   8 Batch  120/269 - Train Accuracy: 0.8914, Validation Accuracy: 0.8851, Loss: 0.1945
Epoch   8 Batch  121/269 - Train Accuracy: 0.8848, Validation Accuracy: 0.8921, Loss: 0.1842
Epoch   8 Batch  122/269 - Train Accuracy: 0.8783, Validation Accuracy: 0.8859, Loss: 0.1851
Epoch   8 Batch  123/269 - Train Accuracy: 0.8820, Validation Accuracy: 0.8829, Loss: 0.2009
Epoch   8 Batch  124/269 - Train Accuracy: 0.8718, Validation Accuracy: 0.8838, Loss: 0.1802
Epoch   8 Batch  125/269 - Train Accuracy: 0.8911, Validation Accuracy

Epoch   8 Batch  205/269 - Train Accuracy: 0.8865, Validation Accuracy: 0.8817, Loss: 0.1777
Epoch   8 Batch  206/269 - Train Accuracy: 0.8720, Validation Accuracy: 0.8814, Loss: 0.1906
Epoch   8 Batch  207/269 - Train Accuracy: 0.8777, Validation Accuracy: 0.8925, Loss: 0.1782
Epoch   8 Batch  208/269 - Train Accuracy: 0.8805, Validation Accuracy: 0.8776, Loss: 0.1895
Epoch   8 Batch  209/269 - Train Accuracy: 0.9002, Validation Accuracy: 0.8850, Loss: 0.1742
Epoch   8 Batch  210/269 - Train Accuracy: 0.8895, Validation Accuracy: 0.8893, Loss: 0.1700
Epoch   8 Batch  211/269 - Train Accuracy: 0.8802, Validation Accuracy: 0.8903, Loss: 0.1869
Epoch   8 Batch  212/269 - Train Accuracy: 0.8916, Validation Accuracy: 0.8920, Loss: 0.1822
Epoch   8 Batch  213/269 - Train Accuracy: 0.8812, Validation Accuracy: 0.8873, Loss: 0.1799
Epoch   8 Batch  214/269 - Train Accuracy: 0.8728, Validation Accuracy: 0.8915, Loss: 0.1928
Epoch   8 Batch  215/269 - Train Accuracy: 0.8975, Validation Accuracy

Epoch   9 Batch   28/269 - Train Accuracy: 0.8563, Validation Accuracy: 0.9067, Loss: 0.1765
Epoch   9 Batch   29/269 - Train Accuracy: 0.8989, Validation Accuracy: 0.8968, Loss: 0.1714
Epoch   9 Batch   30/269 - Train Accuracy: 0.8919, Validation Accuracy: 0.9031, Loss: 0.1607
Epoch   9 Batch   31/269 - Train Accuracy: 0.9053, Validation Accuracy: 0.8969, Loss: 0.1616
Epoch   9 Batch   32/269 - Train Accuracy: 0.8997, Validation Accuracy: 0.8923, Loss: 0.1584
Epoch   9 Batch   33/269 - Train Accuracy: 0.8994, Validation Accuracy: 0.8970, Loss: 0.1490
Epoch   9 Batch   34/269 - Train Accuracy: 0.8804, Validation Accuracy: 0.8969, Loss: 0.1673
Epoch   9 Batch   35/269 - Train Accuracy: 0.8969, Validation Accuracy: 0.8961, Loss: 0.1676
Epoch   9 Batch   36/269 - Train Accuracy: 0.8819, Validation Accuracy: 0.8970, Loss: 0.1666
Epoch   9 Batch   37/269 - Train Accuracy: 0.8954, Validation Accuracy: 0.8991, Loss: 0.1715
Epoch   9 Batch   38/269 - Train Accuracy: 0.8999, Validation Accuracy

Epoch   9 Batch  118/269 - Train Accuracy: 0.9062, Validation Accuracy: 0.8975, Loss: 0.1433
Epoch   9 Batch  119/269 - Train Accuracy: 0.8882, Validation Accuracy: 0.8914, Loss: 0.1634
Epoch   9 Batch  120/269 - Train Accuracy: 0.9106, Validation Accuracy: 0.8992, Loss: 0.1589
Epoch   9 Batch  121/269 - Train Accuracy: 0.9002, Validation Accuracy: 0.9056, Loss: 0.1491
Epoch   9 Batch  122/269 - Train Accuracy: 0.8995, Validation Accuracy: 0.9105, Loss: 0.1529
Epoch   9 Batch  123/269 - Train Accuracy: 0.8960, Validation Accuracy: 0.9083, Loss: 0.1562
Epoch   9 Batch  124/269 - Train Accuracy: 0.8892, Validation Accuracy: 0.9059, Loss: 0.1487
Epoch   9 Batch  125/269 - Train Accuracy: 0.9045, Validation Accuracy: 0.8978, Loss: 0.1464
Epoch   9 Batch  126/269 - Train Accuracy: 0.8790, Validation Accuracy: 0.9031, Loss: 0.1498
Epoch   9 Batch  127/269 - Train Accuracy: 0.8823, Validation Accuracy: 0.9002, Loss: 0.1616
Epoch   9 Batch  128/269 - Train Accuracy: 0.8961, Validation Accuracy

Epoch   9 Batch  208/269 - Train Accuracy: 0.8944, Validation Accuracy: 0.9059, Loss: 0.1557
Epoch   9 Batch  209/269 - Train Accuracy: 0.9111, Validation Accuracy: 0.9075, Loss: 0.1362
Epoch   9 Batch  210/269 - Train Accuracy: 0.9019, Validation Accuracy: 0.9092, Loss: 0.1468
Epoch   9 Batch  211/269 - Train Accuracy: 0.8951, Validation Accuracy: 0.9065, Loss: 0.1461
Epoch   9 Batch  212/269 - Train Accuracy: 0.8894, Validation Accuracy: 0.8979, Loss: 0.1552
Epoch   9 Batch  213/269 - Train Accuracy: 0.8958, Validation Accuracy: 0.9086, Loss: 0.1456
Epoch   9 Batch  214/269 - Train Accuracy: 0.8886, Validation Accuracy: 0.9077, Loss: 0.1482
Epoch   9 Batch  215/269 - Train Accuracy: 0.9164, Validation Accuracy: 0.9081, Loss: 0.1434
Epoch   9 Batch  216/269 - Train Accuracy: 0.8758, Validation Accuracy: 0.9048, Loss: 0.1598
Epoch   9 Batch  217/269 - Train Accuracy: 0.8924, Validation Accuracy: 0.9078, Loss: 0.1539
Epoch   9 Batch  218/269 - Train Accuracy: 0.9052, Validation Accuracy

In [0]:
# save_params(save_path)

## Checkpoint for testing the sentence to sequence model

In [0]:
batch_size = 512

_, (source_vocab_to_int, target_vocab_to_int), (source_int_to_vocab, target_int_to_vocab) = load_preprocess()
load_path = load_params()

In [0]:
def sentence_to_seq(sentence, vocab_to_int):
    """
    Convert a sentence to a sequence of ids
    :param sentence: String
    :param vocab_to_int: Dictionary to go from the words to an id
    :return: List of word ids
    """
    # TODO: Implement Function
    UNK_id = vocab_to_int['<UNK>']
    seq = []
    for word in sentence.lower().split():
        if word in vocab_to_int:
            seq.append(vocab_to_int[word])
        else:
            seq.append(UNK_id)
    return seq

In [0]:
translate_sentence = 'he saw a old yellow truck .'

translate_sentence = sentence_to_seq(translate_sentence, source_vocab_to_int)

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(load_path + '.meta')
    loader.restore(sess, load_path)

    input_data = loaded_graph.get_tensor_by_name('input:0')
    logits = loaded_graph.get_tensor_by_name('predictions:0')
    target_sequence_length = loaded_graph.get_tensor_by_name('target_sequence_length:0')
    source_sequence_length = loaded_graph.get_tensor_by_name('source_sequence_length:0')
    keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')

    translate_logits = sess.run(logits, {input_data: [translate_sentence]*batch_size,
                                         target_sequence_length: [len(translate_sentence)*2]*batch_size,
                                         source_sequence_length: [len(translate_sentence)]*batch_size,
                                         keep_prob: 1.0})[0]

print('Input')
print('  Word Ids:      {}'.format([i for i in translate_sentence]))
print('  English Words: {}'.format([source_int_to_vocab[i] for i in translate_sentence]))

print('\nPrediction')
print('  Word Ids:      {}'.format([i for i in translate_logits]))
print('  French Words: {}'.format(" ".join([target_int_to_vocab[i] for i in translate_logits])))

INFO:tensorflow:Restoring parameters from checkpoints/dev
Input
  Word Ids:      [171, 25, 8, 136, 187, 151, 46]
  English Words: ['he', 'saw', 'a', 'old', 'yellow', 'truck', '.']

Prediction
  Word Ids:      [81, 178, 181, 316, 83, 335, 322, 189, 1]
  French Words: il a vu un camion jaune brillant . <EOS>


## Download files, models, results, plots

In [0]:
!ls

checkpoints  data  datalab  params.p  preprocess.p


In [0]:
from google.colab import files

files.download('params.p')

## Keras

In [0]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, GRU

Using TensorFlow backend.


In [0]:
%mkdir data/

In [0]:
downloaded = drive.CreateFile({'id':'14W_O1I-3g6C6v7CtAqv_bwB9y29sjCaP'})
downloaded.GetContentFile('data/fra.txt')

In [0]:
%ls data/

fra.txt


In [0]:
num_samples = 10000  # Number of samples to train on.
# Path to the data txt file on disk.
data_path = 'data/fra.txt'

# Vectorize the data.
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()
with open(data_path, 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')
    
for line in lines[: min(num_samples, len(lines) - 1)]:
    input_text, target_text = line.split('\t')
    # We use "tab" as the "start sequence" character
    # for the targets, and "\n" as "end sequence" character.
    target_text = '\t' + target_text + '\n'
    input_texts.append(input_text)
    target_texts.append(target_text)
    for char in input_text:
        if char not in input_characters:
            input_characters.add(char)
    for char in target_text:
        if char not in target_characters:
            target_characters.add(char)

In [0]:
input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max([len(txt) for txt in input_texts])
max_decoder_seq_length = max([len(txt) for txt in target_texts])

print('Number of samples:', len(input_texts))
print('Number of unique input tokens:', num_encoder_tokens)
print('Number of unique output tokens:', num_decoder_tokens)
print('Max sequence length for inputs:', max_encoder_seq_length)
print('Max sequence length for outputs:', max_decoder_seq_length)

# Lookup token index to encode sequences.
input_token_index = dict(
    [(char, i) for i, char in enumerate(input_characters)])
target_token_index = dict(
    [(char, i) for i, char in enumerate(target_characters)])

# Reverse-lookup token index to decode sequences back to
# something readable.
reverse_input_char_index = dict(
    (i, char) for char, i in input_token_index.items())
reverse_target_char_index = dict(
    (i, char) for char, i in target_token_index.items())


# Prepare input and target data for training
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
    dtype='float32')
decoder_input_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')
decoder_target_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.
    for t, char in enumerate(target_text):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        decoder_input_data[i, t, target_token_index[char]] = 1.
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.

Number of samples: 10000
Number of unique input tokens: 71
Number of unique output tokens: 94
Max sequence length for inputs: 16
Max sequence length for outputs: 59


In [0]:
encoder_input_data.shape, decoder_input_data.shape, decoder_target_data.shape

((10000, 16, 71), (10000, 59, 94), (10000, 59, 94))

The training model leverages three key features of Keras RNNs:

- The return_state contructor argument, configuring a RNN layer to return a list where the first entry is the outputs and the next entries are the internal RNN states. This is used to recover the states of the encoder.
- The inital_state call argument, specifying the initial state(s) of a RNN. This is used to pass the encoder states to the decoder as initial states.
- The return_sequences constructor argument, configuring a RNN to return its full sequence of outputs (instead of just the last output, which the defaults behavior). This is used in the decoder.

### Training model

In [0]:
batch_size = 64  # Batch size for training.
epochs = 20  # Number of epochs to train for.
latent_dim = 256  # Latent dimensionality of the encoding space.

In [0]:
def seq2seq_lstm(num_encoder_tokens, num_decoder_tokens, latent_dim=256):
    # Define an input sequence and process it.
    encoder_inputs = Input(shape=(None, num_encoder_tokens))
    encoder = LSTM(latent_dim, return_state=True)
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    # We discard `encoder_outputs` and only keep the states.
    encoder_states = [state_h, state_c]
    encoder_model = Model(encoder_inputs, encoder_states)
    print(encoder_model.summary())

    # Set up the decoder, using `encoder_states` as initial state.
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    # We set up our decoder to return full output sequences,
    # and to return internal states as well. We don't use the 
    # return states in the training model, but we will use them in inference.
    decoder = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder(decoder_inputs,
                                    initial_state=encoder_states)
    decoder_dense = Dense(num_decoder_tokens, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    # Define the model that will turn
    # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    print(model.summary())

    # Inference decoder model
    decoder_state_input_h = Input(shape=(latent_dim, ))
    decoder_state_input_c = Input(shape=(latent_dim, ))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

    decoder_outputs, state_h, state_c = decoder(
        decoder_inputs, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs] + decoder_states)
    print(decoder_model.summary())
    return model, encoder_model, decoder_model


def seq2seq_gru(num_encoder_tokens, num_decoder_tokens, latent_dim=256):
    # Define an input sequence and process it.
    encoder_inputs = Input(shape=(None, num_encoder_tokens))
    encoder = GRU(latent_dim, return_state=True)
    encoder_outputs, state_h = encoder(encoder_inputs)
    # We discard `encoder_outputs` and only keep the states.
    encoder_states = [state_h]
    encoder_model = Model(encoder_inputs, encoder_states)
    print(encoder_model.summary())
    
    # Set up the decoder, using `encoder_states` as initial state.
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    # We set up our decoder to return full output sequences,
    # and to return internal states as well. We don't use the 
    # return states in the training model, but we will use them in inference.
    decoder = GRU(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _ = decoder(decoder_inputs, initial_state=state_h)
    decoder_dense = Dense(num_decoder_tokens, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    print(model.summary())
    
    # Inference decoder model
    decoder_state_input_h = Input(shape=(latent_dim, ))
    decoder_states_inputs = [decoder_state_input_h]

    decoder_outputs, state_h = decoder(
        decoder_inputs, initial_state=decoder_states_inputs)
    decoder_states = [state_h]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs] + decoder_states)
    print(decoder_model.summary())
    return model, encoder_model, decoder_model

In [0]:
model2, encoder_model2, decoder_model2 = seq2seq_lstm(num_encoder_tokens, num_decoder_tokens)

# Run training
model2.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model2.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=1,
          validation_split=0.2)

In [0]:
model, encoder_model, decoder_model = seq2seq_gru(num_encoder_tokens, num_decoder_tokens)

# Run training
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, 71)          0         
_________________________________________________________________
gru_1 (GRU)                  [(None, 256), (None, 256) 251904    
Total params: 251,904
Trainable params: 251,904
Non-trainable params: 0
_________________________________________________________________
None
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, 71)     0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, None, 94)     0                                            
________________________________

<keras.callbacks.History at 0x7f8197c73be0>

In [0]:
def decode_sequence(input_seq, encoder_model, decoder_model, m='gru'):
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)
    if m == 'gru':
        states_value = [states_value]

    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0, target_token_index['\t']] = 1.

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        x = decoder_model.predict([target_seq] + states_value)
        output_tokens = x[0]

        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char

        # Exit condition: either hit max length
        # or find stop character.
        if (sampled_char == '\n' or len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.

        # Update states
        states_value = x[1:]

    return decoded_sentence

In [0]:
for seq_index in range(10):
    # Take one sequence (part of the training set)
    # for trying out decoding.
    input_seq = encoder_input_data[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq, encoder_model, decoder_model, m='gru')
    print('-')
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)

-
Input sentence: Go.
Decoded sentence: Tous pais pais !

-
Input sentence: Run!
Decoded sentence: Tous pais pais !

-
Input sentence: Run!
Decoded sentence: Tous pais pais !

-
Input sentence: Fire!
Decoded sentence: Tous pais pais !

-
Input sentence: Help!
Decoded sentence: Tous me sois !

-
Input sentence: Jump.
Decoded sentence: Tous me sois !

-
Input sentence: Stop!
Decoded sentence: Tous pais !

-
Input sentence: Stop!
Decoded sentence: Tous pais !

-
Input sentence: Stop!
Decoded sentence: Tous pais !

-
Input sentence: Wait!
Decoded sentence: Tous pais !

