In [152]:
import os
import pickle
import copy
import numpy as np


CODES = {'<PAD>': 0, '<EOS>': 1, '<UNK>': 2, '<GO>': 3 }


def load_data(path):
    """ Load Dataset from File """
    input_file = os.path.join(path)
    
    with open(input_file, 'r', encoding='utf-8') as f:
        return f.read()


def preprocess_and_save_data(source_path, target_path, text_to_ids):
    """ Preprocess Text Data.  Save to to file."""
    # Preprocess
    source_text = load_data(source_path)
    target_text = load_data(target_path)
    
    source_text = source_text.lower()
    target_text = target_text.lower()
    
    source_vocab_to_int, source_int_to_vocab = create_lookup_tables(source_text)
    target_vocab_to_int, target_int_to_vocab = create_lookup_tables(target_text)
    
    source_text, target_text = text_to_ids(source_text, target_text, source_vocab_to_int, target_vocab_to_int)

# Save Data
    with open('/Users/kamalesh_das/Desktop/Python/AcadGild/LanguageTranslator/preprocess.p', 'wb') as out_file:
        pickle.dump((
        (source_text, target_text),
            (source_vocab_to_int, target_vocab_to_int),
            (source_int_to_vocab, target_int_to_vocab)), out_file)


def load_preprocess():
    """Load the Preprocessed Training data and return them in batches of <batch_size> or less"""
    with open('/Users/kamalesh_das/Desktop/Python/AcadGild/LanguageTranslator/preprocess.p', mode='rb') as in_file:
        return pickle.load(in_file)


def create_lookup_tables(text):
    """Create lookup tables for vocabulary"""
    vocab = set(text.split())
    vocab_to_int = copy.copy(CODES)

    for v_i, v in enumerate(vocab, len(CODES)):
        vocab_to_int[v] = v_i

    int_to_vocab = {v_i: v for v, v_i in vocab_to_int.items()}

    return vocab_to_int, int_to_vocab


def save_params(params):
    """Save parameters to file"""
    with open('/Users/kamalesh_das/Desktop/Python/AcadGild/LanguageTranslator/params.p', 'wb') as out_file:
        pickle.dump(params, out_file)


def load_params():
    """Load parameters from file"""
    with open('/Users/kamalesh_das/Desktop/Python/AcadGild/LanguageTranslator/params.p', mode='rb') as in_file:
        return pickle.load(in_file)


def batch_data(source, target, batch_size):
    """Batch source and target together"""
    for batch_i in range(0, len(source)//batch_size):
        start_i = batch_i * batch_size
        source_batch = source[start_i:start_i + batch_size]
        target_batch = target[start_i:start_i + batch_size]
        yield np.array(pad_sentence_batch(source_batch)), np.array(pad_sentence_batch(target_batch))


def pad_sentence_batch(sentence_batch):
    """Pad sentence with <PAD> id"""
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [CODES['<PAD>']] * (max_sentence - len(sentence)) for sentence in sentence_batch]

In [153]:
import numpy as np
import tensorflow as tf
from tensorflow.python.layers.core import Dense
import itertools
import collections


def _print_success_message():
    print('Tests Passed')


def test_text_to_ids(text_to_ids):
    test_source_text = 'new jersey is sometimes quiet during autumn , and it is snowy in april .\nthe united states is usually chilly during july , and it is usually freezing in november .\ncalifornia is usually quiet during march , and it is usually hot in june .\nthe united states is sometimes mild during june , and it is cold in september .'
    test_target_text = 'new jersey est parfois calme pendant l\' automne , et il est neigeux en avril .\nles états-unis est généralement froid en juillet , et il gèle habituellement en novembre .\ncalifornia est généralement calme en mars , et il est généralement chaud en juin .\nles états-unis est parfois légère en juin , et il fait froid en septembre .'
    
    test_source_text = test_source_text.lower()
    test_target_text = test_target_text.lower()
    
    source_vocab_to_int, source_int_to_vocab = create_lookup_tables(test_source_text)
    target_vocab_to_int, target_int_to_vocab = create_lookup_tables(test_target_text)
    
    test_source_id_seq, test_target_id_seq = text_to_ids(test_source_text, test_target_text, source_vocab_to_int, target_vocab_to_int)

    assert len(test_source_id_seq) == len(test_source_text.split('\n')),\
        'source_id_text has wrong length, it should be {}.'.format(len(test_source_text.split('\n')))
    assert len(test_target_id_seq) == len(test_target_text.split('\n')), \
        'target_id_text has wrong length, it should be {}.'.format(len(test_target_text.split('\n')))

    target_not_iter = [type(x) for x in test_source_id_seq if not isinstance(x, collections.Iterable)]
    assert not target_not_iter,\
        'Element in source_id_text is not iteratable.  Found type {}'.format(target_not_iter[0])
    target_not_iter = [type(x) for x in test_target_id_seq if not isinstance(x, collections.Iterable)]
    assert not target_not_iter, \
        'Element in target_id_text is not iteratable.  Found type {}'.format(target_not_iter[0])

    source_changed_length = [(words, word_ids) for words, word_ids in zip(test_source_text.split('\n'), test_source_id_seq) if len(words.split()) != len(word_ids)]
    assert not source_changed_length,\
        'Source text changed in size from {} word(s) to {} id(s): {}'.format(len(source_changed_length[0][0].split()), len(source_changed_length[0][1]), source_changed_length[0][1])

    target_missing_end = [word_ids for word_ids in test_target_id_seq if word_ids[-1] != target_vocab_to_int['<EOS>']]
    assert not target_missing_end,\
        'Missing <EOS> id at the end of {}'.format(target_missing_end[0])

    target_bad_size = [(words.split(), word_ids) for words, word_ids in zip(test_target_text.split('\n'), test_target_id_seq) if len(word_ids) != len(words.split()) + 1]
    assert not target_bad_size,\
        'Target text incorrect size.  {} should be length {}'.format(target_bad_size[0][1], len(target_bad_size[0][0]) + 1)

    source_bad_id = [(word, word_id) for word, word_id in zip([word for sentence in test_source_text.split('\n') for word in sentence.split()],itertools.chain.from_iterable(test_source_id_seq)) if source_vocab_to_int[word] != word_id]
    assert not source_bad_id,\
        'Source word incorrectly converted from {} to id {}.'.format(source_bad_id[0][0], source_bad_id[0][1])

    target_bad_id = [(word, word_id) for word, word_id in zip([word for sentence in test_target_text.split('\n') for word in sentence.split()],[word_id for word_ids in test_target_id_seq for word_id in word_ids[:-1]]) if target_vocab_to_int[word] != word_id]
    assert not target_bad_id,\
        'Target word incorrectly converted from {} to id {}.'.format(target_bad_id[0][0], target_bad_id[0][1])

    _print_success_message()


def test_model_inputs(model_inputs):
    with tf.Graph().as_default():
        input_data, targets, lr, keep_prob, target_sequence_length, max_target_sequence_length, source_sequence_length = model_inputs()

    # Check type
        assert input_data.op.type == 'Placeholder',\
            'Input is not a Placeholder.'
        assert targets.op.type == 'Placeholder',\
            'Targets is not a Placeholder.'
        assert lr.op.type == 'Placeholder',\
            'Learning Rate is not a Placeholder.'
        assert keep_prob.op.type == 'Placeholder', \
            'Keep Probability is not a Placeholder.'
        assert target_sequence_length.op.type == 'Placeholder', \
            'Target Sequence Length is not a Placeholder.'
        assert max_target_sequence_length.op.type == 'Max', \
            'Max Target Sequence Length is not a Placeholder.'
        assert source_sequence_length.op.type == 'Placeholder', \
            'Source Sequence Length is not a Placeholder.'

        # Check name
        assert input_data.name == 'input:0',\
            'Input has bad name.  Found name {}'.format(input_data.name)
        assert target_sequence_length.name == 'target_sequence_length:0',\
            'Target Sequence Length has bad name.  Found name {}'.format(target_sequence_length.name)
        assert source_sequence_length.name == 'source_sequence_length:0',\
            'Source Sequence Length has bad name.  Found name {}'.format(source_sequence_length.name)
        assert keep_prob.name == 'keep_prob:0', \
            'Keep Probability has bad name.  Found name {}'.format(keep_prob.name)

        assert tf.assert_rank(input_data, 2, message='Input data has wrong rank')
        assert tf.assert_rank(targets, 2, message='Targets has wrong rank')
        assert tf.assert_rank(lr, 0, message='Learning Rate has wrong rank')
        assert tf.assert_rank(keep_prob, 0, message='Keep Probability has wrong rank')
        assert tf.assert_rank(target_sequence_length, 1, message='Target Sequence Length has wrong rank')
        assert tf.assert_rank(max_target_sequence_length, 0, message='Max Target Sequence Length has wrong rank')
        assert tf.assert_rank(source_sequence_length, 1, message='Source Sequence Lengthhas wrong rank')

    _print_success_message()


def test_encoding_layer(encoding_layer):
    rnn_size = 512
    batch_size = 64
    num_layers = 3
    source_sequence_len = 22
    source_vocab_size = 20
    encoding_embedding_size = 30

    with tf.Graph().as_default():
        rnn_inputs = tf.placeholder(tf.int32, [batch_size,source_sequence_len])
        source_sequence_length = tf.placeholder(tf.int32,(None,),name='source_sequence_length')
        keep_prob = tf.placeholder(tf.float32)

        enc_output, states = encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob,source_sequence_length, source_vocab_size,encoding_embedding_size)


        assert len(states) == num_layers,\
            'Found {} state(s). It should be {} states.'.format(len(states), num_layers)

        bad_types = [type(state) for state in states if not isinstance(state, tf.contrib.rnn.LSTMStateTuple)]
        assert not bad_types,\
            'Found wrong type: {}'.format(bad_types[0])

        bad_shapes = [state_tensor.get_shape() for state in states for state_tensor in state if state_tensor.get_shape().as_list() not in [[None, rnn_size], [batch_size, rnn_size]]]
        assert not bad_shapes,\
            'Found wrong shape: {}'.format(bad_shapes[0])

    _print_success_message()


def test_decoding_layer(decoding_layer):
    batch_size = 64
    vocab_size = 1000
    embedding_size = 200
    sequence_length = 22
    rnn_size = 512
    num_layers = 3
    target_vocab_to_int = {'<EOS>': 1, '<GO>': 3}

    with tf.Graph().as_default():
        target_sequence_length_p = tf.placeholder(tf.int32, (None,), name='target_sequence_length')
        max_target_sequence_length = tf.reduce_max(target_sequence_length_p, name='max_target_len')

        dec_input = tf.placeholder(tf.int32, [batch_size, sequence_length])
        dec_embed_input = tf.placeholder(tf.float32, [batch_size, sequence_length, embedding_size])
        dec_embeddings = tf.placeholder(tf.float32, [vocab_size, embedding_size])
        keep_prob = tf.placeholder(tf.float32)
        state = tf.contrib.rnn.LSTMStateTuple(tf.placeholder(tf.float32, [None, rnn_size]),tf.placeholder(tf.float32, [None, rnn_size]))
        encoder_state = (state, state, state)

        train_decoder_output, infer_logits_output = decoding_layer( dec_input,encoder_state,target_sequence_length_p,max_target_sequence_length,rnn_size,num_layers,target_vocab_to_int,vocab_size,batch_size,keep_prob,embedding_size)



        assert isinstance(train_decoder_output, tf.contrib.seq2seq.BasicDecoderOutput),\
            'Found wrong type: {}'.format(type(train_decoder_output))
        assert isinstance(infer_logits_output, tf.contrib.seq2seq.BasicDecoderOutput),\
            'Found wrong type: {}'.format(type(infer_logits_output))

        assert train_decoder_output.rnn_output.get_shape().as_list() == [batch_size, None, vocab_size], \
            'Wrong shape returned.  Found {}'.format(train_decoder_output.rnn_output.get_shape())
        assert infer_logits_output.sample_id.get_shape().as_list() == [batch_size, None], \
            'Wrong shape returned.  Found {}'.format(infer_logits_output.sample_id.get_shape())


    _print_success_message()


def test_seq2seq_model(seq2seq_model):
    batch_size = 64
    vocab_size = 300
    embedding_size = 100
    sequence_length = 22
    rnn_size = 512
    num_layers = 3
    target_vocab_to_int = {'<EOS>': 1, '<GO>': 3}

    with tf.Graph().as_default():

        dec_input = tf.placeholder(tf.int32, [batch_size, sequence_length])
        dec_embed_input = tf.placeholder(tf.float32, [batch_size, sequence_length, embedding_size])
        dec_embeddings = tf.placeholder(tf.float32, [vocab_size, embedding_size])
        keep_prob = tf.placeholder(tf.float32)
        enc_state = tf.contrib.rnn.LSTMStateTuple(tf.placeholder(tf.float32, [None, rnn_size]),tf.placeholder(tf.float32, [None, rnn_size]))

        input_data = tf.placeholder(tf.int32, [batch_size, sequence_length])
        target_data = tf.placeholder(tf.int32, [batch_size, sequence_length])
        keep_prob = tf.placeholder(tf.float32)
        source_sequence_length = tf.placeholder(tf.int32, (None,), name='source_sequence_length')
        target_sequence_length_p = tf.placeholder(tf.int32, (None,), name='target_sequence_length')
        max_target_sequence_length = tf.reduce_max(target_sequence_length_p, name='max_target_len')

        train_decoder_output, infer_logits_output = seq2seq_model(  input_data,target_data,keep_prob,batch_size,source_sequence_length,target_sequence_length_p,max_target_sequence_length,vocab_size,vocab_size,embedding_size,embedding_size,rnn_size,num_layers,target_vocab_to_int)

        # input_data, target_data, keep_prob, batch_size, sequence_length,
        # 200, target_vocab_size, 64, 80, rnn_size, num_layers, target_vocab_to_int)

        assert isinstance(train_decoder_output, tf.contrib.seq2seq.BasicDecoderOutput),\
            'Found wrong type: {}'.format(type(train_decoder_output))
        assert isinstance(infer_logits_output, tf.contrib.seq2seq.BasicDecoderOutput),\
            'Found wrong type: {}'.format(type(infer_logits_output))

        assert train_decoder_output.rnn_output.get_shape().as_list() == [batch_size, None, vocab_size], \
            'Wrong shape returned.  Found {}'.format(train_decoder_output.rnn_output.get_shape())
        assert infer_logits_output.sample_id.get_shape().as_list() == [batch_size, None], \
            'Wrong shape returned.  Found {}'.format(infer_logits_output.sample_id.get_shape())

    _print_success_message()


def test_sentence_to_seq(sentence_to_seq):
    sentence = 'this is a test sentence'
    vocab_to_int = {'<PAD>': 0, '<EOS>': 1, '<UNK>': 2, 'this': 3, 'is': 6, 'a': 5, 'sentence': 4}

    output = sentence_to_seq(sentence, vocab_to_int)

    assert len(output) == 5,\
        'Wrong length. Found a length of {}'.format(len(output))

    assert output[3] == 2,\
        'Missing <UNK> id.'

    assert np.array_equal(output, [3, 6, 5, 2, 4]),\
        'Incorrect ouput. Found {}'.format(output)

    _print_success_message()


def test_process_encoding_input(process_encoding_input):
    batch_size = 2
    seq_length = 3
    target_vocab_to_int = {'<GO>': 3}
    with tf.Graph().as_default():
        target_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        dec_input = process_encoding_input(target_data, target_vocab_to_int, batch_size)

        assert dec_input.get_shape() == (batch_size, seq_length),\
            'Wrong shape returned.  Found {}'.format(dec_input.get_shape())

        test_target_data = [[10, 20, 30], [40, 18, 23]]
        with tf.Session() as sess:
            test_dec_input = sess.run(dec_input, {target_data: test_target_data})

        assert test_dec_input[0][0] == target_vocab_to_int['<GO>'] and\
            test_dec_input[1][0] == target_vocab_to_int['<GO>'],\
            'Missing GO Id.'

    _print_success_message()


def test_decoding_layer_train(decoding_layer_train):
    batch_size = 64
    vocab_size = 1000
    embedding_size = 200
    sequence_length = 22
    rnn_size = 512
    num_layers = 3

    with tf.Graph().as_default():
        with tf.variable_scope("decoding") as decoding_scope:
# dec_cell = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.BasicLSTMCell(rnn_size)] * num_layers)

            dec_embed_input = tf.placeholder(tf.float32, [batch_size, sequence_length, embedding_size])
            keep_prob = tf.placeholder(tf.float32)
            target_sequence_length_p = tf.placeholder(tf.int32, (None,), name='target_sequence_length')
            max_target_sequence_length = tf.reduce_max(target_sequence_length_p, name='max_target_len')

            for layer in range(num_layers):
                with tf.variable_scope('decoder_{}'.format(layer)):
                    lstm = tf.contrib.rnn.LSTMCell(rnn_size,initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
                    dec_cell = tf.contrib.rnn.DropoutWrapper(lstm,input_keep_prob=keep_prob)

            output_layer = Dense(vocab_size,kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),name='output_layer')
            # output_fn = lambda x: tf.contrib.layers.fully_connected(x, vocab_size, None, scope=decoding_scope)


            encoder_state = tf.contrib.rnn.LSTMStateTuple(tf.placeholder(tf.float32, [None, rnn_size]),tf.placeholder(tf.float32, [None, rnn_size]))

            train_decoder_output = decoding_layer_train(encoder_state, dec_cell,dec_embed_input,target_sequence_length_p,max_target_sequence_length,output_layer,keep_prob)

            # encoder_state, dec_cell, dec_embed_input, sequence_length,
            #                      decoding_scope, output_fn, keep_prob)


            assert isinstance(train_decoder_output, tf.contrib.seq2seq.BasicDecoderOutput),\
                'Found wrong type: {}'.format(type(train_decoder_output))

            assert train_decoder_output.rnn_output.get_shape().as_list() == [batch_size, None, vocab_size], \
                'Wrong shape returned.  Found {}'.format(train_decoder_output.rnn_output.get_shape())

    _print_success_message()


def test_decoding_layer_infer(decoding_layer_infer):
    batch_size = 64
    vocab_size = 1000
    sequence_length = 22
    embedding_size = 200
    rnn_size = 512
    num_layers = 3

    with tf.Graph().as_default():
        with tf.variable_scope("decoding") as decoding_scope:

            dec_embeddings = tf.Variable(tf.random_uniform([vocab_size, embedding_size]))

            dec_embed_input = tf.placeholder(tf.float32, [batch_size, sequence_length, embedding_size])
            keep_prob = tf.placeholder(tf.float32)
            target_sequence_length_p = tf.placeholder(tf.int32, (None,), name='target_sequence_length')
            max_target_sequence_length = tf.reduce_max(target_sequence_length_p, name='max_target_len')

            for layer in range(num_layers):
                with tf.variable_scope('decoder_{}'.format(layer)):
                    lstm = tf.contrib.rnn.LSTMCell(rnn_size,initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
                    dec_cell = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob)

            output_layer = Dense(vocab_size,kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),name='output_layer')
            # output_fn = lambda x: tf.contrib.layers.fully_connected(x, vocab_size, None, scope=decoding_scope)


            encoder_state = tf.contrib.rnn.LSTMStateTuple(tf.placeholder(tf.float32, [None, rnn_size]),tf.placeholder(tf.float32, [None, rnn_size]))

            infer_logits_output = decoding_layer_infer( encoder_state,dec_cell,dec_embeddings,1,2,max_target_sequence_length,vocab_size,output_layer,batch_size,keep_prob)

            # encoder_state, dec_cell, dec_embeddings, 10, 20,
            #                     sequence_length, vocab_size, decoding_scope, output_fn, keep_prob)


            assert isinstance(infer_logits_output, tf.contrib.seq2seq.BasicDecoderOutput),\
                'Found wrong type: {}'.format(type(infer_logits_output))

            assert infer_logits_output.sample_id.get_shape().as_list() == [batch_size, None], \
                'Wrong shape returned.  Found {}'.format(infer_logits_output.sample_id.get_shape())

    _print_success_message()

###### Main Code starts here #####

In [154]:
source_path = '/Users/kamalesh_das/Desktop/Python/AcadGild/LanguageTranslator/small_vocab_en'
target_path = '/Users/kamalesh_das/Desktop/Python/AcadGild/LanguageTranslator/small_vocab_fr'
source_text = load_data(source_path)
target_text = load_data(target_path)

#Explore the Data

In [155]:
view_sentence_range = (10, 20)

"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import numpy as np

print('Dataset Stats')
print('Roughly the number of unique words: {}'.format(len({word: None for word in source_text.split()})))

sentences = source_text.split('\n')
word_counts = [len(sentence.split()) for sentence in sentences]
print('Number of sentences: {}'.format(len(sentences)))
print('Average number of words in a sentence: {}'.format(np.average(word_counts)))

print()
print('English sentences {} to {}:'.format(*view_sentence_range))
print('\n'.join(source_text.split('\n')[view_sentence_range[0]:view_sentence_range[1]]))
print()
print('French sentences {} to {}:'.format(*view_sentence_range))
print('\n'.join(target_text.split('\n')[view_sentence_range[0]:view_sentence_range[1]]))

Dataset Stats
Roughly the number of unique words: 227
Number of sentences: 40000
Average number of words in a sentence: 13.2385

English sentences 10 to 20:
the lime is her least liked fruit , but the banana is my least liked .
he saw a old yellow truck .
india is rainy during june , and it is sometimes warm in november .
that cat was my most loved animal .
he dislikes grapefruit , limes , and lemons .
her least liked fruit is the lemon , but his least liked is the grapefruit .
california is never cold during february , but it is sometimes freezing in june .
china is usually pleasant during autumn , and it is usually quiet in october .
paris is never freezing during november , but it is wonderful in october .
the united states is never rainy during january , but it is sometimes mild in october .

French sentences 10 to 20:
la chaux est son moins aimé des fruits , mais la banane est mon moins aimé.
il a vu un vieux camion jaune .
inde est pluvieux en juin , et il est parfois chaud en no

In [156]:
##### Text to Word Ids
def text_to_ids(source_text, target_text, source_vocab_to_int, target_vocab_to_int):
    """
    Convert source and target text to proper word ids
    :param source_text: String that contains all the source text.
    :param target_text: String that contains all the target text.
    :param source_vocab_to_int: Dictionary to go from the source words to an id
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :return: A tuple of lists (source_id_text, target_id_text)
    """
    # Just go through the text and transform it.
    source_id_text = []
    for idx, line in enumerate(source_text.split('\n')):
        source_id_text.append([])
        for word in line.split():
            source_id_text[idx].append(source_vocab_to_int[word])
        
    target_id_text = []
    for idx, line in enumerate(target_text.split('\n')):
        target_id_text.append([])
        for word in line.split():
            target_id_text[idx].append(target_vocab_to_int[word])
        target_id_text[idx].append(target_vocab_to_int['<EOS>'])

    return (source_id_text, target_id_text)


test_text_to_ids(text_to_ids)

Tests Passed


In [160]:
###Preprocess all the data and save it
preprocess_and_save_data(source_path, target_path, text_to_ids)

In [161]:
### Check Point

(source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = load_preprocess()

In [162]:
### Check the Version of TensorFlow and Access to GPU
from distutils.version import LooseVersion
import warnings
import tensorflow as tf
from tensorflow.python.layers.core import Dense

# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.1'), 'Please use TensorFlow version 1.1 or newer'
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.11.0


  del sys.path[0]


In [164]:
###Build the Neural Network
def model_inputs():
    """
    Create TF Placeholders for input, targets, learning rate, and lengths of source and target sequences.
    :return: Tuple (input, targets, learning rate, keep probability, target sequence length,
    max target sequence length, source sequence length)
    """
    # TODO: Implement Function
    inputs = tf.placeholder(tf.int32, shape=[None,None], name= "input")
    targets = tf.placeholder(tf.int32, shape=[None,None], name= "targets")
    lrate = tf.placeholder(tf.float32, name= "learning_rate")
    keep_prob = tf.placeholder(tf.float32, name= "keep_prob")
    target_seq_lenth = tf.placeholder(tf.int32, shape=[None], name= "target_sequence_length")
    max_target_len = tf.reduce_max(target_seq_lenth, name= 'max_target_len')
    source_seq_length = tf.placeholder(tf.int32, shape=[None], name= "source_sequence_length")
    return (inputs, targets, lrate, keep_prob, target_seq_lenth, max_target_len, source_seq_length)

test_model_inputs(model_inputs)

Tests Passed


In [165]:
### Process Decoder Input

def process_decoder_input(target_data, target_vocab_to_int, batch_size):
    """
    Preprocess target data for encoding
    :param target_data: Target Placehoder
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :param batch_size: Batch Size
    :return: Preprocessed target data
    """
    # Create a constant tensor with the 'go id'.
    go_id = tf.constant(target_vocab_to_int['<GO>'], shape=(batch_size,1), dtype=tf.int32)
    # Concatenate the vector without the last word id with the go ids vector
    processed_input = tf.concat([go_id,target_data[:,:-1]],1)
    return processed_input

test_process_encoding_input(process_decoder_input)

Tests Passed


In [167]:
###Encoding

def encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob, 
                   source_sequence_length, source_vocab_size, 
                   encoding_embedding_size):
    """
    Create encoding layer
    :param rnn_inputs: Inputs for the RNN
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param keep_prob: Dropout keep probability
    :param source_sequence_length: a list of the lengths of each sequence in the batch
    :param source_vocab_size: vocabulary size of source data
    :param encoding_embedding_size: embedding size of source data
    :return: tuple (RNN output, RNN state)
    """
    # Build the lstm cells wrapped in dropout
    def build_cell(rnn_size, keep_prob):
        lstm = tf.contrib.rnn.LSTMCell(rnn_size)
        lstm_drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        return lstm_drop
    # Stack them all
    stacked_lstm = tf.contrib.rnn.MultiRNNCell([build_cell(rnn_size, keep_prob) for _ in range(num_layers)])
    # Creathe embedding layer.
    embed_encoder = tf.contrib.layers.embed_sequence(rnn_inputs, vocab_size = source_vocab_size, embed_dim = encoding_embedding_size)
    # If we don't have an initial zero state, provide a dtype.
    output, state = tf.nn.dynamic_rnn(stacked_lstm, embed_encoder, source_sequence_length, dtype=tf.float32)
    return (output, state)


test_encoding_layer(encoding_layer)

Tests Passed


In [138]:
### Decoding - Training

def decoding_layer_train(encoder_state, dec_cell, dec_embed_input, 
                         target_sequence_length, max_summary_length, 
                         output_layer, keep_prob):
    """
    Create a decoding layer for training
    :param encoder_state: Encoder State
    :param dec_cell: Decoder RNN Cell
    :param dec_embed_input: Decoder embedded input
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_summary_length: The length of the longest sequence in the batch
    :param output_layer: Function to apply the output layer
    :param keep_prob: Dropout keep probability
    :return: BasicDecoderOutput containing training logits and sample_id
    """
    # TODO: Implement Function
    trainig_helper = tf.contrib.seq2seq.TrainingHelper(dec_embed_input, target_sequence_length)
    basic_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, trainig_helper, encoder_state, output_layer)
    f_output, _, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder,maximum_iterations=max_summary_length)
    return f_output


test_decoding_layer_train(decoding_layer_train)

Tests Passed


In [168]:
### Decoding - Inference
def decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                         end_of_sequence_id, max_target_sequence_length,
                         vocab_size, output_layer, batch_size, keep_prob):
    """
    Create a decoding layer for inference
    :param encoder_state: Encoder state
    :param dec_cell: Decoder RNN Cell
    :param dec_embeddings: Decoder embeddings
    :param start_of_sequence_id: GO ID
    :param end_of_sequence_id: EOS Id
    :param max_target_sequence_length: Maximum length of target sequences
    :param vocab_size: Size of decoder/target vocabulary
    :param decoding_scope: TenorFlow Variable Scope for decoding
    :param output_layer: Function to apply the output layer
    :param batch_size: Batch size
    :param keep_prob: Dropout keep probability
    :return: BasicDecoderOutput containing inference logits and sample_id
    """
    # Convert the start_ids to be a vector with batch size (the go id repeated batch size times)
    start_ids = tf.tile([start_of_sequence_id], [batch_size])
    # Create the embedding helper.
    embedding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        dec_embeddings, start_ids, end_of_sequence_id)
    basic_decoder = tf.contrib.seq2seq.BasicDecoder(
        dec_cell, embedding_helper, encoder_state, output_layer)
    f_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
        basic_decoder,maximum_iterations=max_target_sequence_length)
    return f_output


test_decoding_layer_infer(decoding_layer_infer)

Tests Passed


In [140]:
### Build the Decoding Layer
def decoding_layer(dec_input, encoder_state,
                   target_sequence_length, max_target_sequence_length,
                   rnn_size,
                   num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size):
    """
    Create decoding layer
    :param dec_input: Decoder input
    :param encoder_state: Encoder state
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_target_sequence_length: Maximum length of target sequences
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :param target_vocab_size: Size of target vocabulary
    :param batch_size: The size of the batch
    :param keep_prob: Dropout keep probability
    :param decoding_embedding_size: Decoding embedding size
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    # Use the same proess as in the encoding layer.
    def build_cell(rnn_size, keep_prob):
        lstm = tf.contrib.rnn.LSTMCell(rnn_size)
        lstm_drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        return lstm_drop
    # Stack them all
    stacked_lstm = tf.contrib.rnn.MultiRNNCell([build_cell(rnn_size, keep_prob) for _ in range(num_layers)])
    
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    dense_layer = Dense(target_vocab_size,
                         kernel_initializer = tf.truncated_normal_initializer(mean = 0.0, stddev=0.1))
    
    with tf.variable_scope("decode") as scope:
        tr_decoder_output = decoding_layer_train(
            encoder_state, stacked_lstm, dec_embed_input, 
            target_sequence_length, max_target_sequence_length, 
            dense_layer, keep_prob)
        scope.reuse_variables()
        inf_decoder_output = decoding_layer_infer(
            encoder_state, stacked_lstm, dec_embeddings, 
            target_vocab_to_int['<GO>'], target_vocab_to_int['<EOS>'], 
            max_target_sequence_length, target_vocab_size, 
            dense_layer, batch_size, keep_prob)
    
    return tr_decoder_output, inf_decoder_output


test_decoding_layer(decoding_layer)

Tests Passed


In [169]:
###### Build the Neural Network ############
def seq2seq_model(input_data, target_data, keep_prob, batch_size,
                  source_sequence_length, target_sequence_length,
                  max_target_sentence_length,
                  source_vocab_size, target_vocab_size,
                  enc_embedding_size, dec_embedding_size,
                  rnn_size, num_layers, target_vocab_to_int):
    """
    Build the Sequence-to-Sequence part of the neural network
    :param input_data: Input placeholder
    :param target_data: Target placeholder
    :param keep_prob: Dropout keep probability placeholder
    :param batch_size: Batch Size
    :param source_sequence_length: Sequence Lengths of source sequences in the batch
    :param target_sequence_length: Sequence Lengths of target sequences in the batch
    :param source_vocab_size: Source vocabulary size
    :param target_vocab_size: Target vocabulary size
    :param enc_embedding_size: Decoder embedding size
    :param dec_embedding_size: Encoder embedding size
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    output, state = encoding_layer(input_data, rnn_size, num_layers, keep_prob, 
                   source_sequence_length, source_vocab_size, 
                   enc_embedding_size)
    
    processed_input = process_decoder_input(target_data, target_vocab_to_int, batch_size)
    
    tr_decoder_output, inf_decoder_output = decoding_layer(processed_input, state,
                   target_sequence_length, max_target_sentence_length,
                   rnn_size, num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, dec_embedding_size)
    
    return tr_decoder_output, inf_decoder_output



test_seq2seq_model(seq2seq_model)

Tests Passed


In [173]:
### Neural Network Training
# Number of Epochs
epochs = 10
# Batch Size
batch_size = 512
# RNN Size
rnn_size = 128
# Number of Layers
num_layers = 2
# Embedding Size
encoding_embedding_size = 128
decoding_embedding_size = 128
# Learning Rate
learning_rate = 0.001
# Dropout Keep Probability
keep_probability = 0.55
display_step = True

In [176]:
### Build the Graph
save_path = '/Users/kamalesh_das/Desktop/Python/AcadGild/LanguageTranslator/dev'
(source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = load_preprocess()
max_target_sentence_length = max([len(sentence) for sentence in source_int_text])

train_graph = tf.Graph()
with train_graph.as_default():
    input_data, targets, lr, keep_prob, target_sequence_length, max_target_sequence_length, source_sequence_length = model_inputs()

    #sequence_length = tf.placeholder_with_default(max_target_sentence_length, None, name='sequence_length')
    input_shape = tf.shape(input_data)

    train_logits, inference_logits = seq2seq_model(tf.reverse(input_data, [-1]),
                                                   targets,
                                                   keep_prob,
                                                   batch_size,
                                                   source_sequence_length,
                                                   target_sequence_length,
                                                   max_target_sequence_length,
                                                   len(source_vocab_to_int),
                                                   len(target_vocab_to_int),
                                                   encoding_embedding_size,
                                                   decoding_embedding_size,
                                                   rnn_size,
                                                   num_layers,
                                                   target_vocab_to_int)


    training_logits = tf.identity(train_logits.rnn_output, name='logits')
    inference_logits = tf.identity(inference_logits.sample_id, name='predictions')

    masks = tf.sequence_mask(target_sequence_length, max_target_sequence_length, dtype=tf.float32, name='masks')

    with tf.name_scope("optimization"):
        # Loss function
        cost = tf.contrib.seq2seq.sequence_loss(
            training_logits,
            targets,
            masks)

        # Optimizer
        optimizer = tf.train.AdamOptimizer(lr)

        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

In [178]:
def pad_sentence_batch(sentence_batch, pad_int):
    """Pad sentences with <PAD> so that each sentence of a batch has the same length"""
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [pad_int] * (max_sentence - len(sentence)) for sentence in sentence_batch]


def get_batches(sources, targets, batch_size, source_pad_int, target_pad_int):
    """Batch targets, sources, and the lengths of their sentences together"""
    for batch_i in range(0, len(sources)//batch_size):
        start_i = batch_i * batch_size

        # Slice the right amount for the batch
        sources_batch = sources[start_i:start_i + batch_size]
        targets_batch = targets[start_i:start_i + batch_size]

        # Pad
        pad_sources_batch = np.array(pad_sentence_batch(sources_batch, source_pad_int))
        pad_targets_batch = np.array(pad_sentence_batch(targets_batch, target_pad_int))

        # Need the lengths for the _lengths parameters
        pad_targets_lengths = []
        for target in pad_targets_batch:
            pad_targets_lengths.append(len(target))

        pad_source_lengths = []
        for source in pad_sources_batch:
            pad_source_lengths.append(len(source))

        yield pad_sources_batch, pad_targets_batch, pad_source_lengths, pad_targets_lengths

In [179]:
## Train
def get_accuracy(target, logits):
    """
    Calculate accuracy
    """
    max_seq = max(target.shape[1], logits.shape[1])
    if max_seq - target.shape[1]:
        target = np.pad(
            target,
            [(0,0),(0,max_seq - target.shape[1])],
            'constant')
    if max_seq - logits.shape[1]:
        logits = np.pad(
            logits,
            [(0,0),(0,max_seq - logits.shape[1])],
            'constant')

    return np.mean(np.equal(target, logits))

# Split data to training and validation sets
train_source = source_int_text[batch_size:]
train_target = target_int_text[batch_size:]
valid_source = source_int_text[:batch_size]
valid_target = target_int_text[:batch_size]
(valid_sources_batch, valid_targets_batch, valid_sources_lengths, valid_targets_lengths ) = next(get_batches(valid_source,
                                                                                                             valid_target,
                                                                                                             batch_size,
                                                                                                             source_vocab_to_int['<PAD>'],
                                                                                                             target_vocab_to_int['<PAD>']))                                                                                                  
with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())

    for epoch_i in range(epochs):
        for batch_i, (source_batch, target_batch, sources_lengths, targets_lengths) in enumerate(
                get_batches(train_source, train_target, batch_size,
                            source_vocab_to_int['<PAD>'],
                            target_vocab_to_int['<PAD>'])):

            _, loss = sess.run(
                [train_op, cost],
                {input_data: source_batch,
                 targets: target_batch,
                 lr: learning_rate,
                 target_sequence_length: targets_lengths,
                 source_sequence_length: sources_lengths,
                 keep_prob: keep_probability})


            if batch_i % display_step == 0 and batch_i > 0:


                batch_train_logits = sess.run(
                    inference_logits,
                    {input_data: source_batch,
                     source_sequence_length: sources_lengths,
                     target_sequence_length: targets_lengths,
                     keep_prob: 1.0})


                batch_valid_logits = sess.run(
                    inference_logits,
                    {input_data: valid_sources_batch,
                     source_sequence_length: valid_sources_lengths,
                     target_sequence_length: valid_targets_lengths,
                     keep_prob: 1.0})

                train_acc = get_accuracy(target_batch, batch_train_logits)

                valid_acc = get_accuracy(valid_targets_batch, batch_valid_logits)

                print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.4f}, Validation Accuracy: {:>6.4f}, Loss: {:>6.4f}'
                      .format(epoch_i, batch_i, len(source_int_text) // batch_size, train_acc, valid_acc, loss))

    # Save Model
    saver = tf.train.Saver()
    saver.save(sess, save_path)
    print('Model Trained and Saved')

Epoch   0 Batch    1/78 - Train Accuracy: 0.2329, Validation Accuracy: 0.3096, Loss: 5.6255
Epoch   0 Batch    2/78 - Train Accuracy: 0.2655, Validation Accuracy: 0.3096, Loss: 5.3936
Epoch   0 Batch    3/78 - Train Accuracy: 0.2444, Validation Accuracy: 0.3096, Loss: 5.2271
Epoch   0 Batch    4/78 - Train Accuracy: 0.2317, Validation Accuracy: 0.3096, Loss: 5.0593
Epoch   0 Batch    5/78 - Train Accuracy: 0.2380, Validation Accuracy: 0.3121, Loss: 4.8916
Epoch   0 Batch    6/78 - Train Accuracy: 0.2843, Validation Accuracy: 0.3155, Loss: 4.5755
Epoch   0 Batch    7/78 - Train Accuracy: 0.2837, Validation Accuracy: 0.3155, Loss: 4.4486
Epoch   0 Batch    8/78 - Train Accuracy: 0.2494, Validation Accuracy: 0.3169, Loss: 4.5276
Epoch   0 Batch    9/78 - Train Accuracy: 0.2876, Validation Accuracy: 0.3279, Loss: 4.3126
Epoch   0 Batch   10/78 - Train Accuracy: 0.2740, Validation Accuracy: 0.3448, Loss: 4.3580
Epoch   0 Batch   11/78 - Train Accuracy: 0.3121, Validation Accuracy: 0.3469, L

Epoch   1 Batch   15/78 - Train Accuracy: 0.4467, Validation Accuracy: 0.4806, Loss: 2.4604
Epoch   1 Batch   16/78 - Train Accuracy: 0.4622, Validation Accuracy: 0.4801, Loss: 2.4087
Epoch   1 Batch   17/78 - Train Accuracy: 0.4593, Validation Accuracy: 0.4885, Loss: 2.3914
Epoch   1 Batch   18/78 - Train Accuracy: 0.4351, Validation Accuracy: 0.4853, Loss: 2.4933
Epoch   1 Batch   19/78 - Train Accuracy: 0.4869, Validation Accuracy: 0.4776, Loss: 2.2889
Epoch   1 Batch   20/78 - Train Accuracy: 0.4378, Validation Accuracy: 0.4763, Loss: 2.4682
Epoch   1 Batch   21/78 - Train Accuracy: 0.4397, Validation Accuracy: 0.4849, Loss: 2.4947
Epoch   1 Batch   22/78 - Train Accuracy: 0.4647, Validation Accuracy: 0.4864, Loss: 2.3601
Epoch   1 Batch   23/78 - Train Accuracy: 0.4702, Validation Accuracy: 0.4877, Loss: 2.3414
Epoch   1 Batch   24/78 - Train Accuracy: 0.4380, Validation Accuracy: 0.4859, Loss: 2.4602
Epoch   1 Batch   25/78 - Train Accuracy: 0.4323, Validation Accuracy: 0.4806, L

Epoch   2 Batch   29/78 - Train Accuracy: 0.4521, Validation Accuracy: 0.5036, Loss: 1.8451
Epoch   2 Batch   30/78 - Train Accuracy: 0.4871, Validation Accuracy: 0.5075, Loss: 1.7799
Epoch   2 Batch   31/78 - Train Accuracy: 0.4923, Validation Accuracy: 0.5162, Loss: 1.7672
Epoch   2 Batch   32/78 - Train Accuracy: 0.4833, Validation Accuracy: 0.5159, Loss: 1.7615
Epoch   2 Batch   33/78 - Train Accuracy: 0.4959, Validation Accuracy: 0.5120, Loss: 1.6945
Epoch   2 Batch   34/78 - Train Accuracy: 0.4907, Validation Accuracy: 0.5138, Loss: 1.7319
Epoch   2 Batch   35/78 - Train Accuracy: 0.5042, Validation Accuracy: 0.5210, Loss: 1.7103
Epoch   2 Batch   36/78 - Train Accuracy: 0.4985, Validation Accuracy: 0.5247, Loss: 1.7210
Epoch   2 Batch   37/78 - Train Accuracy: 0.5046, Validation Accuracy: 0.5255, Loss: 1.7292
Epoch   2 Batch   38/78 - Train Accuracy: 0.5007, Validation Accuracy: 0.5312, Loss: 1.7168
Epoch   2 Batch   39/78 - Train Accuracy: 0.5073, Validation Accuracy: 0.5292, L

Epoch   3 Batch   43/78 - Train Accuracy: 0.4683, Validation Accuracy: 0.5130, Loss: 1.4049
Epoch   3 Batch   44/78 - Train Accuracy: 0.5065, Validation Accuracy: 0.5226, Loss: 1.3491
Epoch   3 Batch   45/78 - Train Accuracy: 0.4885, Validation Accuracy: 0.5248, Loss: 1.4053
Epoch   3 Batch   46/78 - Train Accuracy: 0.4805, Validation Accuracy: 0.5195, Loss: 1.4036
Epoch   3 Batch   47/78 - Train Accuracy: 0.5257, Validation Accuracy: 0.5203, Loss: 1.2656
Epoch   3 Batch   48/78 - Train Accuracy: 0.5076, Validation Accuracy: 0.5306, Loss: 1.3056
Epoch   3 Batch   49/78 - Train Accuracy: 0.4751, Validation Accuracy: 0.5134, Loss: 1.3814
Epoch   3 Batch   50/78 - Train Accuracy: 0.4636, Validation Accuracy: 0.5098, Loss: 1.3868
Epoch   3 Batch   51/78 - Train Accuracy: 0.4905, Validation Accuracy: 0.5196, Loss: 1.3670
Epoch   3 Batch   52/78 - Train Accuracy: 0.5038, Validation Accuracy: 0.5241, Loss: 1.3084
Epoch   3 Batch   53/78 - Train Accuracy: 0.4747, Validation Accuracy: 0.5178, L

Epoch   4 Batch   57/78 - Train Accuracy: 0.5203, Validation Accuracy: 0.5303, Loss: 1.0998
Epoch   4 Batch   58/78 - Train Accuracy: 0.5288, Validation Accuracy: 0.5452, Loss: 1.0888
Epoch   4 Batch   59/78 - Train Accuracy: 0.5319, Validation Accuracy: 0.5439, Loss: 1.0644
Epoch   4 Batch   60/78 - Train Accuracy: 0.5214, Validation Accuracy: 0.5305, Loss: 1.0480
Epoch   4 Batch   61/78 - Train Accuracy: 0.5399, Validation Accuracy: 0.5367, Loss: 1.0197
Epoch   4 Batch   62/78 - Train Accuracy: 0.5387, Validation Accuracy: 0.5405, Loss: 1.0535
Epoch   4 Batch   63/78 - Train Accuracy: 0.5067, Validation Accuracy: 0.5411, Loss: 1.0828
Epoch   4 Batch   64/78 - Train Accuracy: 0.5314, Validation Accuracy: 0.5526, Loss: 1.0707
Epoch   4 Batch   65/78 - Train Accuracy: 0.5370, Validation Accuracy: 0.5511, Loss: 1.0587
Epoch   4 Batch   66/78 - Train Accuracy: 0.5388, Validation Accuracy: 0.5474, Loss: 1.0313
Epoch   4 Batch   67/78 - Train Accuracy: 0.5154, Validation Accuracy: 0.5395, L

Epoch   5 Batch   71/78 - Train Accuracy: 0.5338, Validation Accuracy: 0.5704, Loss: 0.9654
Epoch   5 Batch   72/78 - Train Accuracy: 0.5673, Validation Accuracy: 0.5692, Loss: 0.9018
Epoch   5 Batch   73/78 - Train Accuracy: 0.5491, Validation Accuracy: 0.5685, Loss: 0.9496
Epoch   5 Batch   74/78 - Train Accuracy: 0.5503, Validation Accuracy: 0.5679, Loss: 0.9345
Epoch   5 Batch   75/78 - Train Accuracy: 0.5548, Validation Accuracy: 0.5661, Loss: 0.9129
Epoch   5 Batch   76/78 - Train Accuracy: 0.5408, Validation Accuracy: 0.5662, Loss: 0.9445
Epoch   6 Batch    1/78 - Train Accuracy: 0.5343, Validation Accuracy: 0.5670, Loss: 0.9366
Epoch   6 Batch    2/78 - Train Accuracy: 0.5318, Validation Accuracy: 0.5678, Loss: 0.9265
Epoch   6 Batch    3/78 - Train Accuracy: 0.5453, Validation Accuracy: 0.5676, Loss: 0.9426
Epoch   6 Batch    4/78 - Train Accuracy: 0.5373, Validation Accuracy: 0.5661, Loss: 0.9467
Epoch   6 Batch    5/78 - Train Accuracy: 0.5261, Validation Accuracy: 0.5673, L

Epoch   7 Batch    9/78 - Train Accuracy: 0.5809, Validation Accuracy: 0.5981, Loss: 0.8327
Epoch   7 Batch   10/78 - Train Accuracy: 0.5778, Validation Accuracy: 0.5991, Loss: 0.8318
Epoch   7 Batch   11/78 - Train Accuracy: 0.5789, Validation Accuracy: 0.6006, Loss: 0.8316
Epoch   7 Batch   12/78 - Train Accuracy: 0.5648, Validation Accuracy: 0.6032, Loss: 0.8537
Epoch   7 Batch   13/78 - Train Accuracy: 0.6112, Validation Accuracy: 0.5926, Loss: 0.7678
Epoch   7 Batch   14/78 - Train Accuracy: 0.5831, Validation Accuracy: 0.5912, Loss: 0.8159
Epoch   7 Batch   15/78 - Train Accuracy: 0.5785, Validation Accuracy: 0.5971, Loss: 0.8059
Epoch   7 Batch   16/78 - Train Accuracy: 0.5987, Validation Accuracy: 0.6007, Loss: 0.8133
Epoch   7 Batch   17/78 - Train Accuracy: 0.5928, Validation Accuracy: 0.6021, Loss: 0.7990
Epoch   7 Batch   18/78 - Train Accuracy: 0.5615, Validation Accuracy: 0.5963, Loss: 0.8307
Epoch   7 Batch   19/78 - Train Accuracy: 0.6112, Validation Accuracy: 0.5969, L

Epoch   8 Batch   23/78 - Train Accuracy: 0.6097, Validation Accuracy: 0.6112, Loss: 0.7536
Epoch   8 Batch   24/78 - Train Accuracy: 0.5934, Validation Accuracy: 0.6164, Loss: 0.7844
Epoch   8 Batch   25/78 - Train Accuracy: 0.5890, Validation Accuracy: 0.6152, Loss: 0.7954
Epoch   8 Batch   26/78 - Train Accuracy: 0.6289, Validation Accuracy: 0.6157, Loss: 0.6999
Epoch   8 Batch   27/78 - Train Accuracy: 0.6007, Validation Accuracy: 0.6151, Loss: 0.7420
Epoch   8 Batch   28/78 - Train Accuracy: 0.5646, Validation Accuracy: 0.6167, Loss: 0.7968
Epoch   8 Batch   29/78 - Train Accuracy: 0.5979, Validation Accuracy: 0.6167, Loss: 0.7653
Epoch   8 Batch   30/78 - Train Accuracy: 0.6063, Validation Accuracy: 0.6171, Loss: 0.7384
Epoch   8 Batch   31/78 - Train Accuracy: 0.6158, Validation Accuracy: 0.6144, Loss: 0.7308
Epoch   8 Batch   32/78 - Train Accuracy: 0.5904, Validation Accuracy: 0.6102, Loss: 0.7390
Epoch   8 Batch   33/78 - Train Accuracy: 0.6246, Validation Accuracy: 0.6171, L

Epoch   9 Batch   37/78 - Train Accuracy: 0.6240, Validation Accuracy: 0.6185, Loss: 0.6876
Epoch   9 Batch   38/78 - Train Accuracy: 0.6143, Validation Accuracy: 0.6171, Loss: 0.6994
Epoch   9 Batch   39/78 - Train Accuracy: 0.6163, Validation Accuracy: 0.6167, Loss: 0.6814
Epoch   9 Batch   40/78 - Train Accuracy: 0.5966, Validation Accuracy: 0.6186, Loss: 0.7216
Epoch   9 Batch   41/78 - Train Accuracy: 0.6103, Validation Accuracy: 0.6218, Loss: 0.7055
Epoch   9 Batch   42/78 - Train Accuracy: 0.6430, Validation Accuracy: 0.6164, Loss: 0.6483
Epoch   9 Batch   43/78 - Train Accuracy: 0.6008, Validation Accuracy: 0.6164, Loss: 0.7078
Epoch   9 Batch   44/78 - Train Accuracy: 0.6217, Validation Accuracy: 0.6177, Loss: 0.6934
Epoch   9 Batch   45/78 - Train Accuracy: 0.5899, Validation Accuracy: 0.6165, Loss: 0.7166
Epoch   9 Batch   46/78 - Train Accuracy: 0.6116, Validation Accuracy: 0.6140, Loss: 0.7070
Epoch   9 Batch   47/78 - Train Accuracy: 0.6451, Validation Accuracy: 0.6110, L

In [180]:
### Save Parameters
save_params(save_path)

In [181]:
### Checkpoint
import tensorflow as tf
import numpy as np

_, (source_vocab_to_int, target_vocab_to_int), (source_int_to_vocab, target_int_to_vocab) = load_preprocess()
load_path = load_params()

In [183]:
### Sentence to Sequence
def sentence_to_seq(sentence, vocab_to_int):
    """
    Convert a sentence to a sequence of ids
    :param sentence: String
    :param vocab_to_int: Dictionary to go from the words to an id
    :return: List of word ids
    """
    word_ids = []
    for word in sentence.lower().split():
        if word in vocab_to_int:
            word_ids.append(vocab_to_int[word])
        else:
            word_ids.append(vocab_to_int['<UNK>'])
    return word_ids


test_sentence_to_seq(sentence_to_seq)

Tests Passed


In [194]:
### Translate
translate_sentence = 'he saw a old yellow truck .'
#translate_sentence = 'truck is yellow'

translate_sentence = sentence_to_seq(translate_sentence, source_vocab_to_int)

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(load_path + '.meta')
    loader.restore(sess, load_path)

    input_data = loaded_graph.get_tensor_by_name('input:0')
    logits = loaded_graph.get_tensor_by_name('predictions:0')
    target_sequence_length = loaded_graph.get_tensor_by_name('target_sequence_length:0')
    source_sequence_length = loaded_graph.get_tensor_by_name('source_sequence_length:0')
    keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')

    translate_logits = sess.run(logits, {input_data: [translate_sentence]*batch_size,
                                         target_sequence_length: [len(translate_sentence)*2]*batch_size,
                                         source_sequence_length: [len(translate_sentence)]*batch_size,
                                         keep_prob: 1.0})[0]

print('Input')
print('  Word Ids:      {}'.format([i for i in translate_sentence]))
print('  English Words: {}'.format([source_int_to_vocab[i] for i in translate_sentence]))

print('\nPrediction')
print('  Word Ids:      {}'.format([i for i in translate_logits]))
print('  French Words: {}'.format(" ".join([target_int_to_vocab[i] for i in translate_logits])))

INFO:tensorflow:Restoring parameters from /Users/kamalesh_das/Desktop/Python/AcadGild/LanguageTranslator/dev
Input
  Word Ids:      [208, 154, 121, 11, 60, 37, 190]
  English Words: ['he', 'saw', 'a', 'old', 'yellow', 'truck', '.']

Prediction
  Word Ids:      [251, 43, 306, 260, 8, 298, 249, 329, 1]
  French Words: il a pas le nouveau camion voiture . <EOS>
