In [3]:
!pip install tensorflow-gpu==1.0


Looking in indexes: https://pypi.org/simple, https://legacy.pypi.org/simple
Collecting tensorflow-gpu==1.0
[?25l  Downloading https://files.pythonhosted.org/packages/46/e2/7389e7a1c10eb209ddabe0b35cca2e522a3985a1df7e07904c76d1d5609c/tensorflow_gpu-1.0.0-cp36-cp36m-manylinux1_x86_64.whl (95.3MB)
[K    100% |████████████████████████████████| 95.4MB 371kB/s 
Installing collected packages: tensorflow-gpu
Successfully installed tensorflow-gpu-1.0.0


In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import re
import io
tf.__version__

'1.0.0'

In [2]:
from google.colab import files
uploaded = files.upload()


Saving southpark_dialouges.csv to southpark_dialouges.csv


In [0]:
southpark = pd.read_csv(io.StringIO(uploaded['southpark_dialouges.csv'].decode('utf-8')))


In [4]:
southpark.head()

Unnamed: 0,Season,Episode,Character,Line
0,10,1,Stan,"You guys, you guys! Chef is going away. \n"
1,10,1,Kyle,Going away? For how long?\n
2,10,1,Stan,Forever.\n
3,10,1,Chef,I'm sorry boys.\n
4,10,1,Stan,"Chef said he's been bored, so he joining a gro..."


# Data Preprocessing

In [0]:
def clean_text(text):
    '''Clean text by removing unnecessary characters and altering the format of words.'''

    text = text.lower()
    
    text = re.sub(r"\n", "",  text)
    text = re.sub(r"[-()]", "", text)
    text = re.sub(r"\.", " .", text)
    text = re.sub(r"\!", " !", text)
    text = re.sub(r"\?", " ?", text)
    text = re.sub(r"\,", " ,", text)
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"it's", "it is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "that is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"n't", " not", text)
    text = re.sub(r"n'", "ng", text)
    text = re.sub(r"ohh", "oh", text)
    text = re.sub(r"ohhh", "oh", text)
    text = re.sub(r"ohhhh", "oh", text)
    text = re.sub(r"ohhhhh", "oh", text)
    text = re.sub(r"ohhhhhh", "oh", text)
    text = re.sub(r"ahh", "ah", text)
    text = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", text)
    
    return text

In [0]:
# Clean the scripts and add them to the same list.
text = []

for line in southpark.Line:
    text.append(clean_text(line))

In [7]:
text

['you guys  you guys  chef is going away  ',
 'going away  for how long ',
 'forever ',
 'i am sorry boys ',
 'chef said he is been bored  so he joining a group called the super adventure club  ',
 'wow ',
 'chef   what kind of questions do you think adventuring around the world is gonna answer  ',
 'that is the meaning of life  why are we here ',
 'i hope you are making the right choice ',
 'i am gonna miss him   i am gonna miss chef and i   and i do not know how to tell him  ',
 'dude  how are we gonna go on  chef was our fuh   fffriend  ',
 'and we will all miss you  chef   but we know you must do what your heart tells you  ',
 'byebye ',
 'goodbye ',
 'so long ',
 'so long  chef ',
 'goodbye  chef ',
 'goodbye  chef  have a great time with the super adventure club ',
 'goodbye    ',
 'draw two card  fatass ',
 'reverse to you  jew  ',
 'i will get it  ',
 'hello there  children ',
 'he is back ',
 'yeah ',
 'all right  ',
 'chef  i cannot believe you are back ',
 'well  it is true 

In [0]:
# Find the length of lines
lengths = []
for line in text:
    lengths.append(len(line.split()))

# Create a dataframe so that the values can be inspected
lengths = pd.DataFrame(lengths, columns=['counts'])

In [9]:
lengths

Unnamed: 0,counts
0,8
1,5
2,1
3,4
4,16
5,1
6,15
7,10
8,8
9,21


In [10]:
print(np.percentile(lengths, 95))
print(np.percentile(lengths, 99))

35.0
66.0


In [0]:
# Limit the text we will use to the shorter 95%.
max_line_length = 40

short_text = []
for line in text:
    if len(line.split()) <= max_line_length:
        short_text.append(line)

In [0]:
# Create a dictionary for the frequency of the vocabulary
vocab = {}
for line in short_text:
    for word in line.split():
        if word not in vocab:
            vocab[word] = 1
        else:
            vocab[word] += 1

In [0]:
threshold = 3
# In case we want to use a different vocabulary sizes for the source and target text, 
# we can set different threshold values.
# Nonetheless, we will create dictionaries to provide a unique integer for each word.
source_vocab_to_int = {}

word_num = 0
for k,v in vocab.items():
    if v >= threshold:
        source_vocab_to_int[k] = word_num
        word_num += 1
        
target_vocab_to_int = {}

word_num = 0
for k,v in vocab.items():
    if v >= threshold:
        target_vocab_to_int[k] = word_num
        word_num += 1

In [0]:
# Add the unique tokens to the vocabulary dictionaries.
codes = ['<PAD>','<EOS>','<UNK>','<GO>']

for code in codes:
    source_vocab_to_int[code] = len(source_vocab_to_int)+1
    
for code in codes:
    target_vocab_to_int[code] = len(target_vocab_to_int)+1

In [0]:
# Create dictionaries to map the unique integers to their respective words.
# i.e. an inverse dictionary for vocab_to_int.
source_int_to_vocab = {v_i: v for v, v_i in source_vocab_to_int.items()}
target_int_to_vocab = {v_i: v for v, v_i in target_vocab_to_int.items()}

In [0]:
# Create the source and target texts.
# The target text is the line following the source text.
source_text = short_text[:-1]
target_text = short_text[1:]
for i in range(len(target_text)):
    target_text[i] += ' <EOS>'

In [0]:
#Convert the text to integers. 
# Replace any words that are not in the respective vocabulary with <UNK> (unknown)
source_int = []
for line in source_text:
    sentence = []
    for word in line.split():
        if word not in source_vocab_to_int:
            sentence.append(source_vocab_to_int['<UNK>'])
        else:
            sentence.append(source_vocab_to_int[word])
    source_int.append(sentence)
    
target_int = []
for line in target_text:
    sentence = []
    for word in line.split():
        if word not in target_vocab_to_int:
            sentence.append(target_vocab_to_int['<UNK>'])
        else:
            sentence.append(target_vocab_to_int[word])
    target_int.append(sentence)

# Building the model

In [0]:
def model_inputs():
    '''Create palceholders for inputs to the model'''
    input_data = tf.placeholder(tf.int32, [None, None], name='input')
    targets = tf.placeholder(tf.int32, [None, None], name='targets')
    lr = tf.placeholder(tf.float32, name='learning_rate')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    return input_data, targets, lr, keep_prob

In [0]:
def process_encoding_input(target_data, vocab_to_int, batch_size):
    '''Remove the last word id from each batch and concat the <GO> to the begining of each batch'''
    ending = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
    dec_input = tf.concat([tf.fill([batch_size, 1], vocab_to_int['<GO>']), ending], 1)

    return dec_input

In [0]:
def encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob, sequence_length, attn_length):
    '''Create the encoding layer'''
    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    drop = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
    cell = tf.contrib.rnn.AttentionCellWrapper(drop, attn_length, state_is_tuple = True)
    enc_cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers)
    _, enc_state = tf.nn.bidirectional_dynamic_rnn(cell_fw = enc_cell,
                                                   cell_bw = enc_cell,
                                                   sequence_length = sequence_length,
                                                   inputs = rnn_inputs, 
                                                   dtype=tf.float32)

    return enc_state

In [0]:
def decoding_layer_train(encoder_state, dec_cell, dec_embed_input, sequence_length, decoding_scope,
                         output_fn, keep_prob):
    '''Decode the training data'''
    train_decoder_fn = tf.contrib.seq2seq.simple_decoder_fn_train(encoder_state)
    train_pred, _, _ = tf.contrib.seq2seq.dynamic_rnn_decoder(
        dec_cell, train_decoder_fn, dec_embed_input, sequence_length, scope=decoding_scope)
    train_pred_drop = tf.nn.dropout(train_pred, keep_prob)
    return output_fn(train_pred_drop)

In [0]:
def decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id, end_of_sequence_id,
                         maximum_length, vocab_size, decoding_scope, output_fn, keep_prob):
    '''Decode the prediction data'''
    infer_decoder_fn = tf.contrib.seq2seq.simple_decoder_fn_inference(
        output_fn, encoder_state, dec_embeddings, start_of_sequence_id, end_of_sequence_id, maximum_length, vocab_size)
    infer_logits, _, _ = tf.contrib.seq2seq.dynamic_rnn_decoder(dec_cell, infer_decoder_fn, scope=decoding_scope)
    return infer_logits

In [0]:
def decoding_layer(dec_embed_input, dec_embeddings, encoder_state, vocab_size, sequence_length, rnn_size,
                   num_layers, vocab_to_int, keep_prob, attn_length):
    '''Create the decoding cell and input the parameters for the training and inference decoding layers'''
    
    with tf.variable_scope("decoding") as decoding_scope:
        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        drop = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
        cell = tf.contrib.rnn.AttentionCellWrapper(drop, attn_length, state_is_tuple = True)
        dec_cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers)
        
        weights = tf.truncated_normal_initializer(stddev = 0.1)
        biases = tf.zeros_initializer()
        output_fn = lambda x: tf.contrib.layers.fully_connected(x, 
                                                                vocab_size, 
                                                                None, 
                                                                scope=decoding_scope,
                                                                weights_initializer = weights,
                                                                biases_initializer = biases)

        train_logits = decoding_layer_train(
            encoder_state[0], dec_cell, dec_embed_input, sequence_length, decoding_scope, output_fn, keep_prob)
        decoding_scope.reuse_variables()
        infer_logits = decoding_layer_infer(encoder_state[0], dec_cell, dec_embeddings, vocab_to_int['<GO>'],
                                            vocab_to_int['<EOS>'], sequence_length, vocab_size,
                                            decoding_scope, output_fn, keep_prob)

    return train_logits, infer_logits

In [0]:
def seq2seq_model(input_data, target_data, keep_prob, batch_size, sequence_length, source_vocab_size, target_vocab_size,
                  enc_embedding_size, dec_embedding_size, rnn_size, num_layers, vocab_to_int, attn_length):
    
    '''Use the previous functions to create the training and inference logits'''
    
    enc_embed_input = tf.contrib.layers.embed_sequence(input_data, source_vocab_size+1, enc_embedding_size)
    enc_state = encoding_layer(enc_embed_input, rnn_size, num_layers, keep_prob, sequence_length, attn_length)

    dec_input = process_encoding_input(target_data, vocab_to_int, batch_size)
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size+1, dec_embedding_size], -1.0, 1.0))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    train_logits, infer_logits = decoding_layer(dec_embed_input, dec_embeddings, enc_state, target_vocab_size+1, 
                                                sequence_length, rnn_size, num_layers, vocab_to_int, keep_prob, 
                                                attn_length)
    
    return train_logits, infer_logits

In [0]:
epochs = 10
batch_size = 32
rnn_size = 512
num_layers = 2
encoding_embedding_size = 512
decoding_embedding_size = 512
attn_length = 10
learning_rate = 0.0001
keep_probability = 0.5

In [0]:
train_graph = tf.Graph()
with train_graph.as_default():
    
    # Load the model inputs
    input_data, targets, lr, keep_prob = model_inputs()
    # Sequence length will be the max line length for each batch
    sequence_length = tf.placeholder_with_default(max_line_length, None, name='sequence_length')
    input_shape = tf.shape(input_data)
    
    # Create the logits from the model
    train_logits, inference_logits = seq2seq_model(
        tf.reverse(input_data, [-1]), targets, keep_prob, batch_size, sequence_length, len(source_vocab_to_int), 
        len(target_vocab_to_int), encoding_embedding_size, decoding_embedding_size, rnn_size, num_layers, 
        target_vocab_to_int, attn_length)
    
    # Create a tensor to be used for making predictions.
    tf.identity(inference_logits, 'logits')
    with tf.name_scope("optimization"):
        # Loss function
        cost = tf.contrib.seq2seq.sequence_loss(
            train_logits,
            targets,
            tf.ones([input_shape[0], sequence_length]))

        # Optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate)

        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

In [0]:
def pad_sentence_batch(sentence_batch, vocab_to_int):
    """Pad lines with <PAD> so each line of a batch has the same length"""
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [vocab_to_int['<PAD>']] * (max_sentence - len(sentence)) for sentence in sentence_batch]

In [0]:
def batch_data(source, target, batch_size):
    """Batch source and target together"""
    for batch_i in range(0, len(source)//batch_size):
        start_i = batch_i * batch_size
        source_batch = source[start_i:start_i + batch_size]
        target_batch = target[start_i:start_i + batch_size]
        yield (np.array(pad_sentence_batch(source_batch, source_vocab_to_int)), 
               np.array(pad_sentence_batch(target_batch, target_vocab_to_int)))

In [0]:
train_valid_split = int(len(source_int)*0.1)

train_source = source_int[train_valid_split:]
train_target = target_int[train_valid_split:]

valid_source = source_int[:train_valid_split]
valid_target = target_int[:train_valid_split]


In [30]:
import time

learning_rate_decay = 0.95
display_step = 50
stop_early = 0
stop = 3
total_train_loss = 0
summary_valid_loss = []


checkpoint = "best_model.ckpt" 

with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())

    for epoch_i in range(1, epochs+1):
        for batch_i, (source_batch, target_batch) in enumerate(
                batch_data(train_source, train_target, batch_size)):
            start_time = time.time()
            _, loss = sess.run(
                [train_op, cost],
                {input_data: source_batch,
                 targets: target_batch,
                 lr: learning_rate,
                 sequence_length: target_batch.shape[1],
                 keep_prob: keep_probability})

            total_train_loss += loss
            end_time = time.time()
            batch_time = end_time - start_time
            
            if batch_i % display_step == 0:
                print('Epoch {:>3}/{} Batch {:>4}/{} - Loss: {:>6.3f}, Seconds: {:>4.2f}'
                      .format(epoch_i,
                              epochs, 
                              batch_i, 
                              len(train_source) // batch_size, 
                              total_train_loss / display_step, 
                              batch_time*display_step))
                total_train_loss = 0

            if batch_i % 235 == 0 and batch_i > 0:
                total_valid_loss = 0
                start_time = time.time()
                for batch_ii, (source_batch, target_batch) in \
                        enumerate(batch_data(valid_source, valid_target, batch_size)):
                    valid_loss = sess.run(
                    cost, {input_data: source_batch,
                           targets: target_batch,
                           lr: learning_rate,
                           sequence_length: target_batch.shape[1],
                           keep_prob: 1})
                    total_valid_loss += valid_loss
                end_time = time.time()
                batch_time = end_time - start_time
                avg_valid_loss = total_valid_loss / (len(valid_source) / batch_size)
                print('Valid Loss: {:>6.3f}, Seconds: {:>5.2f}'.format(avg_valid_loss, batch_time))
                
                learning_rate *= learning_rate_decay
                
                summary_valid_loss.append(avg_valid_loss)
                if avg_valid_loss <= min(summary_valid_loss):
                    print('New Record!') 
                    stop_early = 0
                    saver = tf.train.Saver() 
                    saver.save(sess, checkpoint)
                
                else:
                    print("No Improvement.")
                    stop_early += 1
                    if stop_early == stop:
                        break
        if stop_early == stop:
            print("Stopping Training.")
            break

Epoch   1/10 Batch    0/1926 - Loss:  0.185, Seconds: 72.46
Epoch   1/10 Batch   50/1926 - Loss:  3.636, Seconds: 35.23
Epoch   1/10 Batch  100/1926 - Loss:  2.595, Seconds: 39.94
Epoch   1/10 Batch  150/1926 - Loss:  2.470, Seconds: 36.99
Epoch   1/10 Batch  200/1926 - Loss:  2.372, Seconds: 30.80
Valid Loss:  2.134, Seconds: 93.30
New Record!
Epoch   1/10 Batch  250/1926 - Loss:  2.328, Seconds: 34.13
Epoch   1/10 Batch  300/1926 - Loss:  2.312, Seconds: 40.22
Epoch   1/10 Batch  350/1926 - Loss:  2.285, Seconds: 33.34
Epoch   1/10 Batch  400/1926 - Loss:  2.313, Seconds: 29.24
Epoch   1/10 Batch  450/1926 - Loss:  2.160, Seconds: 42.08
Valid Loss:  2.094, Seconds: 90.54
New Record!
Epoch   1/10 Batch  500/1926 - Loss:  2.209, Seconds: 46.19
Epoch   1/10 Batch  550/1926 - Loss:  2.338, Seconds: 39.29
Epoch   1/10 Batch  600/1926 - Loss:  2.239, Seconds: 48.32
Epoch   1/10 Batch  650/1926 - Loss:  2.114, Seconds: 23.08
Epoch   1/10 Batch  700/1926 - Loss:  2.097, Seconds: 35.97
Valid 

Valid Loss:  1.881, Seconds: 95.05
New Record!
Epoch   2/10 Batch  500/1926 - Loss:  1.959, Seconds: 45.85
Epoch   2/10 Batch  550/1926 - Loss:  2.077, Seconds: 40.38
Epoch   2/10 Batch  600/1926 - Loss:  1.973, Seconds: 44.51
Epoch   2/10 Batch  650/1926 - Loss:  1.867, Seconds: 23.30
Epoch   2/10 Batch  700/1926 - Loss:  1.864, Seconds: 35.25
Valid Loss:  1.870, Seconds: 91.37
New Record!
Epoch   2/10 Batch  750/1926 - Loss:  1.822, Seconds: 35.04
Epoch   2/10 Batch  800/1926 - Loss:  1.921, Seconds: 34.77
Epoch   2/10 Batch  850/1926 - Loss:  1.829, Seconds: 30.07
Epoch   2/10 Batch  900/1926 - Loss:  1.870, Seconds: 28.89
Valid Loss:  1.860, Seconds: 91.04
New Record!
Epoch   2/10 Batch  950/1926 - Loss:  1.728, Seconds: 43.75
Epoch   2/10 Batch 1000/1926 - Loss:  1.649, Seconds: 37.84
Epoch   2/10 Batch 1050/1926 - Loss:  1.790, Seconds: 28.02
Epoch   2/10 Batch 1100/1926 - Loss:  1.803, Seconds: 18.57
Epoch   2/10 Batch 1150/1926 - Loss:  1.793, Seconds: 34.11
Valid Loss:  1.849,

Valid Loss:  1.795, Seconds: 90.53
New Record!
Epoch   3/10 Batch  950/1926 - Loss:  1.643, Seconds: 43.62
Epoch   3/10 Batch 1000/1926 - Loss:  1.577, Seconds: 37.15
Epoch   3/10 Batch 1050/1926 - Loss:  1.712, Seconds: 29.41
Epoch   3/10 Batch 1100/1926 - Loss:  1.721, Seconds: 19.33
Epoch   3/10 Batch 1150/1926 - Loss:  1.714, Seconds: 32.80
Valid Loss:  1.789, Seconds: 94.12
New Record!
Epoch   3/10 Batch 1200/1926 - Loss:  1.731, Seconds: 28.41
Epoch   3/10 Batch 1250/1926 - Loss:  1.622, Seconds: 40.29
Epoch   3/10 Batch 1300/1926 - Loss:  1.628, Seconds: 37.85
Epoch   3/10 Batch 1350/1926 - Loss:  1.753, Seconds: 47.33
Epoch   3/10 Batch 1400/1926 - Loss:  1.771, Seconds: 36.52
Valid Loss:  1.785, Seconds: 92.82
New Record!
Epoch   3/10 Batch 1450/1926 - Loss:  1.798, Seconds: 29.13
Epoch   3/10 Batch 1500/1926 - Loss:  1.781, Seconds: 36.62
Epoch   3/10 Batch 1550/1926 - Loss:  1.797, Seconds: 47.75
Epoch   3/10 Batch 1600/1926 - Loss:  1.832, Seconds: 45.79
Valid Loss:  1.776,

Epoch   4/10 Batch 1400/1926 - Loss:  1.724, Seconds: 38.62
Valid Loss:  1.753, Seconds: 95.77
New Record!
Epoch   4/10 Batch 1450/1926 - Loss:  1.750, Seconds: 30.06
Epoch   4/10 Batch 1500/1926 - Loss:  1.736, Seconds: 34.90
Epoch   4/10 Batch 1550/1926 - Loss:  1.750, Seconds: 45.20
Epoch   4/10 Batch 1600/1926 - Loss:  1.784, Seconds: 47.71
Valid Loss:  1.746, Seconds: 93.22
New Record!
Epoch   4/10 Batch 1650/1926 - Loss:  1.679, Seconds: 34.66
Epoch   4/10 Batch 1700/1926 - Loss:  1.753, Seconds: 39.36
Epoch   4/10 Batch 1750/1926 - Loss:  1.652, Seconds: 47.70
Epoch   4/10 Batch 1800/1926 - Loss:  1.742, Seconds: 42.92
Epoch   4/10 Batch 1850/1926 - Loss:  1.780, Seconds: 47.23
Valid Loss:  1.740, Seconds: 94.95
New Record!
Epoch   4/10 Batch 1900/1926 - Loss:  1.651, Seconds: 45.86
Epoch   5/10 Batch    0/1926 - Loss:  0.882, Seconds: 38.06
Epoch   5/10 Batch   50/1926 - Loss:  1.746, Seconds: 34.17
Epoch   5/10 Batch  100/1926 - Loss:  1.709, Seconds: 40.45
Epoch   5/10 Batch 

Epoch   5/10 Batch 1850/1926 - Loss:  1.743, Seconds: 46.39
Valid Loss:  1.721, Seconds: 91.69
New Record!
Epoch   5/10 Batch 1900/1926 - Loss:  1.619, Seconds: 46.44
Epoch   6/10 Batch    0/1926 - Loss:  0.865, Seconds: 39.43
Epoch   6/10 Batch   50/1926 - Loss:  1.715, Seconds: 36.05
Epoch   6/10 Batch  100/1926 - Loss:  1.676, Seconds: 38.73
Epoch   6/10 Batch  150/1926 - Loss:  1.783, Seconds: 37.15
Epoch   6/10 Batch  200/1926 - Loss:  1.787, Seconds: 31.73
Valid Loss:  1.719, Seconds: 88.84
New Record!
Epoch   6/10 Batch  250/1926 - Loss:  1.768, Seconds: 34.73
Epoch   6/10 Batch  300/1926 - Loss:  1.790, Seconds: 39.80
Epoch   6/10 Batch  350/1926 - Loss:  1.789, Seconds: 32.18
Epoch   6/10 Batch  400/1926 - Loss:  1.839, Seconds: 29.32
Epoch   6/10 Batch  450/1926 - Loss:  1.697, Seconds: 41.81
Valid Loss:  1.714, Seconds: 89.27
New Record!
Epoch   6/10 Batch  500/1926 - Loss:  1.749, Seconds: 47.68
Epoch   6/10 Batch  550/1926 - Loss:  1.844, Seconds: 41.72
Epoch   6/10 Batch 

In [35]:
files.download('best_model.ckpt.data-00000-of-00001')

----------------------------------------
Exception happened during processing of request from ('::ffff:127.0.0.1', 35362, 0, 0)
Traceback (most recent call last):
  File "/usr/lib/python3.6/socketserver.py", line 317, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/usr/lib/python3.6/socketserver.py", line 348, in process_request
    self.finish_request(request, client_address)
  File "/usr/lib/python3.6/socketserver.py", line 361, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/usr/lib/python3.6/socketserver.py", line 696, in __init__
    self.handle()
  File "/usr/lib/python3.6/http/server.py", line 418, in handle
    self.handle_one_request()
  File "/usr/lib/python3.6/http/server.py", line 406, in handle_one_request
    method()
  File "/usr/lib/python3.6/http/server.py", line 639, in do_GET
    self.copyfile(f, self.wfile)
  File "/usr/lib/python3.6/http/server.py", line 800, in copyfile
    shutil.copyfil

MessageError: ignored

In [0]:
!pip install -U -q PyDrive

In [0]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [38]:
# Create & upload a file.
uploaded = drive.CreateFile({'title': 'best_model.ckpt.data-00000-of-00001'})
uploaded.SetContentFile('best_model.ckpt.data-00000-of-00001')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))

Uploaded file with ID 14rFz2qARlwgkXXR1Zk-rHRI-Wed9vbXt


In [39]:
uploaded = drive.CreateFile({'title': 'checkpoint'})
uploaded.SetContentFile('checkpoint')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))

Uploaded file with ID 1brZThI0JIjPLP56_p3geYlX5FpSXvqrb


In [0]:
def sentence_to_seq(sentence, vocab_to_int):
    '''Prepare the predicted sentence for the model'''
    
    sentence = clean_text(sentence)
    return [vocab_to_int.get(word, vocab_to_int['<UNK>']) for word in sentence.split()]

In [61]:
input_sentence = 'Dad, if you agree to something but you did not mean to agree to it, what do you do?!'

input_sentence = sentence_to_seq(input_sentence, source_vocab_to_int)
checkpoint = "./" + checkpoint
loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load the saved model
    loader = tf.train.import_meta_graph(checkpoint + '.meta')
    loader.restore(sess, checkpoint)
    
    # Load the tensors to be used as inputs
    input_data = loaded_graph.get_tensor_by_name('input:0')
    logits = loaded_graph.get_tensor_by_name('logits:0')
    keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')
    
    response_logits = sess.run(logits, {input_data: [input_sentence],keep_prob: 1.0})[0]

print('Input')
print('  Word Ids:      {}'.format([i for i in input_sentence]))
print('  Input Words: {}'.format([source_int_to_vocab[i] for i in input_sentence]))
print('\nResponse')
print('  Word Ids:      {}'.format([i for i in np.argmax(response_logits, 1)]))
print('  Response Words: {}'.format([target_int_to_vocab[i] for i in np.argmax(response_logits, 1)]))

Input
  Word Ids:      [530, 348, 0, 597, 55, 231, 65, 0, 144, 53, 109, 55, 597, 55, 83, 28, 32, 0, 32]
  Input Words: ['dad', 'if', 'you', 'agree', 'to', 'something', 'but', 'you', 'did', 'not', 'mean', 'to', 'agree', 'to', 'it', 'what', 'do', 'you', 'do']

Response
  Word Ids:      [10, 11, 12, 10, 11, 12, 10, 11, 12, 10, 11, 53, 37, 283, 20, 9454, 9453]
  Response Words: ['i', 'am', 'sorry', 'i', 'am', 'sorry', 'i', 'am', 'sorry', 'i', 'am', 'not', 'gonna', 'be', 'a', '<UNK>', '<EOS>']
