In [1]:
import numpy as np
import datasets
import tensorflow as tf
import re

  from ._conv import register_converters as _register_converters


In [2]:
data_o = datasets.readCornellData('data/cornell')

100%|██████████| 83097/83097 [00:02<00:00, 27763.60it/s]


In [3]:
data_o[:5]

[('there', 'where'),
 ('have fun tonight', 'tons'),
 ('what good stuff', 'the real you'),
 ('wow', 'lets go'),
 ('she okay', 'i hope so')]

In [4]:
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3333)


In [5]:
import gensim
from gensim.models.keyedvectors import KeyedVectors as kv



In [7]:
start_symbol = '[^]'
end_symbol = '[$]'
padding_symbol = '[#]'

In [8]:
data_clean = []
word_set = set([start_symbol, end_symbol, padding_symbol])
for chats in data_o:
    words = ' '.join(chats).split()
    word_set.update(words)
    data_clean.append([chats[0].split(), chats[1].split()])
    

In [9]:
#print(data_clean[:5000])

In [10]:
len(data_clean)

24792

In [11]:
word2id = {symbol:i for i, symbol in enumerate(word_set)}
id2word = {i:symbol for symbol, i in word2id.items()}

In [12]:
# embedding_dim = 300
start_symbol_id = word2id[start_symbol]
end_symbol_id = word2id[end_symbol]
pad_symbol_id = word2id[padding_symbol]

In [13]:
print(start_symbol_id)
print(end_symbol_id)
print(pad_symbol_id)

6115
5423
1928


In [14]:
def sentence_to_ids(sentence, word2id, padded_len):
    """ Converts a sequence of symbols to a padded sequence of their ids.
    
      sentence: a string, input/output sequence of symbols.
      word2id: a dict, a mapping from original symbols to ids.
      padded_len: an integer, a desirable length of the sequence.

      result: a tuple of (a list of ids, an actual length of sentence).
    """
            
   
    sent_ids = [word2id[word] for word in sentence[:padded_len]]
    if padded_len>len(sentence):
        sent_ids.append(word2id['[$]'])
        sent_ids += [word2id['[#]']]*(padded_len-len(sent_ids))
        return sent_ids, len(sentence)+1
    sent_ids[-1] = word2id['[$]']
    return sent_ids, padded_len


def  ids_to_sentence(ids, id2word):
    """ Converts a sequence of ids to a sequence of symbols.
    
          ids: a list, indices for the padded sequence.
          id2word:  a dict, a mapping from ids to original symbols.

          result: a list of symbols.
    """
 
    return [id2word[i] for i in ids]


In [15]:
def batch_to_ids(sentences, word2id, max_len):
    """Prepares batches of indices. 
    
       Sequences are padded to match the longest sequence in the batch,
       if it's longer than max_len, then max_len is used instead.

        sentences: a list of strings, original sequences.
        word2id: a dict, a mapping from original symbols to ids.
        max_len: an integer, max len of sequences allowed.

        result: a list of lists of ids, a list of actual lengths.
    """
    
    max_len_in_batch = min(max(len(s) for s in sentences) + 1, max_len)
    batch_ids, batch_ids_len = [], []
    for sentence in sentences:
        ids, ids_len = sentence_to_ids(sentence, word2id, max_len_in_batch)
        batch_ids.append(ids)
        batch_ids_len.append(ids_len)
    return batch_ids, batch_ids_len

In [16]:
def generate_batches(samples, batch_size=64):
    X, Y = [], []
    for i, (x, y) in enumerate(samples, 1):
        X.append(x)
        Y.append(y)
        if i % batch_size == 0:
            yield X, Y
            X, Y = [], []
    if X and Y:
        yield X, Y

In [17]:
def text_prepare(text):
    """Performs tokenization and simple preprocessing."""
    
    replace_by_space_re = re.compile('[/(){}\[\]\|@,;]')
    good_symbols_re = re.compile('[^0-9a-z #+_]')

    text = text.lower()
    text = replace_by_space_re.sub(' ', text)
    text = good_symbols_re.sub('', text)

    return text.strip()

In [18]:
import tensorflow as tf


In [19]:

class Seq2SeqModel(object):

    def __init__(self, vocab_size, embeddings_size, hidden_size,
                 max_iter, start_symbol_id, end_symbol_id, padding_symbol_id):


        self.declare_placeholders()
        self.create_embeddings(vocab_size, embeddings_size)
        self.build_encoder(hidden_size)
        self.build_decoder(hidden_size, vocab_size, max_iter, start_symbol_id, end_symbol_id)

        # Compute loss and back-propagate.
        self.compute_loss()
        self.perform_optimization()

        # Get predictions for evaluation.
        self.train_predictions = self.train_outputs.sample_id
        self.infer_predictions = self.infer_outputs.sample_id

    def declare_placeholders(self):
        """Specifies placeholders for the model."""

        # Placeholders for input and its actual lengths.
        self.input_batch = tf.placeholder(shape=(None, None), dtype=tf.int32, name='input_batch')
        self.input_batch_lengths = tf.placeholder(shape=(None,), dtype=tf.int32, name='input_batch_lengths')

        # Placeholders for groundtruth and its actual lengths.
        self.ground_truth = tf.placeholder(shape=(None, None), dtype=tf.int32, name='ground_thruth')
        self.ground_truth_lengths = tf.placeholder(shape=(None,), dtype=tf.int32, name='ground_thrugth_length')

        self.dropout_ph = tf.placeholder_with_default(tf.cast(1.0, tf.float32), shape=[])
        self.learning_rate_ph = tf.placeholder_with_default(tf.cast(0.001, tf.float32), shape=[])

    def create_embeddings(self, vocab_size, embeddings_size):
        """Specifies embeddings layer and embeds an input batch."""

        random_initializer = tf.random_uniform((vocab_size, embeddings_size), -1.0, 1.0)
        self.embeddings = tf.Variable(initial_value=random_initializer, dtype=tf.float32, name='embeding_matrix')

        # Perform embeddings lookup for self.input_batch.
        self.input_batch_embedded = tf.nn.embedding_lookup(self.embeddings, self.input_batch)

    def build_encoder(self, hidden_size):
        """Specifies encoder architecture and computes its output."""

        # Create GRUCell with dropout.
        encoder_cell = tf.contrib.rnn.GRUCell(num_units=hidden_size)
        encoder_cell_dropout = tf.contrib.rnn.DropoutWrapper(encoder_cell, input_keep_prob=self.dropout_ph)

        # Create RNN with the predefined cell.
        _, self.final_encoder_state = tf.nn.dynamic_rnn(cell=encoder_cell_dropout,
                                                        inputs=self.input_batch_embedded,
                                                        sequence_length=self.input_batch_lengths,
                                                        dtype=tf.float32)

    def build_decoder(self, hidden_size, vocab_size, max_iter, start_symbol_id, end_symbol_id):
        """Specifies decoder architecture and computes the output.

            Uses different helpers:
              - for train: feeding ground truth
              - for inference: feeding generated output

            As a result, self.train_outputs and self.infer_outputs are created.
            Each of them contains two fields:
              rnn_output (predicted logits)
              sample_id (predictions).

        """

        # Use start symbols as the decoder inputs at the first time step.
        batch_size = tf.shape(self.input_batch)[0]
        start_tokens = tf.fill([batch_size], start_symbol_id)
        ground_truth_as_input = tf.concat([tf.expand_dims(start_tokens, 1), self.ground_truth], 1)

        # Use the embedding layer defined before to lookup embedings for ground_truth_as_input.
        self.ground_truth_embedded = tf.nn.embedding_lookup(self.embeddings, ground_truth_as_input)

        # Create TrainingHelper for the train stage.
        train_helper = tf.contrib.seq2seq.TrainingHelper(self.ground_truth_embedded,
                                                         self.ground_truth_lengths)

        # Create GreedyEmbeddingHelper for the inference stage.
        # You should provide the embedding layer, start_tokens and index of the end symbol.
        infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(self.embeddings, start_tokens, end_symbol_id)

        def decode(helper, scope, reuse=None):
            """Creates decoder and return the results of the decoding with a given helper."""

            with tf.variable_scope(scope, reuse=reuse):
                # Create GRUCell with dropout. Do not forget to set the reuse flag properly.
                decoder_cell = tf.contrib.rnn.GRUCell(num_units=hidden_size, reuse=reuse)
                decoder_cell_dropout = tf.contrib.rnn.DropoutWrapper(decoder_cell, input_keep_prob=self.dropout_ph)

                # Create a projection wrapper.
                decoder_cell = tf.contrib.rnn.OutputProjectionWrapper(decoder_cell_dropout, vocab_size, reuse=reuse)

                # Create BasicDecoder, pass the defined cell, a helper, and initial state.
                # The initial state should be equal to the final state of the encoder!
                decoder = tf.contrib.seq2seq.BasicDecoder(cell=decoder_cell, helper=helper,
                                                          initial_state=self.final_encoder_state)

                # The first returning argument of dynamic_decode contains two fields:
                #   rnn_output (predicted logits)
                #   sample_id (predictions)
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder=decoder, maximum_iterations=max_iter,
                                                                  output_time_major=False, impute_finished=True)

                return outputs

        self.train_outputs = decode(train_helper, 'decode')
        self.infer_outputs = decode(infer_helper, 'decode', reuse=True)

    def compute_loss(self):
        """Computes sequence loss (masked cross-entopy loss with logits)."""

        weights = tf.cast(tf.sequence_mask(self.ground_truth_lengths), dtype=tf.float32)

        self.loss = tf.contrib.seq2seq.sequence_loss(
            logits=self.train_outputs.rnn_output,
            targets=self.ground_truth,
            weights=weights)

    def perform_optimization(self):
        """Specifies train_op that optimizes self.loss."""

        self.train_op = tf.contrib.layers.optimize_loss(
            loss=self.loss,
            global_step=tf.train.get_global_step(),
            learning_rate=self.learning_rate_ph,
            optimizer='Adam',
            clip_gradients=1.0
        )

    def get_response(self, session, input_sentence):
        sentence = text_prepare(input_sentence)
        X = []
        row = []
        for word in sentence:
            if word in word2id:
                row.append(word2id[word])
            else:
                row.append(start_symbol_id)
        X.append(row)
        X = np.array(X)

        feed_dict = {
            self.input_batch: X,
            self.input_batch_lengths: np.array([len(input_sentence)]),
        }
        predictions = session.run([self.infer_predictions], feed_dict=feed_dict)
        return " ".join([id2word[index] for index in predictions[0][0][:-1]])


    def train_on_batch(self, session, X, X_seq_len, Y, Y_seq_len, learning_rate, dropout_keep_probability):
        feed_dict = {
            self.input_batch: X,
            self.input_batch_lengths: X_seq_len,
            self.ground_truth: Y,
            self.ground_truth_lengths: Y_seq_len,
            self.learning_rate_ph: learning_rate,
            self.dropout_ph: dropout_keep_probability
        }
        pred, loss, _ = session.run([
            self.train_predictions,
            self.loss,
            self.train_op], feed_dict=feed_dict)
        return pred, loss

    def predict_for_batch(self, session, X, X_seq_len):
        feed_dict = {
            self.input_batch: X,
            self.input_batch_lengths: X_seq_len
        }
        pred = session.run([
            self.infer_predictions
        ], feed_dict=feed_dict)[0]
        return pred

    def predict_for_batch_with_loss(self, session, X, X_seq_len, Y, Y_seq_len):
        feed_dict = {
            self.input_batch: X,
            self.input_batch_lengths: X_seq_len,
            self.ground_truth: Y,
            self.ground_truth_lengths: Y_seq_len
        }
        pred, loss = session.run([
            self.infer_predictions,
            self.loss,
        ], feed_dict=feed_dict)
        return pred, loss


In [20]:
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(data_clean, test_size=0.2, random_state=42)

In [21]:
import pickle
pickle.dump(word2id, open( "word2id.p", "wb" ) )
pickle.dump(id2word, open( "id2word.p", "wb" ) )


In [22]:
tf.reset_default_graph()
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))


model = Seq2SeqModel(vocab_size = len(word2id),
                    embeddings_size = 300,
                    hidden_size = 128,
                    max_iter = 20,
                    start_symbol_id=word2id['[^]'],
                    end_symbol_id=word2id['[$]'],
                    padding_symbol_id=word2id['[#]'])

batch_size = 8
n_epochs = 900
learning_rate = 0.001
dropout_keep_probability = 0.4
max_len = 20

n_step = int(len(train_set) / batch_size)

Instructions for updating:
Use the retry module or similar alternatives.


In [None]:
import random 
session.run(tf.global_variables_initializer())
            
invalid_number_prediction_counts = []
all_model_predictions = []
all_ground_truth = []

saver = tf.train.Saver(max_to_keep=1000)
print('Start training... \n')
for epoch in range(n_epochs):  
    random.shuffle(train_set)
    random.shuffle(test_set)
    
    print('Train: epoch', epoch + 1)
    for n_iter, (X_batch, Y_batch) in enumerate(generate_batches(train_set, batch_size=batch_size)):

        X_batch_ids, X_batch_ids_len = batch_to_ids(X_batch, word2id, max_len)
        Y_batch_ids, Y_batch_ids_len = batch_to_ids(Y_batch, word2id, max_len)

        predictions, loss = model.train_on_batch(session, 
                                                 X_batch_ids,
                                                 X_batch_ids_len, 
                                                 Y_batch_ids, 
                                                 Y_batch_ids_len, 
                                                 learning_rate, 
                                                 dropout_keep_probability)
    
    
        if n_iter % 200 == 0:
            print("Epoch: [%d/%d], step: [%d/%d], loss: %f" % (epoch + 1, n_epochs, n_iter + 1, n_step, loss))
                
    X_sent, Y_sent = next(generate_batches(test_set, batch_size=batch_size))


    X_test_batch_ids, X_test_batch_ids_len = batch_to_ids(X_sent, word2id, max_len)
    Y_test_batch_ids, Y_test_batch_ids_len = batch_to_ids(Y_sent, word2id, max_len)


    predictions, loss = model.predict_for_batch_with_loss(session=session, 
                                                          X=X_test_batch_ids, 
                                                          X_seq_len=X_test_batch_ids_len,
                                                          Y=Y_test_batch_ids, 
                                                          Y_seq_len=Y_test_batch_ids_len)
    print('Test: epoch', epoch + 1, 'loss:', loss,)
    saver.save(session, 'checkpoints/model_four_'+str(epoch))


#         print('2.')

Start training... 

Train: epoch 1
Epoch: [1/900], step: [1/2479], loss: 9.089571
Epoch: [1/900], step: [201/2479], loss: 5.162564
Epoch: [1/900], step: [401/2479], loss: 4.956809
Epoch: [1/900], step: [601/2479], loss: 4.736184
Epoch: [1/900], step: [801/2479], loss: 3.839516
Epoch: [1/900], step: [1001/2479], loss: 4.797082
Epoch: [1/900], step: [1201/2479], loss: 5.164343
Epoch: [1/900], step: [1401/2479], loss: 4.954337
Epoch: [1/900], step: [1601/2479], loss: 5.012486
Epoch: [1/900], step: [1801/2479], loss: 4.893082
Epoch: [1/900], step: [2001/2479], loss: 4.135650
Epoch: [1/900], step: [2201/2479], loss: 4.223727
Epoch: [1/900], step: [2401/2479], loss: 4.644779
Test: epoch 1 loss: 5.068281
Train: epoch 2
Epoch: [2/900], step: [1/2479], loss: 4.712706
Epoch: [2/900], step: [201/2479], loss: 4.148042
Epoch: [2/900], step: [401/2479], loss: 3.550639
Epoch: [2/900], step: [601/2479], loss: 4.268332
Epoch: [2/900], step: [801/2479], loss: 4.344828
Epoch: [2/900], step: [1001/2479], 

Epoch: [12/900], step: [2201/2479], loss: 2.970464
Epoch: [12/900], step: [2401/2479], loss: 2.892827
Test: epoch 12 loss: 4.604541
Train: epoch 13
Epoch: [13/900], step: [1/2479], loss: 2.784136
Epoch: [13/900], step: [201/2479], loss: 2.925337
Epoch: [13/900], step: [401/2479], loss: 3.364030
Epoch: [13/900], step: [601/2479], loss: 3.238554
Epoch: [13/900], step: [801/2479], loss: 3.117167
Epoch: [13/900], step: [1001/2479], loss: 3.014548
Epoch: [13/900], step: [1201/2479], loss: 2.995309
Epoch: [13/900], step: [1401/2479], loss: 2.906691
Epoch: [13/900], step: [1601/2479], loss: 3.133435
Epoch: [13/900], step: [1801/2479], loss: 2.423719
Epoch: [13/900], step: [2001/2479], loss: 3.782163
Epoch: [13/900], step: [2201/2479], loss: 3.088079
Epoch: [13/900], step: [2401/2479], loss: 3.938178
Test: epoch 13 loss: 3.7842705
Train: epoch 14
Epoch: [14/900], step: [1/2479], loss: 2.144584
Epoch: [14/900], step: [201/2479], loss: 2.947818
Epoch: [14/900], step: [401/2479], loss: 2.098896
E

Epoch: [24/900], step: [1201/2479], loss: 2.784442
Epoch: [24/900], step: [1401/2479], loss: 2.559507
Epoch: [24/900], step: [1601/2479], loss: 2.117641
Epoch: [24/900], step: [1801/2479], loss: 2.651484
Epoch: [24/900], step: [2001/2479], loss: 3.064284
Epoch: [24/900], step: [2201/2479], loss: 2.617297
Epoch: [24/900], step: [2401/2479], loss: 2.187746
Test: epoch 24 loss: 5.5017786
Train: epoch 25
Epoch: [25/900], step: [1/2479], loss: 2.406727
Epoch: [25/900], step: [201/2479], loss: 2.497492
Epoch: [25/900], step: [401/2479], loss: 2.645295
Epoch: [25/900], step: [601/2479], loss: 2.898926
Epoch: [25/900], step: [801/2479], loss: 2.396592
Epoch: [25/900], step: [1001/2479], loss: 2.448204
Epoch: [25/900], step: [1201/2479], loss: 2.913520
Epoch: [25/900], step: [1401/2479], loss: 2.932544
Epoch: [25/900], step: [1601/2479], loss: 2.948591
Epoch: [25/900], step: [1801/2479], loss: 2.311488
Epoch: [25/900], step: [2001/2479], loss: 3.143854
Epoch: [25/900], step: [2201/2479], loss: 

Epoch: [36/900], step: [201/2479], loss: 2.361049
Epoch: [36/900], step: [401/2479], loss: 2.513638
Epoch: [36/900], step: [601/2479], loss: 2.408422
Epoch: [36/900], step: [801/2479], loss: 2.667599
Epoch: [36/900], step: [1001/2479], loss: 2.775263
Epoch: [36/900], step: [1201/2479], loss: 1.715851
Epoch: [36/900], step: [1401/2479], loss: 2.450883
Epoch: [36/900], step: [1601/2479], loss: 2.009586
Epoch: [36/900], step: [1801/2479], loss: 2.314884
Epoch: [36/900], step: [2001/2479], loss: 1.935795
Epoch: [36/900], step: [2201/2479], loss: 2.645828
Epoch: [36/900], step: [2401/2479], loss: 2.277745
Test: epoch 36 loss: 8.572964
Train: epoch 37
Epoch: [37/900], step: [1/2479], loss: 1.692852
Epoch: [37/900], step: [201/2479], loss: 1.387664
Epoch: [37/900], step: [401/2479], loss: 1.999169
Epoch: [37/900], step: [601/2479], loss: 2.513089
Epoch: [37/900], step: [801/2479], loss: 2.085521
Epoch: [37/900], step: [1001/2479], loss: 3.090790
Epoch: [37/900], step: [1201/2479], loss: 2.632

Epoch: [47/900], step: [2001/2479], loss: 2.881992
Epoch: [47/900], step: [2201/2479], loss: 2.160333
Epoch: [47/900], step: [2401/2479], loss: 2.221586
Test: epoch 47 loss: 4.334727
Train: epoch 48
Epoch: [48/900], step: [1/2479], loss: 1.564216
Epoch: [48/900], step: [201/2479], loss: 1.802980
Epoch: [48/900], step: [401/2479], loss: 1.822131
Epoch: [48/900], step: [601/2479], loss: 2.479000
Epoch: [48/900], step: [801/2479], loss: 2.135602
Epoch: [48/900], step: [1001/2479], loss: 2.000051
Epoch: [48/900], step: [1201/2479], loss: 2.378141
Epoch: [48/900], step: [1401/2479], loss: 1.984845
Epoch: [48/900], step: [1601/2479], loss: 1.905633
Epoch: [48/900], step: [1801/2479], loss: 1.950553
Epoch: [48/900], step: [2001/2479], loss: 2.899110
Epoch: [48/900], step: [2201/2479], loss: 2.488944
Epoch: [48/900], step: [2401/2479], loss: 1.911402
Test: epoch 48 loss: 4.9420266
Train: epoch 49
Epoch: [49/900], step: [1/2479], loss: 1.928851
Epoch: [49/900], step: [201/2479], loss: 2.064047


Epoch: [59/900], step: [1001/2479], loss: 1.956298
Epoch: [59/900], step: [1201/2479], loss: 2.143432
Epoch: [59/900], step: [1401/2479], loss: 1.924211
Epoch: [59/900], step: [1601/2479], loss: 1.314072
Epoch: [59/900], step: [1801/2479], loss: 1.959152
Epoch: [59/900], step: [2001/2479], loss: 1.802448
Epoch: [59/900], step: [2201/2479], loss: 2.081960
Epoch: [59/900], step: [2401/2479], loss: 2.212619
Test: epoch 59 loss: 7.635249
Train: epoch 60
Epoch: [60/900], step: [1/2479], loss: 1.775971
Epoch: [60/900], step: [201/2479], loss: 2.172951
Epoch: [60/900], step: [401/2479], loss: 2.067847
Epoch: [60/900], step: [601/2479], loss: 1.903731
Epoch: [60/900], step: [801/2479], loss: 2.109278
Epoch: [60/900], step: [1001/2479], loss: 2.424789
Epoch: [60/900], step: [1201/2479], loss: 1.973078
Epoch: [60/900], step: [1401/2479], loss: 2.573982
Epoch: [60/900], step: [1601/2479], loss: 2.405604
Epoch: [60/900], step: [1801/2479], loss: 1.493515
Epoch: [60/900], step: [2001/2479], loss: 1

Epoch: [71/900], step: [1/2479], loss: 1.938320
Epoch: [71/900], step: [201/2479], loss: 1.854643
Epoch: [71/900], step: [401/2479], loss: 1.649571
Epoch: [71/900], step: [601/2479], loss: 3.208989
Epoch: [71/900], step: [801/2479], loss: 1.987198
Epoch: [71/900], step: [1001/2479], loss: 2.413112
Epoch: [71/900], step: [1201/2479], loss: 1.606021
Epoch: [71/900], step: [1401/2479], loss: 2.503923
Epoch: [71/900], step: [1601/2479], loss: 2.038549
Epoch: [71/900], step: [1801/2479], loss: 2.217619
Epoch: [71/900], step: [2001/2479], loss: 2.201038
Epoch: [71/900], step: [2201/2479], loss: 1.916034
Epoch: [71/900], step: [2401/2479], loss: 1.745366
Test: epoch 71 loss: 5.887672
Train: epoch 72
Epoch: [72/900], step: [1/2479], loss: 1.742571
Epoch: [72/900], step: [201/2479], loss: 1.908734
Epoch: [72/900], step: [401/2479], loss: 1.960262
Epoch: [72/900], step: [601/2479], loss: 1.717284
Epoch: [72/900], step: [801/2479], loss: 1.646533
Epoch: [72/900], step: [1001/2479], loss: 1.795344

Epoch: [82/900], step: [1801/2479], loss: 1.585885
Epoch: [82/900], step: [2001/2479], loss: 1.466191
Epoch: [82/900], step: [2201/2479], loss: 1.700557
Epoch: [82/900], step: [2401/2479], loss: 2.240819
Test: epoch 82 loss: 5.9050183
Train: epoch 83
Epoch: [83/900], step: [1/2479], loss: 2.631446
Epoch: [83/900], step: [201/2479], loss: 1.251616
Epoch: [83/900], step: [401/2479], loss: 1.473358
Epoch: [83/900], step: [601/2479], loss: 1.494940
Epoch: [83/900], step: [801/2479], loss: 1.735320
Epoch: [83/900], step: [1001/2479], loss: 2.106371
Epoch: [83/900], step: [1201/2479], loss: 1.443219
Epoch: [83/900], step: [1401/2479], loss: 1.274338
Epoch: [83/900], step: [1601/2479], loss: 2.024905
Epoch: [83/900], step: [1801/2479], loss: 1.902372
Epoch: [83/900], step: [2001/2479], loss: 1.457568
Epoch: [83/900], step: [2201/2479], loss: 1.927399
Epoch: [83/900], step: [2401/2479], loss: 2.013811
Test: epoch 83 loss: 5.502715
Train: epoch 84
Epoch: [84/900], step: [1/2479], loss: 1.759981

Epoch: [94/900], step: [801/2479], loss: 2.394591
Epoch: [94/900], step: [1001/2479], loss: 2.156112
Epoch: [94/900], step: [1201/2479], loss: 2.102726
Epoch: [94/900], step: [1401/2479], loss: 2.430955
Epoch: [94/900], step: [1601/2479], loss: 2.045424
Epoch: [94/900], step: [1801/2479], loss: 2.265533
Epoch: [94/900], step: [2001/2479], loss: 2.345140
Epoch: [94/900], step: [2201/2479], loss: 1.973570
Epoch: [94/900], step: [2401/2479], loss: 2.319056
Test: epoch 94 loss: 6.096442
Train: epoch 95
Epoch: [95/900], step: [1/2479], loss: 1.699121
Epoch: [95/900], step: [201/2479], loss: 1.672376
Epoch: [95/900], step: [401/2479], loss: 2.113259
Epoch: [95/900], step: [601/2479], loss: 2.036323
Epoch: [95/900], step: [801/2479], loss: 1.555509
Epoch: [95/900], step: [1001/2479], loss: 1.652851
Epoch: [95/900], step: [1201/2479], loss: 2.241258
Epoch: [95/900], step: [1401/2479], loss: 2.380244
Epoch: [95/900], step: [1601/2479], loss: 1.878907
Epoch: [95/900], step: [1801/2479], loss: 1.

Epoch: [105/900], step: [2201/2479], loss: 1.369045
Epoch: [105/900], step: [2401/2479], loss: 1.864955
Test: epoch 105 loss: 6.347972
Train: epoch 106
Epoch: [106/900], step: [1/2479], loss: 2.275174
Epoch: [106/900], step: [201/2479], loss: 1.804294
Epoch: [106/900], step: [401/2479], loss: 2.060491
Epoch: [106/900], step: [601/2479], loss: 1.959521
Epoch: [106/900], step: [801/2479], loss: 1.738414
Epoch: [106/900], step: [1001/2479], loss: 1.956438
Epoch: [106/900], step: [1201/2479], loss: 1.335440
Epoch: [106/900], step: [1401/2479], loss: 1.327876
Epoch: [106/900], step: [1601/2479], loss: 1.369149
Epoch: [106/900], step: [1801/2479], loss: 1.088819
Epoch: [106/900], step: [2001/2479], loss: 1.769154
Epoch: [106/900], step: [2201/2479], loss: 2.256861
Epoch: [106/900], step: [2401/2479], loss: 2.288472
Test: epoch 106 loss: 6.2445993
Train: epoch 107
Epoch: [107/900], step: [1/2479], loss: 1.673892
Epoch: [107/900], step: [201/2479], loss: 2.046188
Epoch: [107/900], step: [401/2

Epoch: [117/900], step: [401/2479], loss: 2.242872
Epoch: [117/900], step: [601/2479], loss: 1.932852
Epoch: [117/900], step: [801/2479], loss: 1.622887
Epoch: [117/900], step: [1001/2479], loss: 1.906115
Epoch: [117/900], step: [1201/2479], loss: 1.127891
Epoch: [117/900], step: [1401/2479], loss: 1.297666
Epoch: [117/900], step: [1601/2479], loss: 1.678508
Epoch: [117/900], step: [1801/2479], loss: 1.881782
Epoch: [117/900], step: [2001/2479], loss: 2.012921
Epoch: [117/900], step: [2201/2479], loss: 2.001248
Epoch: [117/900], step: [2401/2479], loss: 1.957415
Test: epoch 117 loss: 7.4489822
Train: epoch 118
Epoch: [118/900], step: [1/2479], loss: 2.143893
Epoch: [118/900], step: [201/2479], loss: 2.846869
Epoch: [118/900], step: [401/2479], loss: 1.936080
Epoch: [118/900], step: [601/2479], loss: 1.874600
Epoch: [118/900], step: [801/2479], loss: 2.392325
Epoch: [118/900], step: [1001/2479], loss: 2.198176
Epoch: [118/900], step: [1201/2479], loss: 2.661246
Epoch: [118/900], step: [

Epoch: [128/900], step: [1401/2479], loss: 1.323053
Epoch: [128/900], step: [1601/2479], loss: 1.842431
Epoch: [128/900], step: [1801/2479], loss: 1.776824
Epoch: [128/900], step: [2001/2479], loss: 2.056471
Epoch: [128/900], step: [2201/2479], loss: 1.718626
Epoch: [128/900], step: [2401/2479], loss: 1.220021
Test: epoch 128 loss: 5.0458107
Train: epoch 129
Epoch: [129/900], step: [1/2479], loss: 1.821203
Epoch: [129/900], step: [201/2479], loss: 2.050339
Epoch: [129/900], step: [401/2479], loss: 2.037117
Epoch: [129/900], step: [601/2479], loss: 2.025604
Epoch: [129/900], step: [801/2479], loss: 1.759601
Epoch: [129/900], step: [1001/2479], loss: 1.521868
Epoch: [129/900], step: [1201/2479], loss: 1.905104
Epoch: [129/900], step: [1401/2479], loss: 1.501236
Epoch: [129/900], step: [1601/2479], loss: 1.662072
Epoch: [129/900], step: [1801/2479], loss: 1.796186
Epoch: [129/900], step: [2001/2479], loss: 1.888135
Epoch: [129/900], step: [2201/2479], loss: 2.007354
Epoch: [129/900], step

Epoch: [139/900], step: [2401/2479], loss: 2.412434
Test: epoch 139 loss: 5.0466104
Train: epoch 140
Epoch: [140/900], step: [1/2479], loss: 2.515866
Epoch: [140/900], step: [201/2479], loss: 2.016138
Epoch: [140/900], step: [401/2479], loss: 1.433321
Epoch: [140/900], step: [601/2479], loss: 2.000807
Epoch: [140/900], step: [801/2479], loss: 1.944053
Epoch: [140/900], step: [1001/2479], loss: 1.823358
Epoch: [140/900], step: [1201/2479], loss: 1.384757
Epoch: [140/900], step: [1401/2479], loss: 1.317672
Epoch: [140/900], step: [1601/2479], loss: 1.900336
Epoch: [140/900], step: [1801/2479], loss: 1.685714
Epoch: [140/900], step: [2001/2479], loss: 1.783702
Epoch: [140/900], step: [2201/2479], loss: 1.640712
Epoch: [140/900], step: [2401/2479], loss: 2.253725
Test: epoch 140 loss: 6.448006
Train: epoch 141
Epoch: [141/900], step: [1/2479], loss: 1.769103
Epoch: [141/900], step: [201/2479], loss: 2.487294
Epoch: [141/900], step: [401/2479], loss: 1.849964
Epoch: [141/900], step: [601/24

Epoch: [151/900], step: [601/2479], loss: 2.199646
Epoch: [151/900], step: [801/2479], loss: 2.370520
Epoch: [151/900], step: [1001/2479], loss: 2.001425
Epoch: [151/900], step: [1201/2479], loss: 1.485856
Epoch: [151/900], step: [1401/2479], loss: 1.739993
Epoch: [151/900], step: [1601/2479], loss: 1.623869
Epoch: [151/900], step: [1801/2479], loss: 1.515748
Epoch: [151/900], step: [2001/2479], loss: 2.318938
Epoch: [151/900], step: [2201/2479], loss: 1.732295
Epoch: [151/900], step: [2401/2479], loss: 1.647642
Test: epoch 151 loss: 4.960156
Train: epoch 152
Epoch: [152/900], step: [1/2479], loss: 1.560990
Epoch: [152/900], step: [201/2479], loss: 1.512743
Epoch: [152/900], step: [401/2479], loss: 1.891825
Epoch: [152/900], step: [601/2479], loss: 2.058457
Epoch: [152/900], step: [801/2479], loss: 1.247728
Epoch: [152/900], step: [1001/2479], loss: 2.000068
Epoch: [152/900], step: [1201/2479], loss: 1.674198
Epoch: [152/900], step: [1401/2479], loss: 2.165054
Epoch: [152/900], step: [

Epoch: [162/900], step: [1601/2479], loss: 1.414323
Epoch: [162/900], step: [1801/2479], loss: 2.625894
Epoch: [162/900], step: [2001/2479], loss: 1.997676
Epoch: [162/900], step: [2201/2479], loss: 1.950178
Epoch: [162/900], step: [2401/2479], loss: 1.826089
Test: epoch 162 loss: 5.183047
Train: epoch 163
Epoch: [163/900], step: [1/2479], loss: 1.990616
Epoch: [163/900], step: [201/2479], loss: 2.155948
Epoch: [163/900], step: [401/2479], loss: 1.678336
Epoch: [163/900], step: [601/2479], loss: 2.034271
Epoch: [163/900], step: [801/2479], loss: 1.369135
Epoch: [163/900], step: [1001/2479], loss: 2.274663
Epoch: [163/900], step: [1201/2479], loss: 1.605816
Epoch: [163/900], step: [1401/2479], loss: 1.713410
Epoch: [163/900], step: [1601/2479], loss: 2.113104
Epoch: [163/900], step: [1801/2479], loss: 1.953470
Epoch: [163/900], step: [2001/2479], loss: 2.122232
Epoch: [163/900], step: [2201/2479], loss: 1.590900
Epoch: [163/900], step: [2401/2479], loss: 1.687369
Test: epoch 163 loss: 4

Train: epoch 174
Epoch: [174/900], step: [1/2479], loss: 1.336418
Epoch: [174/900], step: [201/2479], loss: 1.776424
Epoch: [174/900], step: [401/2479], loss: 1.732653
Epoch: [174/900], step: [601/2479], loss: 2.205593
Epoch: [174/900], step: [801/2479], loss: 1.403462
Epoch: [174/900], step: [1001/2479], loss: 1.905711
Epoch: [174/900], step: [1201/2479], loss: 1.750279
Epoch: [174/900], step: [1401/2479], loss: 1.758340
Epoch: [174/900], step: [1601/2479], loss: 1.778705
Epoch: [174/900], step: [1801/2479], loss: 1.698875
Epoch: [174/900], step: [2001/2479], loss: 1.818724
Epoch: [174/900], step: [2201/2479], loss: 1.319787
Epoch: [174/900], step: [2401/2479], loss: 1.975859
Test: epoch 174 loss: 4.9384
Train: epoch 175
Epoch: [175/900], step: [1/2479], loss: 1.343374
Epoch: [175/900], step: [201/2479], loss: 1.497141
Epoch: [175/900], step: [401/2479], loss: 1.755767
Epoch: [175/900], step: [601/2479], loss: 1.555662
Epoch: [175/900], step: [801/2479], loss: 1.602208
Epoch: [175/900

In [None]:
response = model.get_response(session, "what's up?")
print(response)# response = model.get_reply(session, "Will it ran tomorrow?")
# print(response)

In [None]:
response = model.get_reply(session, "do you love to dance")
print(response)