In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import pandas as pd
from fastText import load_model
from matplotlib import pylab
from sklearn.manifold import TSNE
from tensorboardX import SummaryWriter

# tensorboardX writer, created with default arguments; the training loop logs the
# training loss through it with writer.add_scalar(...)
writer = SummaryWriter()

In [2]:
# Fasttext embeddings trained on the train and val sets with:
#   ./fasttext skipgram -input input_text_file -output output_model -dim 128   (fastText-0.1.0)
fasttext_model = load_model('word_vectors/fasttext_model.bin')
# Width of one word vector; the embedding matrix built below has this many columns,
# so it has to agree with the -dim value the model was trained with
num_dims = 128

In [3]:
# 'vocab' lists the frequent words appearing in the text along with their frequencies
# (minimum frequency = 6); each usable line splits into exactly two fields: word, frequency
# Store appearing words. The context manager closes the file as soon as it has been read.
vocab_words = {}
with open('finished_files/vocab') as vocab_file:
    for line in vocab_file:
        li = line.split()
        # Lines that do not split into exactly two fields are ignored
        if len(li) == 2:
            word, freq = li
            vocab_words[word] = freq
# Final word to id dictionary. The special tokens take the first ids, so <pad> is id 0
word2id = {}
tokens = ['<pad>', '<unk>', '<sos>', '<eos>']
for token in tokens:
    word2id[token] = len(word2id)
# Retrieve words from fasttext model and keep only those which are also present in 'vocab'
fasttext_words = fasttext_model.get_words()
for word in fasttext_words:
    # Words that already have an id are skipped (e.g. a literal '<unk>' in the corpus):
    # re-assigning an existing key would leave a hole in the id range and hand the
    # next new word a duplicate id, because len(word2id) would not grow
    if word in vocab_words and word not in word2id:
        word2id[word] = len(word2id)
vocab_size = len(word2id)
# Reverse dictionary
id2word = {idx: word for word, idx in word2id.items()}
# Embeddings
embeddings = np.zeros((vocab_size, num_dims))
# <pad> token vector contains all zeros. Rest sampled from a normal distribution
mu, sigma = 0, 0.05
for i in range(1, len(tokens)):
    embeddings[i] = np.random.normal(mu, sigma, num_dims)
# Get word vectors from fasttext model and store in embeddings matrix
for i in range(len(tokens), vocab_size):
    embeddings[i] = fasttext_model.get_word_vector(id2word[i])

# The fasttext model and the raw vocab are not needed past this point
del fasttext_model, vocab_words

In [4]:
# Keep only the first `max_vocab_size` ids: the special tokens plus the words that come
# first in the fasttext word list. vocab_size (recomputed below) is what the models
# defined later use for their embedding tables and output layers.
max_vocab_size = 10000
# Clamp to the real vocabulary size so that a vocabulary with fewer entries than the
# limit is kept whole instead of raising a KeyError
kept_size = min(max_vocab_size, len(id2word))
id2word = {i: id2word[i] for i in range(kept_size)}
embeddings = embeddings[:kept_size]
word2id = {word: i for i, word in id2word.items()}

vocab_size = len(word2id)

# Number of embeddings projected to 2-D for the visual check below
num_points = 500

tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000, method='exact')
# Row 0 is the all-zero <pad> vector and is left out of the projection
two_d_embeddings = tsne.fit_transform(embeddings[1:num_points+1, :])

def plot(embeddings, labels):
    """Draw a labelled scatter plot of 2-D points and show it.

    embeddings: 2-D array whose rows are (x, y) pairs; it must have at least
        as many rows as there are labels.
    labels: sequence of strings; labels[i] is written next to row i.
    """
    assert len(labels) <= embeddings.shape[0], 'More labels than embeddings'
    pylab.figure(figsize=(15,15))  # in inches
    # zip stops at the last label, so surplus rows are simply not drawn
    for point, text in zip(embeddings, labels):
        x, y = point
        # One scatter call per point, as before, so every point gets its own marker
        pylab.scatter(x, y)
        pylab.annotate(
            text,
            xy=(x, y),
            xytext=(5, 2),
            textcoords='offset points',
            ha='right',
            va='bottom',
        )
    pylab.show()

# Labels for the rows that were passed to t-SNE (ids 1..num_points)
words = [id2word[i] for i in range(1, num_points+1)]
plot(two_d_embeddings, words)

In [5]:
# Number of sequences in every training minibatch
batch_size = 64
# Articles are cut to this many whitespace-separated tokens before <eos> is appended
max_article_size = 50 #400
# Abstracts are cut to this many whitespace-separated tokens before <eos> is appended
max_abstract_size = 15 #100
# Size of the LSTM hidden and cell states in both the encoder and the decoder
hidden_size = 512

In [6]:
class Batch:
    """Plain container for one minibatch.

    Both attributes start out as a (None, None) placeholder;
    BatchGenerator.build_batch replaces each with a (sequences, lengths) pair.
    """

    def __init__(self):
        placeholder = (None, None)
        self.abstract = placeholder
        self.article = placeholder

In [7]:
class BatchGenerator:
    """Serves fixed-size minibatches of (article, abstract) pairs from a dataframe.

    Rows are consumed in dataframe order. Rows whose `article` or `abstract` is not a
    string are skipped (presumably empty csv fields that pandas read as NaN). The
    last, possibly short, batch is topped up with rows taken from the start of the
    dataframe so that every batch has exactly `batch_size` columns.
    """

    def __init__(self, batch_size, dataframe):
        self.batch_size = batch_size
        # train, valid, or test dataframe imported from csv
        self.df = dataframe
        self.generator = self.row_generator()

    def row_generator(self):
        """Yield the dataframe rows in order as tuples (index column excluded)."""
        yield from self.df.itertuples(index=False)

    def get_row(self, generator):
        """Return the next usable row from `generator`.

        A row is usable when both its article and its abstract are strings; checking
        only the article would let a non-string abstract through and fail later on
        `.split()`. Raises StopIteration when `generator` runs out of rows.
        """
        row = next(generator)
        while not (isinstance(row.article, str) and isinstance(row.abstract, str)):
            row = next(generator)
        return row

    def _padding_rows(self, count):
        """Return `count` usable rows read from the start of the dataframe.

        The dataframe is re-read from the top as often as needed, so padding also
        works when it holds fewer usable rows than are missing. Raises ValueError if
        a full pass yields no usable row (otherwise the loop could never finish).
        """
        extra = []
        while len(extra) < count:
            collected_before = len(extra)
            source = self.row_generator()
            try:
                while len(extra) < count:
                    extra.append(self.get_row(source))
            except StopIteration:
                if len(extra) == collected_before:
                    raise ValueError('dataframe contains no usable rows to pad the batch with')
        return extra

    @staticmethod
    def _encode(text, max_tokens):
        """Turn `text` into a LongTensor of word ids, truncated and terminated by <eos>."""
        tokens = text.split()[:max_tokens]
        tokens.append('<eos>')
        # <unk> token index for unknown words
        unk = word2id['<unk>']
        return torch.LongTensor([word2id.get(token, unk) for token in tokens])

    def build_batch(self, rows):
        """Build a Batch from `rows` (a list of dataframe rows, extended in place if short).

        Returns a Batch whose `article` and `abstract` are (Variable, lengths) pairs.
        Each Variable is a zero-padded (max_length, batch_size) LongTensor of word ids
        with one sequence per column; `lengths` holds the per-column token counts
        including <eos>. Columns are ordered by descending article length.
        """
        # If number of rows less than batch size, get extra rows from the beginning of the dataframe
        if len(rows) < self.batch_size:
            rows.extend(self._padding_rows(self.batch_size - len(rows)))

        # Get lengths of all the sequences in the batch upto max number of tokens
        # + 1 is for the <eos> token
        abstract_lengths = torch.cuda.LongTensor(
            [len(row.abstract.split()[:max_abstract_size]) for row in rows]) + 1
        article_lengths = torch.cuda.LongTensor(
            [len(row.article.split()[:max_article_size]) for row in rows]) + 1
        # int() keeps the sizes plain Python ints whatever type torch.max returns
        abs_len = int(torch.max(abstract_lengths))
        art_len = int(torch.max(article_lengths))

        # Tensors containing abstracts and articles of the batch
        abstracts = torch.cuda.LongTensor(abs_len, self.batch_size).fill_(0) # zero padding
        articles = torch.cuda.LongTensor(art_len, self.batch_size).fill_(0) # zero padding

        # Sort rows in descending order of sequence (article) lengths and apply the
        # same permutation to the abstract lengths and to the rows themselves
        article_lengths, indices = torch.sort(article_lengths, descending=True)
        abstract_lengths = abstract_lengths.index_select(0, indices)
        rows = [rows[i] for i in indices.tolist()]

        for i in range(self.batch_size):
            # Store each encoded sequence as a column; the rest of the column stays zero
            token_list = self._encode(rows[i].abstract, max_abstract_size)
            abstracts[:,i][:len(token_list)] = token_list

            token_list = self._encode(rows[i].article, max_article_size)
            articles[:,i][:len(token_list)] = token_list

        batch = Batch()
        batch.article = (Variable(articles), article_lengths)
        batch.abstract = (Variable(abstracts), abstract_lengths)
        return batch

    def get_batch(self):
        """Return the next Batch; raise StopIteration once the dataframe is used up."""
        rows = []
        for _ in range(self.batch_size):
            try:
                rows.append(self.get_row(self.generator))
            except StopIteration:
                break
        if not rows:
            raise StopIteration
        return self.build_batch(rows)

In [8]:
class Encoder(nn.Module):
    """Single-layer bidirectional LSTM encoder over frozen pre-trained embeddings.

    `hidden` must be set from init_hidden() by the caller before each forward pass;
    forward() overwrites it with the LSTM's final state.
    """

    def __init__(self, batch_size):
        super().__init__()
        self.batch_size = batch_size

        # (h, c) state handed to the LSTM; None until the caller initialises it
        self.hidden = None

        # Embedding table filled from the pre-trained matrix and excluded from training
        self.embed = nn.Embedding(vocab_size, num_dims).cuda()
        pretrained = torch.from_numpy(embeddings)
        self.embed.weight.data.copy_(pretrained)
        self.embed.weight.requires_grad = False

        # One bidirectional LSTM layer
        self.lstm = nn.LSTM(num_dims, hidden_size, num_layers=1, bidirectional=True)
        self.lstm.cuda()

        # Maps the combined final hidden state to one score per vocabulary word
        self.linear_transform = nn.Linear(hidden_size, vocab_size)

    def init_hidden(self, batch_size, volatile=False):
        """Return a zero (h0, c0) pair, each of shape (2, batch_size, hidden_size).

        The leading 2 is one layer times two directions. `volatile` is forwarded to
        both Variables.
        """
        state_shape = (1 * 2, batch_size, hidden_size)
        h0 = torch.cuda.FloatTensor(*state_shape).fill_(0)
        c0 = torch.cuda.FloatTensor(*state_shape).fill_(0)
        return (Variable(h0, volatile=volatile), Variable(c0, volatile=volatile))

    def forward(self, articles, article_lengths):
        """Encode a batch of articles.

        articles: word ids; the training code passes (sequence_length, batch_size).
        article_lengths: accepted but not used here; the padded batch is fed to the
            LSTM as is, without packing.

        Returns (hidden, output): `hidden` is the (h, c) pair obtained by adding the
        states of the two directions, `output` is linear_transform applied to that h.
        """
        embedded = self.embed(articles)
        _, self.hidden = self.lstm(embedded, self.hidden)

        # Index 0 and 1 along the first axis are the two directions; sum them
        h_n, c_n = self.hidden
        hidden = ((h_n[0] + h_n[1]), (c_n[0] + c_n[1]))
        output = self.linear_transform(hidden[0])

        return hidden, output

encoder = Encoder(batch_size)
encoder.cuda()

Encoder(
  (embed): Embedding(10000, 128)
  (lstm): LSTM(128, 512, bidirectional=True)
  (linear_transform): Linear(in_features=512, out_features=10000, bias=True)
)

In [9]:
class Decoder(nn.Module):
    """One-step LSTM decoder over frozen pre-trained embeddings.

    `hidden` holds the (h, c) state between steps; the caller seeds it (the training
    code assigns the encoder's returned state) and forward() advances it by one step.
    """

    def __init__(self):
        super().__init__()

        # Embedding table filled from the pre-trained matrix and excluded from training
        self.embed = nn.Embedding(vocab_size, num_dims).cuda()
        pretrained = torch.from_numpy(embeddings)
        self.embed.weight.data.copy_(pretrained)
        self.embed.weight.requires_grad = False

        self.hidden = None
        self.lstm_cell = nn.LSTMCell(num_dims, hidden_size).cuda()

        # Maps the hidden state to one score per vocabulary word
        self.linear_transform = nn.Linear(hidden_size, vocab_size)

    def forward(self, input):
        """Advance the decoder by one step.

        input: word ids of the tokens fed in at this step, one per sequence.
        Returns linear_transform applied to the new hidden state, i.e. one score per
        vocabulary word for every sequence.
        """
        embedded = self.embed(input)
        self.hidden = self.lstm_cell(embedded, self.hidden)
        h_t = self.hidden[0]
        return self.linear_transform(h_t)

decoder = Decoder()
decoder.cuda()

Decoder(
  (embed): Embedding(10000, 128)
  (lstm_cell): LSTMCell(128, 512)
  (linear_transform): Linear(in_features=512, out_features=10000, bias=True)
)

# Print name and values of every decoder parameter that requires gradients
# (parameters with requires_grad == False, such as the frozen embedding, are skipped)
for name, param in decoder.named_parameters():
    if param.requires_grad:
        print (name, param.data)

In [10]:
# Base SGD learning rate; the training loop halves it on a fixed epoch schedule
learning_rate = 4.0 #3.0, 3.5

# Keep only the parameters that require gradients (the embedding tables are frozen)
encoder_parameters = filter(lambda p: p.requires_grad, encoder.parameters())
decoder_parameters = filter(lambda p: p.requires_grad, decoder.parameters())
# Optimizers: one plain SGD optimizer per network
encoder_optimizer = torch.optim.SGD(encoder_parameters, lr=learning_rate)
decoder_optimizer = torch.optim.SGD(decoder_parameters, lr=learning_rate)
# Loss function
# How the loss is accumulated over variable-length sequences in a batch:
# size_average: by default the losses are averaged over the observations of a minibatch;
#   with size_average=False they are summed instead (ignored if reduce is False).
#   It is set to False here, and the summed loss is divided by the number of
#   non-padding target tokens by the code that calls the criterion.
# ignore_index: a target value that is ignored and does not contribute to the gradient.
#   It is set to the padding id so that <pad> targets add nothing to the loss.
loss_function = nn.CrossEntropyLoss(size_average=False, ignore_index=0).cuda() # 0 is the index of <pad>###

def train_model(batch):
    """Run one optimisation step on `batch` and return the batch loss.

    batch: a Batch whose `article` and `abstract` are (sequences, lengths) pairs.
        The sequences are LongTensor Variables of word ids with shape
        (max_sequence_length, batch_size); the lengths are LongTensors with one
        entry per sequence.

    The decoder is never fed the reference tokens: its first input is the word the
    encoder output scores highest, and every later input is its own previous
    prediction. The summed cross-entropy is divided by the total number of abstract
    tokens before back-propagation. Returns that normalised loss.
    """
    articles, article_lengths = batch.article
    abstracts, abstract_lengths = batch.abstract

    # Start from clean gradients and a zero encoder state
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    encoder.hidden = encoder.init_hidden(batch_size)

    # The encoder's final state seeds the decoder; its output picks the first input
    hiddenT, output = encoder(articles, article_lengths)
    decoder.hidden = hiddenT
    decoder_input = most_likely(output, batch_size)

    # One decoder step per target position, up to the longest abstract in the batch
    loss = 0
    num_steps = torch.max(abstract_lengths)
    for t in range(num_steps):
        output = decoder(decoder_input)
        loss += loss_function(output, abstracts[t])
        decoder_input = most_likely(output, batch_size)

    loss = loss/torch.sum(abstract_lengths)
    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    # Reset kept from the original code: Decoder.forward does not read hidden_list,
    # but calc_loss refers to this attribute
    decoder.hidden_list = []

    return loss

In [11]:
def validation_loss(df):
    """Return the mean of calc_loss over `df`, evaluated one example per batch.

    df: dataframe in the format BatchGenerator expects.
    Raises ValueError when `df` yields no batch at all, instead of failing with a
    ZeroDivisionError on the final average.
    """
    batch_size = 1
    generator = BatchGenerator(batch_size, df)
    loss = 0
    step = 0
    while True:
        try:
            batch = generator.get_batch()
        except StopIteration:
            break
        step += 1
        loss += calc_loss(batch, batch_size)
    if step == 0:
        raise ValueError('validation dataframe produced no batches')
    return loss/step

def calc_loss(batch, batch_size):
    """Return the normalised loss of `batch` without updating any weights.

    batch: a Batch as produced by BatchGenerator.
    batch_size: number of sequences in `batch`; used to size the encoder state and
        passed on to most_likely.

    Mirrors the forward pass of train_model: the encoder's final state is assigned
    to decoder.hidden (the attribute Decoder.forward actually reads), the decoder
    is fed its own predictions, and the summed loss is divided by the total number
    of abstract tokens. All inputs are marked volatile so no graph is kept.
    """
    loss = 0
    encoder.hidden = encoder.init_hidden(batch_size, volatile=True)
    articles, article_lengths = batch.article
    abstracts, abstract_lengths = batch.abstract

    articles.volatile = True
    abstracts.volatile = True

    hiddenT, output = encoder(articles, article_lengths)
    # Seed the decoder exactly as train_model does. The previous per-layer loop used
    # an undefined `hidden_layers` and filled `decoder.hidden_list`, which the
    # decoder never reads, so the decoder would have run from stale state.
    decoder.hidden = hiddenT
    decoder_input = most_likely(output, batch_size)

    for t in range(torch.max(abstract_lengths)):
        output = decoder(decoder_input)
        decoder_input = most_likely(output, batch_size)
        loss += loss_function(output, abstracts[t])
    loss = loss/torch.sum(abstract_lengths)
    return loss

In [12]:
def most_likely(output, batch_size):
    """Return the index of the highest-scoring word for every row of `output`.

    output: scores with the vocabulary on the last dimension, either
        (batch, vocab_size) or a single (vocab_size,) vector.
    batch_size: unused; kept so existing callers keep working.

    Returns the top-1 indices with the last dimension reduced to size 1, e.g.
    (batch, 1) for a (batch, vocab_size) input.

    The softmax and the top-k are always taken over the last dimension. Branching
    on batch_size used to normalise a (1, vocab_size) input over dim 0, which has
    size 1, so every probability became 1.0 and the selected index was arbitrary.
    """
    vocab_dim = output.dim() - 1
    probabilities = nn.Softmax(dim=vocab_dim)(output)
    _, next_input = torch.topk(probabilities, 1, dim=vocab_dim)
    return next_input

In [13]:
# Enable cuDNN's benchmark mode
cudnn.benchmark = True
# NOTE(review): `fasttest` looks like a typo for `fastest`. This line only sets an
# attribute that nothing in this notebook reads — confirm whether PyTorch honours
# either spelling, and drop the line if it does not.
cudnn.fasttest = True
# Number of passes of the outer loop below
epochs = 7000 #7000

# Put both networks in training mode
encoder.train()
decoder.train()

train_df = pd.read_csv('datasets/train.csv')
val_df = pd.read_csv('datasets/val.csv')
# Global step counter across all epochs; starts at 1 and is used for logging
iteration = 1

for epoch in range(epochs):

    # Halve the learning rate every 1500 epochs and rebuild both optimizers with it.
    # The condition also holds at epoch 0, so the first rate actually used is half
    # of the value configured before the loop.
    if epoch % 1500 == 0: #500, 275
        learning_rate = learning_rate / 2 #2
        # Keep only the parameters that require gradients
        encoder_parameters = filter(lambda p: p.requires_grad, encoder.parameters())
        decoder_parameters = filter(lambda p: p.requires_grad, decoder.parameters())
        # Optimizers (train_model picks these up through the global names)
        encoder_optimizer = torch.optim.SGD(encoder_parameters, lr=learning_rate)
        decoder_optimizer = torch.optim.SGD(decoder_parameters, lr=learning_rate)
        print('')
        print('learning rate: %f' % learning_rate)
        print('')

    # A fresh generator per epoch, restricted to the first 64 training rows
    generator = BatchGenerator(batch_size, train_df[:64]) #64

    # Train on batches until the generator signals the end of the data
    while True:
        try: 
            batch = generator.get_batch()
        except StopIteration: break
        loss = train_model(batch)

        # Every second iteration: print the loss, log it, and export the scalars to JSON
        if iteration % 2 == 0:
            print('Average minibatch loss at step %d: %.3f' % (iteration, loss))
            writer.add_scalar('train_loss', loss, iteration)
            writer.export_scalars_to_json("./all_scalars.json")

        # Validation is disabled: what follows is a bare string literal, not executed code
        """if iteration % 8 == 0:    
            encoder.eval()
            decoder.eval()
            val_loss = validation_loss(val_df[:8]) # truncating validation dataframe
            print('Validation loss: %.3f' % val_loss)
            
            writer.add_scalar('valid_loss', val_loss, iteration)
            writer.export_scalars_to_json("./all_scalars.json")
            
            encoder.train()
            decoder.train()"""
        iteration += 1


learning rate: 1.500000

Average minibatch loss at step 2: 9.184
Average minibatch loss at step 4: 9.103
Average minibatch loss at step 6: 9.002
Average minibatch loss at step 8: 8.866
Average minibatch loss at step 10: 8.606
Average minibatch loss at step 12: 7.718
Average minibatch loss at step 14: 7.916
Average minibatch loss at step 16: 7.962
Average minibatch loss at step 18: 8.134
Average minibatch loss at step 20: 7.511
Average minibatch loss at step 22: 7.275
Average minibatch loss at step 24: 7.185
Average minibatch loss at step 26: 7.203
Average minibatch loss at step 28: 6.955
Average minibatch loss at step 30: 7.650
Average minibatch loss at step 32: 7.061
Average minibatch loss at step 34: 6.961
Average minibatch loss at step 36: 7.327
Average minibatch loss at step 38: 6.891
Average minibatch loss at step 40: 7.030
Average minibatch loss at step 42: 7.131
Average minibatch loss at step 44: 6.769
Average minibatch loss at step 46: 6.752
Average minibatch loss at step 48: 

Average minibatch loss at step 394: 6.198
Average minibatch loss at step 396: 6.131
Average minibatch loss at step 398: 6.484
Average minibatch loss at step 400: 5.879
Average minibatch loss at step 402: 6.045
Average minibatch loss at step 404: 6.131
Average minibatch loss at step 406: 6.451
Average minibatch loss at step 408: 6.183
Average minibatch loss at step 410: 5.933
Average minibatch loss at step 412: 6.102
Average minibatch loss at step 414: 5.992
Average minibatch loss at step 416: 6.304
Average minibatch loss at step 418: 6.405
Average minibatch loss at step 420: 6.198
Average minibatch loss at step 422: 6.060
Average minibatch loss at step 424: 6.112
Average minibatch loss at step 426: 6.455
Average minibatch loss at step 428: 5.939
Average minibatch loss at step 430: 5.915
Average minibatch loss at step 432: 6.390
Average minibatch loss at step 434: 6.668
Average minibatch loss at step 436: 6.144
Average minibatch loss at step 438: 6.190
Average minibatch loss at step 440

Average minibatch loss at step 786: 6.077
Average minibatch loss at step 788: 5.892
Average minibatch loss at step 790: 5.876
Average minibatch loss at step 792: 5.953
Average minibatch loss at step 794: 5.811
Average minibatch loss at step 796: 6.105
Average minibatch loss at step 798: 5.823
Average minibatch loss at step 800: 6.140
Average minibatch loss at step 802: 6.014
Average minibatch loss at step 804: 6.278
Average minibatch loss at step 806: 5.878
Average minibatch loss at step 808: 6.020
Average minibatch loss at step 810: 6.209
Average minibatch loss at step 812: 5.757
Average minibatch loss at step 814: 5.873
Average minibatch loss at step 816: 6.041
Average minibatch loss at step 818: 5.853
Average minibatch loss at step 820: 5.876
Average minibatch loss at step 822: 5.918
Average minibatch loss at step 824: 5.923
Average minibatch loss at step 826: 5.864
Average minibatch loss at step 828: 5.955
Average minibatch loss at step 830: 5.866
Average minibatch loss at step 832

Average minibatch loss at step 1174: 5.751
Average minibatch loss at step 1176: 5.803
Average minibatch loss at step 1178: 5.669
Average minibatch loss at step 1180: 5.761
Average minibatch loss at step 1182: 5.621
Average minibatch loss at step 1184: 5.542
Average minibatch loss at step 1186: 5.740
Average minibatch loss at step 1188: 5.824
Average minibatch loss at step 1190: 5.753
Average minibatch loss at step 1192: 5.802
Average minibatch loss at step 1194: 5.861
Average minibatch loss at step 1196: 5.611
Average minibatch loss at step 1198: 5.609
Average minibatch loss at step 1200: 5.600
Average minibatch loss at step 1202: 5.716
Average minibatch loss at step 1204: 5.795
Average minibatch loss at step 1206: 5.749
Average minibatch loss at step 1208: 5.830
Average minibatch loss at step 1210: 5.620
Average minibatch loss at step 1212: 5.696
Average minibatch loss at step 1214: 5.597
Average minibatch loss at step 1216: 5.490
Average minibatch loss at step 1218: 5.726
Average min

Average minibatch loss at step 1558: 5.702
Average minibatch loss at step 1560: 5.707
Average minibatch loss at step 1562: 5.676
Average minibatch loss at step 1564: 5.641
Average minibatch loss at step 1566: 5.567
Average minibatch loss at step 1568: 5.417
Average minibatch loss at step 1570: 5.665
Average minibatch loss at step 1572: 5.843
Average minibatch loss at step 1574: 5.628
Average minibatch loss at step 1576: 5.632
Average minibatch loss at step 1578: 5.723
Average minibatch loss at step 1580: 5.499
Average minibatch loss at step 1582: 5.565
Average minibatch loss at step 1584: 5.626
Average minibatch loss at step 1586: 5.649
Average minibatch loss at step 1588: 5.698
Average minibatch loss at step 1590: 5.796
Average minibatch loss at step 1592: 5.677
Average minibatch loss at step 1594: 5.519
Average minibatch loss at step 1596: 5.638
Average minibatch loss at step 1598: 5.618
Average minibatch loss at step 1600: 5.419
Average minibatch loss at step 1602: 5.640
Average min

Average minibatch loss at step 1942: 5.518
Average minibatch loss at step 1944: 5.548
Average minibatch loss at step 1946: 5.461
Average minibatch loss at step 1948: 5.553
Average minibatch loss at step 1950: 5.464
Average minibatch loss at step 1952: 5.385
Average minibatch loss at step 1954: 5.527
Average minibatch loss at step 1956: 5.688
Average minibatch loss at step 1958: 5.582
Average minibatch loss at step 1960: 5.596
Average minibatch loss at step 1962: 5.606
Average minibatch loss at step 1964: 5.400
Average minibatch loss at step 1966: 5.480
Average minibatch loss at step 1968: 5.355
Average minibatch loss at step 1970: 5.573
Average minibatch loss at step 1972: 5.556
Average minibatch loss at step 1974: 5.592
Average minibatch loss at step 1976: 5.539
Average minibatch loss at step 1978: 5.471
Average minibatch loss at step 1980: 5.540
Average minibatch loss at step 1982: 5.418
Average minibatch loss at step 1984: 5.270
Average minibatch loss at step 1986: 5.488
Average min

Average minibatch loss at step 2326: 5.416
Average minibatch loss at step 2328: 5.450
Average minibatch loss at step 2330: 5.316
Average minibatch loss at step 2332: 5.403
Average minibatch loss at step 2334: 5.357
Average minibatch loss at step 2336: 5.194
Average minibatch loss at step 2338: 5.486
Average minibatch loss at step 2340: 5.633
Average minibatch loss at step 2342: 5.446
Average minibatch loss at step 2344: 5.435
Average minibatch loss at step 2346: 5.497
Average minibatch loss at step 2348: 5.314
Average minibatch loss at step 2350: 5.289
Average minibatch loss at step 2352: 5.232
Average minibatch loss at step 2354: 5.313
Average minibatch loss at step 2356: 5.396
Average minibatch loss at step 2358: 5.458
Average minibatch loss at step 2360: 5.445
Average minibatch loss at step 2362: 5.491
Average minibatch loss at step 2364: 5.381
Average minibatch loss at step 2366: 5.317
Average minibatch loss at step 2368: 5.137
Average minibatch loss at step 2370: 5.360
Average min

Average minibatch loss at step 2710: 5.280
Average minibatch loss at step 2712: 5.259
Average minibatch loss at step 2714: 5.160
Average minibatch loss at step 2716: 5.222
Average minibatch loss at step 2718: 5.174
Average minibatch loss at step 2720: 4.979
Average minibatch loss at step 2722: 5.192
Average minibatch loss at step 2724: 5.414
Average minibatch loss at step 2726: 5.300
Average minibatch loss at step 2728: 5.278
Average minibatch loss at step 2730: 5.415
Average minibatch loss at step 2732: 5.154
Average minibatch loss at step 2734: 5.183
Average minibatch loss at step 2736: 5.117
Average minibatch loss at step 2738: 5.253
Average minibatch loss at step 2740: 5.305
Average minibatch loss at step 2742: 5.248
Average minibatch loss at step 2744: 5.257
Average minibatch loss at step 2746: 5.155
Average minibatch loss at step 2748: 5.205
Average minibatch loss at step 2750: 5.165
Average minibatch loss at step 2752: 4.968
Average minibatch loss at step 2754: 5.166
Average min

Average minibatch loss at step 3094: 5.166
Average minibatch loss at step 3096: 5.094
Average minibatch loss at step 3098: 4.989
Average minibatch loss at step 3100: 5.070
Average minibatch loss at step 3102: 5.080
Average minibatch loss at step 3104: 4.806
Average minibatch loss at step 3106: 5.174
Average minibatch loss at step 3108: 5.222
Average minibatch loss at step 3110: 5.124
Average minibatch loss at step 3112: 5.141
Average minibatch loss at step 3114: 5.167
Average minibatch loss at step 3116: 5.019
Average minibatch loss at step 3118: 4.995
Average minibatch loss at step 3120: 5.112
Average minibatch loss at step 3122: 5.018
Average minibatch loss at step 3124: 5.131
Average minibatch loss at step 3126: 5.081
Average minibatch loss at step 3128: 5.127
Average minibatch loss at step 3130: 4.987
Average minibatch loss at step 3132: 5.031
Average minibatch loss at step 3134: 5.048
Average minibatch loss at step 3136: 4.779
Average minibatch loss at step 3138: 5.217
Average min

Average minibatch loss at step 3478: 4.851
Average minibatch loss at step 3480: 4.880
Average minibatch loss at step 3482: 4.744
Average minibatch loss at step 3484: 4.840
Average minibatch loss at step 3486: 4.871
Average minibatch loss at step 3488: 4.503
Average minibatch loss at step 3490: 4.956
Average minibatch loss at step 3492: 5.012
Average minibatch loss at step 3494: 4.976
Average minibatch loss at step 3496: 4.955
Average minibatch loss at step 3498: 4.932
Average minibatch loss at step 3500: 4.764
Average minibatch loss at step 3502: 4.741
Average minibatch loss at step 3504: 4.813
Average minibatch loss at step 3506: 4.736
Average minibatch loss at step 3508: 4.831
Average minibatch loss at step 3510: 4.845
Average minibatch loss at step 3512: 4.951
Average minibatch loss at step 3514: 4.708
Average minibatch loss at step 3516: 4.771
Average minibatch loss at step 3518: 4.837
Average minibatch loss at step 3520: 4.514
Average minibatch loss at step 3522: 4.943
Average min

Average minibatch loss at step 3862: 4.559
Average minibatch loss at step 3864: 4.673
Average minibatch loss at step 3866: 4.532
Average minibatch loss at step 3868: 4.564
Average minibatch loss at step 3870: 4.741
Average minibatch loss at step 3872: 4.241
Average minibatch loss at step 3874: 5.817
Average minibatch loss at step 3876: 6.272
Average minibatch loss at step 3878: 5.960
Average minibatch loss at step 3880: 5.738
Average minibatch loss at step 3882: 5.213
Average minibatch loss at step 3884: 4.799
Average minibatch loss at step 3886: 4.675
Average minibatch loss at step 3888: 4.668
Average minibatch loss at step 3890: 4.610
Average minibatch loss at step 3892: 4.691
Average minibatch loss at step 3894: 4.601
Average minibatch loss at step 3896: 4.668
Average minibatch loss at step 3898: 4.550
Average minibatch loss at step 3900: 4.632
Average minibatch loss at step 3902: 4.617
Average minibatch loss at step 3904: 4.135
Average minibatch loss at step 3906: 4.703
Average min

Average minibatch loss at step 4246: 4.415
Average minibatch loss at step 4248: 4.384
Average minibatch loss at step 4250: 4.285
Average minibatch loss at step 4252: 4.335
Average minibatch loss at step 4254: 4.370
Average minibatch loss at step 4256: 3.702
Average minibatch loss at step 4258: 4.422
Average minibatch loss at step 4260: 4.498
Average minibatch loss at step 4262: 4.368
Average minibatch loss at step 4264: 4.400
Average minibatch loss at step 4266: 4.379
Average minibatch loss at step 4268: 4.177
Average minibatch loss at step 4270: 4.155
Average minibatch loss at step 4272: 4.407
Average minibatch loss at step 4274: 4.319
Average minibatch loss at step 4276: 4.355
Average minibatch loss at step 4278: 4.309
Average minibatch loss at step 4280: 4.397
Average minibatch loss at step 4282: 4.311
Average minibatch loss at step 4284: 4.295
Average minibatch loss at step 4286: 4.342
Average minibatch loss at step 4288: 3.749
Average minibatch loss at step 4290: 4.486
Average min

Average minibatch loss at step 4630: 3.946
Average minibatch loss at step 4632: 4.047
Average minibatch loss at step 4634: 4.029
Average minibatch loss at step 4636: 3.984
Average minibatch loss at step 4638: 4.144
Average minibatch loss at step 4640: 3.285
Average minibatch loss at step 4642: 4.211
Average minibatch loss at step 4644: 4.450
Average minibatch loss at step 4646: 4.157
Average minibatch loss at step 4648: 4.192
Average minibatch loss at step 4650: 4.089
Average minibatch loss at step 4652: 3.999
Average minibatch loss at step 4654: 4.030
Average minibatch loss at step 4656: 4.447
Average minibatch loss at step 4658: 4.034
Average minibatch loss at step 4660: 4.073
Average minibatch loss at step 4662: 3.975
Average minibatch loss at step 4664: 4.064
Average minibatch loss at step 4666: 4.092
Average minibatch loss at step 4668: 3.988
Average minibatch loss at step 4670: 4.147
Average minibatch loss at step 4672: 3.367
Average minibatch loss at step 4674: 4.171
Average min

Average minibatch loss at step 5014: 3.656
Average minibatch loss at step 5016: 3.743
Average minibatch loss at step 5018: 3.813
Average minibatch loss at step 5020: 3.708
Average minibatch loss at step 5022: 3.767
Average minibatch loss at step 5024: 2.941
Average minibatch loss at step 5026: 3.879
Average minibatch loss at step 5028: 4.057
Average minibatch loss at step 5030: 3.777
Average minibatch loss at step 5032: 3.941
Average minibatch loss at step 5034: 3.777
Average minibatch loss at step 5036: 3.706
Average minibatch loss at step 5038: 3.619
Average minibatch loss at step 5040: 3.915
Average minibatch loss at step 5042: 3.765
Average minibatch loss at step 5044: 3.970
Average minibatch loss at step 5046: 3.639
Average minibatch loss at step 5048: 3.850
Average minibatch loss at step 5050: 3.807
Average minibatch loss at step 5052: 3.679
Average minibatch loss at step 5054: 4.063
Average minibatch loss at step 5056: 3.046
Average minibatch loss at step 5058: 3.931
Average min

Average minibatch loss at step 5398: 3.268
Average minibatch loss at step 5400: 3.504
Average minibatch loss at step 5402: 3.754
Average minibatch loss at step 5404: 3.566
Average minibatch loss at step 5406: 3.552
Average minibatch loss at step 5408: 2.537
Average minibatch loss at step 5410: 3.395
Average minibatch loss at step 5412: 3.671
Average minibatch loss at step 5414: 3.429
Average minibatch loss at step 5416: 3.521
Average minibatch loss at step 5418: 3.381
Average minibatch loss at step 5420: 3.316
Average minibatch loss at step 5422: 3.382
Average minibatch loss at step 5424: 3.440
Average minibatch loss at step 5426: 3.402
Average minibatch loss at step 5428: 3.368
Average minibatch loss at step 5430: 3.208
Average minibatch loss at step 5432: 3.353
Average minibatch loss at step 5434: 3.458
Average minibatch loss at step 5436: 3.377
Average minibatch loss at step 5438: 3.471
Average minibatch loss at step 5440: 2.536
Average minibatch loss at step 5442: 3.407
Average min

Average minibatch loss at step 5782: 2.776
Average minibatch loss at step 5784: 2.900
Average minibatch loss at step 5786: 3.088
Average minibatch loss at step 5788: 3.047
Average minibatch loss at step 5790: 3.124
Average minibatch loss at step 5792: 2.099
Average minibatch loss at step 5794: 3.003
Average minibatch loss at step 5796: 3.302
Average minibatch loss at step 5798: 2.995
Average minibatch loss at step 5800: 3.103
Average minibatch loss at step 5802: 2.946
Average minibatch loss at step 5804: 2.970
Average minibatch loss at step 5806: 2.963
Average minibatch loss at step 5808: 3.013
Average minibatch loss at step 5810: 2.929
Average minibatch loss at step 5812: 2.911
Average minibatch loss at step 5814: 2.802
Average minibatch loss at step 5816: 2.907
Average minibatch loss at step 5818: 3.074
Average minibatch loss at step 5820: 3.047
Average minibatch loss at step 5822: 3.240
Average minibatch loss at step 5824: 1.978
Average minibatch loss at step 5826: 2.946
Average min

Average minibatch loss at step 6166: 2.342
Average minibatch loss at step 6168: 2.597
Average minibatch loss at step 6170: 2.763
Average minibatch loss at step 6172: 2.622
Average minibatch loss at step 6174: 2.733
Average minibatch loss at step 6176: 1.457
Average minibatch loss at step 6178: 2.451
Average minibatch loss at step 6180: 2.690
Average minibatch loss at step 6182: 2.597
Average minibatch loss at step 6184: 2.790
Average minibatch loss at step 6186: 2.512
Average minibatch loss at step 6188: 2.507
Average minibatch loss at step 6190: 2.526
Average minibatch loss at step 6192: 2.599
Average minibatch loss at step 6194: 2.553
Average minibatch loss at step 6196: 2.461
Average minibatch loss at step 6198: 2.333
Average minibatch loss at step 6200: 2.508
Average minibatch loss at step 6202: 2.575
Average minibatch loss at step 6204: 2.524
Average minibatch loss at step 6206: 2.597
Average minibatch loss at step 6208: 1.417
Average minibatch loss at step 6210: 2.344
Average min

Average minibatch loss at step 6550: 1.901
Average minibatch loss at step 6552: 2.053
Average minibatch loss at step 6554: 2.224
Average minibatch loss at step 6556: 2.042
Average minibatch loss at step 6558: 2.290
Average minibatch loss at step 6560: 1.035
Average minibatch loss at step 6562: 1.814
Average minibatch loss at step 6564: 2.046
Average minibatch loss at step 6566: 2.041
Average minibatch loss at step 6568: 2.192
Average minibatch loss at step 6570: 1.999
Average minibatch loss at step 6572: 1.997
Average minibatch loss at step 6574: 1.867
Average minibatch loss at step 6576: 2.000
Average minibatch loss at step 6578: 2.020
Average minibatch loss at step 6580: 1.828
Average minibatch loss at step 6582: 1.780
Average minibatch loss at step 6584: 1.925
Average minibatch loss at step 6586: 2.101
Average minibatch loss at step 6588: 2.010
Average minibatch loss at step 6590: 2.162
Average minibatch loss at step 6592: 1.058
Average minibatch loss at step 6594: 1.772
Average min

Average minibatch loss at step 6934: 1.351
Average minibatch loss at step 6936: 1.499
Average minibatch loss at step 6938: 1.692
Average minibatch loss at step 6940: 1.903
Average minibatch loss at step 6942: 2.538
Average minibatch loss at step 6944: 1.973
Average minibatch loss at step 6946: 2.867
Average minibatch loss at step 6948: 2.129
Average minibatch loss at step 6950: 2.401
Average minibatch loss at step 6952: 1.936
Average minibatch loss at step 6954: 1.690
Average minibatch loss at step 6956: 1.620
Average minibatch loss at step 6958: 1.465
Average minibatch loss at step 6960: 1.571
Average minibatch loss at step 6962: 1.528
Average minibatch loss at step 6964: 1.523
Average minibatch loss at step 6966: 1.327
Average minibatch loss at step 6968: 1.394
Average minibatch loss at step 6970: 1.572
Average minibatch loss at step 6972: 1.503
Average minibatch loss at step 6974: 1.572
Average minibatch loss at step 6976: 0.649
Average minibatch loss at step 6978: 1.311
Average min

Average minibatch loss at step 7318: 0.884
Average minibatch loss at step 7320: 0.937
Average minibatch loss at step 7322: 0.985
Average minibatch loss at step 7324: 0.920
Average minibatch loss at step 7326: 0.971
Average minibatch loss at step 7328: 0.377
Average minibatch loss at step 7330: 0.801
Average minibatch loss at step 7332: 0.934
Average minibatch loss at step 7334: 1.016
Average minibatch loss at step 7336: 1.093
Average minibatch loss at step 7338: 0.909
Average minibatch loss at step 7340: 0.847
Average minibatch loss at step 7342: 0.818
Average minibatch loss at step 7344: 0.968
Average minibatch loss at step 7346: 0.976
Average minibatch loss at step 7348: 0.796
Average minibatch loss at step 7350: 0.774
Average minibatch loss at step 7352: 0.917
Average minibatch loss at step 7354: 1.038
Average minibatch loss at step 7356: 0.960
Average minibatch loss at step 7358: 0.995
Average minibatch loss at step 7360: 0.373
Average minibatch loss at step 7362: 0.800
Average min

Average minibatch loss at step 7702: 0.504
Average minibatch loss at step 7704: 0.560
Average minibatch loss at step 7706: 0.593
Average minibatch loss at step 7708: 0.575
Average minibatch loss at step 7710: 0.606
Average minibatch loss at step 7712: 0.247
Average minibatch loss at step 7714: 0.525
Average minibatch loss at step 7716: 0.568
Average minibatch loss at step 7718: 0.591
Average minibatch loss at step 7720: 0.753
Average minibatch loss at step 7722: 0.618
Average minibatch loss at step 7724: 0.532
Average minibatch loss at step 7726: 0.496
Average minibatch loss at step 7728: 0.549
Average minibatch loss at step 7730: 0.566
Average minibatch loss at step 7732: 0.494
Average minibatch loss at step 7734: 0.458
Average minibatch loss at step 7736: 0.519
Average minibatch loss at step 7738: 0.561
Average minibatch loss at step 7740: 0.515
Average minibatch loss at step 7742: 0.542
Average minibatch loss at step 7744: 0.210
Average minibatch loss at step 7746: 0.474
Average min

Average minibatch loss at step 8086: 0.314
Average minibatch loss at step 8088: 0.335
Average minibatch loss at step 8090: 0.324
Average minibatch loss at step 8092: 0.322
Average minibatch loss at step 8094: 0.306
Average minibatch loss at step 8096: 0.130
Average minibatch loss at step 8098: 0.298
Average minibatch loss at step 8100: 0.323
Average minibatch loss at step 8102: 0.394
Average minibatch loss at step 8104: 0.388
Average minibatch loss at step 8106: 0.366
Average minibatch loss at step 8108: 0.287
Average minibatch loss at step 8110: 0.275
Average minibatch loss at step 8112: 0.304
Average minibatch loss at step 8114: 0.350
Average minibatch loss at step 8116: 0.292
Average minibatch loss at step 8118: 0.288
Average minibatch loss at step 8120: 0.305
Average minibatch loss at step 8122: 0.306
Average minibatch loss at step 8124: 0.285
Average minibatch loss at step 8126: 0.286
Average minibatch loss at step 8128: 0.135
Average minibatch loss at step 8130: 0.274
Average min

KeyboardInterrupt: 

# Checkpoint the current model parameters to disk.
# Only the state_dicts (parameter/buffer mappings) are serialized, not the
# module objects themselves, so restoring presumably means rebuilding the
# encoder/decoder first and then calling load_state_dict on each.
# The targets are the relative, extension-less paths 'encoder' and 'decoder'.
torch.save(obj=encoder.state_dict(), f='encoder')
torch.save(obj=decoder.state_dict(), f='decoder')