In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import pandas as pd
from fastText import load_model
from matplotlib import pylab
from sklearn.manifold import TSNE
from tensorboardX import SummaryWriter

writer = SummaryWriter()

In [2]:
# Fasttext embeddings trained on train and val sets
# ./fasttext skipgram -input input_text_file -output output_model -dim 128 (fastText-0.1.0)
fasttext_model = load_model('word_vectors/fasttext_model.bin')
num_dims = 128

In [3]:
# vocab contains frequent words apperaing in the text along with their frequencies
# minimum frequency = 6
vocab_file = open('finished_files/vocab')
# Store appearing words
vocab_words = {}
for line in vocab_file:
    li = line.split()
    if len(li) == 2:
        word, freq = li
        vocab_words[word] = freq
# Final word to id dictionary    
word2id = {}
tokens = ['<pad>', '<unk>', '<sos>', '<eos>']
for token in tokens:
    word2id[token] = len(word2id)
# Retrieve words from fasttext model and keep only those which are also present in 'vocab'
fasttext_words = fasttext_model.get_words()
for word in fasttext_words:
    if word in vocab_words:
        word2id[word] = len(word2id)        
vocab_size = len(word2id)
# Reverse dictionary
id2word = dict(zip(word2id.values(), word2id.keys()))
# Embeddings
embeddings = np.zeros((vocab_size, num_dims))
# <pad> token vector contains all zeros. Rest sampled from a normal distribution
mu, sigma = 0, 0.05
for i in range(1, len(tokens)):
    embeddings[i] = np.random.normal(mu, sigma, num_dims)
# Get word vectors from fasttext model and store in embeddings matrix
for i in range(len(tokens), vocab_size):
    embeddings[i] = fasttext_model.get_word_vector(id2word[i])
    
del fasttext_model, vocab_words

In [4]:
temp = {}
for i in range(1000):
    temp[i] = id2word[i]
id2word = temp
embeddings = embeddings[:1000]
word2id = dict(zip(id2word.values(), id2word.keys()))

vocab_size = len(word2id)

num_points = 500

tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000, method='exact')
two_d_embeddings = tsne.fit_transform(embeddings[1:num_points+1, :])

def plot(embeddings, labels):
    assert embeddings.shape[0] >= len(labels), 'More labels than embeddings'
    pylab.figure(figsize=(15,15))  # in inches
    for i, label in enumerate(labels):
        x, y = embeddings[i,:]
        pylab.scatter(x, y)
        pylab.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')
    pylab.show()

words = [id2word[i] for i in range(1, num_points+1)]
plot(two_d_embeddings, words)

In [5]:
batch_size = 64
max_article_size = 25 #400
max_abstract_size = 10 #100
hidden_size = 512
hidden_layers = 3

In [6]:
class Batch:
    def __init__(self):
        self.abstract = (None, None)
        self.article = (None, None)

In [7]:
class BatchGenerator: 
    
    def __init__(self, batch_size, dataframe):
        self.batch_size = batch_size
        # train, valid, or test dataframe imported from csv
        self.df = dataframe
        self.generator = self.row_generator()
        
        
    def row_generator(self):
        for row in self.df.itertuples(index=False):
            yield row
            
    def build_batch(self, rows):
        # If number of rows less than batch size, get extra rows from the beginning of the dataframe
        if len(rows) < self.batch_size:
            temp_generator = self.row_generator()
            for i in range(self.batch_size - len(rows)):
                rows.append(self.get_row(temp_generator))
                
        # Get lengths of all the sequences in the batch upto max number of tokens
        # + 1 is for the <eos> token
        abstract_lengths = torch.cuda.LongTensor(
            [len(row.abstract.split()[:max_abstract_size]) for row in rows]) + 1
        article_lengths = torch.cuda.LongTensor(
            [len(row.article.split()[:max_article_size]) for row in rows]) + 1 
        abs_len = torch.max(abstract_lengths)
        art_len = torch.max(article_lengths) 
        
        # Variables containing abstracts and articles of the batch
        abstracts = torch.cuda.LongTensor(abs_len, self.batch_size).fill_(0) # zero padding
        articles = torch.cuda.LongTensor(art_len, self.batch_size).fill_(0) # zero padding
        
        # Sort rows in descending order of sequence (article) lengths
        article_lengths, indices = torch.sort(article_lengths, descending=True)
        rows = [rows[i] for i in indices]
        abstract_lengths = torch.cuda.LongTensor([abstract_lengths[i] for i in indices])
        
        for i in range(self.batch_size):
            # Tokenize abstract and take max_abstract_size number of tokens
            tokens = rows[i].abstract.split()[:max_abstract_size]
            tokens.append('<eos>')
            # Convert each token to word index
            # <unk> token index for unknown words
            token_list = torch.LongTensor([word2id[token] if token in word2id 
                                           else word2id['<unk>'] for token in tokens])
            # Store as column in abstracts variable with zero padding
            abstracts[:,i][:len(token_list)] = token_list
            
            # Same for articles
            tokens = rows[i].article.split()[:max_article_size]
            tokens.append('<eos>')
            token_list = torch.LongTensor([word2id[token] if token in word2id 
                                           else word2id['<unk>'] for token in tokens])
            articles[:,i][:len(token_list)] = token_list
            
        batch = Batch()
        batch.article = (Variable(articles), article_lengths)
        batch.abstract = (Variable(abstracts), abstract_lengths)
        return batch
            
    def get_row(self, generator):
        row = generator.__next__()
        while not isinstance(row.article, str):
            row = generator.__next__()
        return row
        
        
    def get_batch(self):
        rows = []
        for b in range(self.batch_size):
            try: rows.append(self.get_row(self.generator))
            except StopIteration: break
        if rows: return self.build_batch(rows)
        else: raise StopIteration

In [8]:
class Encoder(nn.Module):
    
    def __init__(self, batch_size):
        super(Encoder, self).__init__()
        self.batch_size = batch_size
        
        # Hidden layer and cell state of model
        # Initialize before calling model
        self.hidden = None
        
        # Lookup table that stores word embeddings
        self.embed = nn.Embedding(vocab_size, num_dims).cuda()
        self.embed.weight.data.copy_(torch.from_numpy(embeddings))
        self.embed.weight.requires_grad = False
        
        # Pytorch lstm module
        self.lstm = nn.LSTM(num_dims, hidden_size, hidden_layers)
        self.lstm.cuda()
        
        # Linear transformation 
        self.linear_transform = nn.Linear(hidden_size, vocab_size)
    
    # Funtion to initialize hidden layers
    def init_hidden(self, batch_size, volatile=False):
        tensor1 = torch.cuda.FloatTensor(hidden_layers, batch_size, hidden_size).fill_(0)
        tensor2 = torch.cuda.FloatTensor(hidden_layers, batch_size, hidden_size).fill_(0)
        return (Variable(tensor1, volatile=volatile), Variable(tensor2, volatile=volatile))
    
    def forward(self, articles, article_lengths):
        # Embedding lookup
        input = self.embed(articles)
        # input to pack_padded_sequence can be of Txbx*
        # where T is the length of longest sequence
        # b is batch size
        # batch is sorted in descending order of sequence lengths
        #packed_input = pack_padded_sequence(input, list(article_lengths))
        #packed_output, self.hidden = self.lstm(packed_input, self.hidden)
        _, self.hidden = self.lstm(input, self.hidden)
        
        output = self.linear_transform(self.hidden[0][hidden_layers - 1])
        
        # Final hidden state
        return self.hidden, output
    
encoder = Encoder(batch_size)
encoder.cuda()

Encoder(
  (embed): Embedding(1000, 128)
  (lstm): LSTM(128, 512, num_layers=3)
  (linear_transform): Linear(in_features=512, out_features=1000, bias=True)
)

In [9]:
class Decoder(nn.Module):
    
    def __init__(self):
        super(Decoder, self).__init__()
        
        # Lookup table that stores word embeddings
        self.embed = nn.Embedding(vocab_size, num_dims).cuda()
        self.embed.weight.data.copy_(torch.from_numpy(embeddings))
        self.embed.weight.requires_grad = False
    
        # Cell and hidden states
        self.cell_list = []
        self.hidden_list = []
    
        # First cell takes word embeddings as input
        self.cell_list.append(nn.LSTMCell(num_dims, hidden_size).cuda())
        for cell in range(1, hidden_layers):
            self.cell_list.append(nn.LSTMCell(hidden_size, hidden_size).cuda())
        # ModlueList Holds submodules in a list. 
        # ModuleList can be indexed like a regular Python list, 
        # but modules it contains are properly registered, 
        # and will be visible by all Module methods.
        self.cell_list=nn.ModuleList(self.cell_list) 
        
        # Linear transformation 
        self.linear_transform = nn.Linear(hidden_size, vocab_size)

    def forward(self, input):
        # input is a LongTensor of size batch_size
        input = self.embed(input) 
        # Each item in hidden list is a tuple of previous cell and hidden states
        for layer in range(hidden_layers):
            self.hidden_list[layer] = self.cell_list[layer](input, self.hidden_list[layer])
            input = self.hidden_list[layer][0]
        # output has shape (batch_size, vocab_size)
        output = self.linear_transform(self.hidden_list[hidden_layers - 1][0])
        return output
    
decoder = Decoder()
decoder.cuda()

Decoder(
  (embed): Embedding(1000, 128)
  (cell_list): ModuleList(
    (0): LSTMCell(128, 512)
    (1): LSTMCell(512, 512)
    (2): LSTMCell(512, 512)
  )
  (linear_transform): Linear(in_features=512, out_features=1000, bias=True)
)

In [10]:
learning_rate = 0.85 #5.0

# Filter parameters that do not require gradients
encoder_parameters = filter(lambda p: p.requires_grad, encoder.parameters())
decoder_parameters = filter(lambda p: p.requires_grad, decoder.parameters())
# Optimizers
encoder_optimizer = torch.optim.SGD(encoder_parameters, lr=learning_rate)
decoder_optimizer = torch.optim.SGD(decoder_parameters, lr=learning_rate)
# Loss function
# Way to accumulate loss on sequences with variable lengths in batches :
# size_average: By default, the losses are averaged over observations for each minibatch.
# However, if the field size_average is set to False, the losses are instead summed for each minibatch. 
# Ignored if reduce is False.
# Set size_average to False and divide the loss by the number of non-padding tokens.
# ignore_index: Specifies a target value that is ignored and does not contribute to the input gradient. 
# When size_average is True, the loss is averaged over non-ignored targets.
# Set ignore_index to the padding value
loss_function = nn.CrossEntropyLoss(size_average=False, ignore_index=0).cuda() # 0 is the index of <pad>###

def train_model(batch):
    loss = 0
    # Clear optimizer gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    # Clear hidden state of LSTM
    encoder.hidden = encoder.init_hidden(batch_size)
    # articles, abstracts are LongTensor vairables of shape (max_sequence_length, batch_size)
    # containig word indices from the respective vocabs
    # lengths are LongTensor varibles of shape batch_size containing
    # lengths of all the sequences in the batch
    articles, article_lengths = batch.article
    abstracts, abstract_lengths = batch.abstract
    hiddenT, output = encoder(articles, article_lengths)
    
    # Seperate hidden states corresponding to the the two layers of the encoder
    # and append to hidden state list of decoder as tuples for each layer.
    for layer in range(hidden_layers):
        decoder.hidden_list.append((hiddenT[0][layer], hiddenT[1][layer]))
    #input = Variable(torch.cuda.LongTensor(batch_size).fill_(2)) # 2 is the index of <sos>
    input = most_likely(output, batch_size)

    # Looping over all the sequences
    for t in range(torch.max(abstract_lengths)):
        output = decoder(input)
        input = most_likely(output, batch_size)
        loss += loss_function(output, abstracts[t])
        
    loss = loss/torch.sum(abstract_lengths)
    loss.backward()
    
    nn.utils.clip_grad_norm(encoder.parameters(), 5)
    nn.utils.clip_grad_norm(decoder.parameters(), 5)

    encoder_optimizer.step()
    decoder_optimizer.step()
    
    # Initialize hidden_list for next batch of inputs
    decoder.hidden_list = []
    
    return loss

In [11]:
def validation_loss(df):
    batch_size = 1
    generator = BatchGenerator(batch_size, df)
    loss = 0
    step = 0
    while True:
        try:
            batch = generator.get_batch()
            step += 1
        except StopIteration: break
        loss += calc_loss(batch, batch_size)
    loss = loss/step
    return loss

def calc_loss(batch, batch_size):
    loss = 0
    encoder.hidden = encoder.init_hidden(batch_size, volatile=True)
    articles, article_lengths = batch.article
    abstracts, abstract_lengths = batch.abstract
    
    articles.volatile = True
    abstracts.volatile = True
        
    hiddenT, output = encoder(articles, article_lengths) ###
    for layer in range(hidden_layers):
        decoder.hidden_list.append((hiddenT[0][layer], hiddenT[1][layer])) 
    #input = Variable(torch.cuda.LongTensor(batch_size).fill_(2), volatile=True)
    input = most_likely(output, batch_size)
    
    for t in range(torch.max(abstract_lengths)):
        output = decoder(input)
        input = most_likely(output, batch_size)
        loss += loss_function(output, abstracts[t])
    loss = loss/torch.sum(abstract_lengths)
    decoder.hidden_list = []
    return loss

In [12]:
def most_likely(output, batch_size):
    if batch_size > 1:
        softmax = nn.Softmax(dim=1)
        output = softmax(output)
        _, next_input = torch.topk(output, 1, dim=1)
    else: 
        softmax = nn.Softmax(dim=0)
        output = softmax(output)
        _, next_input = torch.topk(output, 1)
    return next_input

In [13]:
cudnn.benchmark = True
cudnn.fasttest = True
epochs = 7000

encoder.train()
decoder.train()

train_df = pd.read_csv('datasets/train.csv')
val_df = pd.read_csv('datasets/val.csv')
iteration = 1

for epoch in range(epochs):
    
    if epoch % 4000 == 0:
        learning_rate = learning_rate / 10
        # Filter parameters that do not require gradients
        encoder_parameters = filter(lambda p: p.requires_grad, encoder.parameters())
        decoder_parameters = filter(lambda p: p.requires_grad, decoder.parameters())
        # Optimizers
        encoder_optimizer = torch.optim.SGD(encoder_parameters, lr=learning_rate)
        decoder_optimizer = torch.optim.SGD(decoder_parameters, lr=learning_rate)
        print('')
        print('learning rate: %f' % learning_rate)
        print('')
        
    generator = BatchGenerator(batch_size, train_df[:64]) 

    while True:
        try: 
            batch = generator.get_batch()
        except StopIteration: break
        loss = train_model(batch)
        
        if iteration % 2 == 0:
            print('Average minibatch loss at step %d: %.3f' % (iteration, loss))
            writer.add_scalar('train_loss', loss, iteration)
            writer.export_scalars_to_json("./all_scalars.json")
        
        if iteration % 8 == 0:    
            encoder.eval()
            decoder.eval()
            val_loss = validation_loss(val_df[:8]) # truncating validation dataframe
            print('Validation loss: %.3f' % val_loss)
            
            writer.add_scalar('valid_loss', val_loss, iteration)
            writer.export_scalars_to_json("./all_scalars.json")
            
            encoder.train()
            decoder.train()
        iteration += 1


learning rate: 0.085000

Average minibatch loss at step 2: 6.886
Average minibatch loss at step 4: 6.853
Average minibatch loss at step 6: 6.821
Average minibatch loss at step 8: 6.788
Validation loss: 6.768
Average minibatch loss at step 10: 6.755
Average minibatch loss at step 12: 6.721
Average minibatch loss at step 14: 6.687
Average minibatch loss at step 16: 6.652
Validation loss: 6.624
Average minibatch loss at step 18: 6.616
Average minibatch loss at step 20: 6.580
Average minibatch loss at step 22: 6.543
Average minibatch loss at step 24: 6.504
Validation loss: 6.467
Average minibatch loss at step 26: 6.464
Average minibatch loss at step 28: 6.422
Average minibatch loss at step 30: 6.379
Average minibatch loss at step 32: 6.333
Validation loss: 6.285
Average minibatch loss at step 34: 6.285
Average minibatch loss at step 36: 6.234
Average minibatch loss at step 38: 6.180
Average minibatch loss at step 40: 6.122
Validation loss: 6.057
Average minibatch loss at step 42: 6.059
Av

Average minibatch loss at step 348: 3.553
Average minibatch loss at step 350: 3.548
Average minibatch loss at step 352: 3.544
Validation loss: 3.783
Average minibatch loss at step 354: 3.540
Average minibatch loss at step 356: 3.535
Average minibatch loss at step 358: 3.531
Average minibatch loss at step 360: 3.527
Validation loss: 3.782
Average minibatch loss at step 362: 3.522
Average minibatch loss at step 364: 3.518
Average minibatch loss at step 366: 3.513
Average minibatch loss at step 368: 3.509
Validation loss: 3.780
Average minibatch loss at step 370: 3.504
Average minibatch loss at step 372: 3.499
Average minibatch loss at step 374: 3.495
Average minibatch loss at step 376: 3.490
Validation loss: 3.778
Average minibatch loss at step 378: 3.485
Average minibatch loss at step 380: 3.480
Average minibatch loss at step 382: 3.476
Average minibatch loss at step 384: 3.471
Validation loss: 3.776
Average minibatch loss at step 386: 3.466
Average minibatch loss at step 388: 3.461
Ave

Average minibatch loss at step 694: 3.169
Average minibatch loss at step 696: 3.168
Validation loss: 3.866
Average minibatch loss at step 698: 3.166
Average minibatch loss at step 700: 3.165
Average minibatch loss at step 702: 3.164
Average minibatch loss at step 704: 3.162
Validation loss: 3.869
Average minibatch loss at step 706: 3.161
Average minibatch loss at step 708: 3.159
Average minibatch loss at step 710: 3.158
Average minibatch loss at step 712: 3.157
Validation loss: 3.871
Average minibatch loss at step 714: 3.155
Average minibatch loss at step 716: 3.154
Average minibatch loss at step 718: 3.152
Average minibatch loss at step 720: 3.151
Validation loss: 3.873
Average minibatch loss at step 722: 3.150
Average minibatch loss at step 724: 3.148
Average minibatch loss at step 726: 3.147
Average minibatch loss at step 728: 3.146
Validation loss: 3.876
Average minibatch loss at step 730: 3.144
Average minibatch loss at step 732: 3.143
Average minibatch loss at step 734: 3.141
Ave

Average minibatch loss at step 1040: 2.957
Validation loss: 4.026
Average minibatch loss at step 1042: 2.956
Average minibatch loss at step 1044: 2.955
Average minibatch loss at step 1046: 2.954
Average minibatch loss at step 1048: 2.953
Validation loss: 4.030
Average minibatch loss at step 1050: 2.952
Average minibatch loss at step 1052: 2.951
Average minibatch loss at step 1054: 2.950
Average minibatch loss at step 1056: 2.949
Validation loss: 4.034
Average minibatch loss at step 1058: 2.948
Average minibatch loss at step 1060: 2.947
Average minibatch loss at step 1062: 2.946
Average minibatch loss at step 1064: 2.944
Validation loss: 4.038
Average minibatch loss at step 1066: 2.943
Average minibatch loss at step 1068: 2.942
Average minibatch loss at step 1070: 2.941
Average minibatch loss at step 1072: 2.940
Validation loss: 4.042
Average minibatch loss at step 1074: 2.939
Average minibatch loss at step 1076: 2.938
Average minibatch loss at step 1078: 2.937
Average minibatch loss at

Average minibatch loss at step 1378: 2.793
Average minibatch loss at step 1380: 2.793
Average minibatch loss at step 1382: 2.792
Average minibatch loss at step 1384: 2.792
Validation loss: 4.177
Average minibatch loss at step 1386: 2.791
Average minibatch loss at step 1388: 2.790
Average minibatch loss at step 1390: 2.790
Average minibatch loss at step 1392: 2.789
Validation loss: 4.180
Average minibatch loss at step 1394: 2.789
Average minibatch loss at step 1396: 2.788
Average minibatch loss at step 1398: 2.788
Average minibatch loss at step 1400: 2.787
Validation loss: 4.184
Average minibatch loss at step 1402: 2.786
Average minibatch loss at step 1404: 2.786
Average minibatch loss at step 1406: 2.785
Average minibatch loss at step 1408: 2.785
Validation loss: 4.188
Average minibatch loss at step 1410: 2.784
Average minibatch loss at step 1412: 2.784
Average minibatch loss at step 1414: 2.783
Average minibatch loss at step 1416: 2.783
Validation loss: 4.191
Average minibatch loss at

Average minibatch loss at step 1716: 2.707
Average minibatch loss at step 1718: 2.707
Average minibatch loss at step 1720: 2.707
Validation loss: 4.328
Average minibatch loss at step 1722: 2.706
Average minibatch loss at step 1724: 2.706
Average minibatch loss at step 1726: 2.705
Average minibatch loss at step 1728: 2.705
Validation loss: 4.332
Average minibatch loss at step 1730: 2.704
Average minibatch loss at step 1732: 2.704
Average minibatch loss at step 1734: 2.703
Average minibatch loss at step 1736: 2.703
Validation loss: 4.335
Average minibatch loss at step 1738: 2.703
Average minibatch loss at step 1740: 2.702
Average minibatch loss at step 1742: 2.702
Average minibatch loss at step 1744: 2.701
Validation loss: 4.339
Average minibatch loss at step 1746: 2.701
Average minibatch loss at step 1748: 2.700
Average minibatch loss at step 1750: 2.700
Average minibatch loss at step 1752: 2.699
Validation loss: 4.343
Average minibatch loss at step 1754: 2.699
Average minibatch loss at

Average minibatch loss at step 2056: 2.638
Validation loss: 4.476
Average minibatch loss at step 2058: 2.638
Average minibatch loss at step 2060: 2.638
Average minibatch loss at step 2062: 2.637
Average minibatch loss at step 2064: 2.637
Validation loss: 4.479
Average minibatch loss at step 2066: 2.637
Average minibatch loss at step 2068: 2.636
Average minibatch loss at step 2070: 2.636
Average minibatch loss at step 2072: 2.636
Validation loss: 4.483
Average minibatch loss at step 2074: 2.635
Average minibatch loss at step 2076: 2.635
Average minibatch loss at step 2078: 2.634
Average minibatch loss at step 2080: 2.634
Validation loss: 4.486
Average minibatch loss at step 2082: 2.634
Average minibatch loss at step 2084: 2.633
Average minibatch loss at step 2086: 2.633
Average minibatch loss at step 2088: 2.633
Validation loss: 4.490
Average minibatch loss at step 2090: 2.632
Average minibatch loss at step 2092: 2.632
Average minibatch loss at step 2094: 2.632
Average minibatch loss at

Average minibatch loss at step 2394: 2.582
Average minibatch loss at step 2396: 2.582
Average minibatch loss at step 2398: 2.582
Average minibatch loss at step 2400: 2.581
Validation loss: 4.616
Average minibatch loss at step 2402: 2.581
Average minibatch loss at step 2404: 2.581
Average minibatch loss at step 2406: 2.580
Average minibatch loss at step 2408: 2.580
Validation loss: 4.619
Average minibatch loss at step 2410: 2.580
Average minibatch loss at step 2412: 2.579
Average minibatch loss at step 2414: 2.579
Average minibatch loss at step 2416: 2.579
Validation loss: 4.622
Average minibatch loss at step 2418: 2.579
Average minibatch loss at step 2420: 2.578
Average minibatch loss at step 2422: 2.578
Average minibatch loss at step 2424: 2.578
Validation loss: 4.625
Average minibatch loss at step 2426: 2.577
Average minibatch loss at step 2428: 2.577
Average minibatch loss at step 2430: 2.577
Average minibatch loss at step 2432: 2.576
Validation loss: 4.628
Average minibatch loss at

Average minibatch loss at step 2734: 2.535
Average minibatch loss at step 2736: 2.535
Validation loss: 4.740
Average minibatch loss at step 2738: 2.535
Average minibatch loss at step 2740: 2.534
Average minibatch loss at step 2742: 2.534
Average minibatch loss at step 2744: 2.534
Validation loss: 4.743
Average minibatch loss at step 2746: 2.534
Average minibatch loss at step 2748: 2.533
Average minibatch loss at step 2750: 2.533
Average minibatch loss at step 2752: 2.533
Validation loss: 4.746
Average minibatch loss at step 2754: 2.533
Average minibatch loss at step 2756: 2.532
Average minibatch loss at step 2758: 2.532
Average minibatch loss at step 2760: 2.532
Validation loss: 4.749
Average minibatch loss at step 2762: 2.532
Average minibatch loss at step 2764: 2.531
Average minibatch loss at step 2766: 2.531
Average minibatch loss at step 2768: 2.531
Validation loss: 4.751
Average minibatch loss at step 2770: 2.531
Average minibatch loss at step 2772: 2.530
Average minibatch loss at

Validation loss: 4.855
Average minibatch loss at step 3074: 2.496
Average minibatch loss at step 3076: 2.496
Average minibatch loss at step 3078: 2.496
Average minibatch loss at step 3080: 2.495
Validation loss: 4.858
Average minibatch loss at step 3082: 2.495
Average minibatch loss at step 3084: 2.495
Average minibatch loss at step 3086: 2.495
Average minibatch loss at step 3088: 2.495
Validation loss: 4.860
Average minibatch loss at step 3090: 2.494
Average minibatch loss at step 3092: 2.494
Average minibatch loss at step 3094: 2.494
Average minibatch loss at step 3096: 2.494
Validation loss: 4.863
Average minibatch loss at step 3098: 2.494
Average minibatch loss at step 3100: 2.493
Average minibatch loss at step 3102: 2.493
Average minibatch loss at step 3104: 2.493
Validation loss: 4.866
Average minibatch loss at step 3106: 2.493
Average minibatch loss at step 3108: 2.493
Average minibatch loss at step 3110: 2.492
Average minibatch loss at step 3112: 2.492
Validation loss: 4.868
Av

Average minibatch loss at step 3412: 2.465
Average minibatch loss at step 3414: 2.465
Average minibatch loss at step 3416: 2.464
Validation loss: 4.965
Average minibatch loss at step 3418: 2.464
Average minibatch loss at step 3420: 2.464
Average minibatch loss at step 3422: 2.464
Average minibatch loss at step 3424: 2.464
Validation loss: 4.968
Average minibatch loss at step 3426: 2.464
Average minibatch loss at step 3428: 2.463
Average minibatch loss at step 3430: 2.463
Average minibatch loss at step 3432: 2.463
Validation loss: 4.970
Average minibatch loss at step 3434: 2.463
Average minibatch loss at step 3436: 2.463
Average minibatch loss at step 3438: 2.463
Average minibatch loss at step 3440: 2.462
Validation loss: 4.973
Average minibatch loss at step 3442: 2.462
Average minibatch loss at step 3444: 2.462
Average minibatch loss at step 3446: 2.462
Average minibatch loss at step 3448: 2.462
Validation loss: 4.975
Average minibatch loss at step 3450: 2.462
Average minibatch loss at

Average minibatch loss at step 3752: 2.440
Validation loss: 5.065
Average minibatch loss at step 3754: 2.440
Average minibatch loss at step 3756: 2.440
Average minibatch loss at step 3758: 2.440
Average minibatch loss at step 3760: 2.440
Validation loss: 5.068
Average minibatch loss at step 3762: 2.439
Average minibatch loss at step 3764: 2.439
Average minibatch loss at step 3766: 2.439
Average minibatch loss at step 3768: 2.439
Validation loss: 5.070
Average minibatch loss at step 3770: 2.439
Average minibatch loss at step 3772: 2.439
Average minibatch loss at step 3774: 2.439
Average minibatch loss at step 3776: 2.439
Validation loss: 5.072
Average minibatch loss at step 3778: 2.439
Average minibatch loss at step 3780: 2.438
Average minibatch loss at step 3782: 2.438
Average minibatch loss at step 3784: 2.438
Validation loss: 5.075
Average minibatch loss at step 3786: 2.438
Average minibatch loss at step 3788: 2.438
Average minibatch loss at step 3790: 2.438
Average minibatch loss at

Average minibatch loss at step 4088: 2.409
Validation loss: 5.130
Average minibatch loss at step 4090: 2.409
Average minibatch loss at step 4092: 2.409
Average minibatch loss at step 4094: 2.409
Average minibatch loss at step 4096: 2.409
Validation loss: 5.130
Average minibatch loss at step 4098: 2.409
Average minibatch loss at step 4100: 2.409
Average minibatch loss at step 4102: 2.409
Average minibatch loss at step 4104: 2.409
Validation loss: 5.131
Average minibatch loss at step 4106: 2.408
Average minibatch loss at step 4108: 2.408
Average minibatch loss at step 4110: 2.408
Average minibatch loss at step 4112: 2.408
Validation loss: 5.131
Average minibatch loss at step 4114: 2.408
Average minibatch loss at step 4116: 2.408
Average minibatch loss at step 4118: 2.408
Average minibatch loss at step 4120: 2.408
Validation loss: 5.131
Average minibatch loss at step 4122: 2.408
Average minibatch loss at step 4124: 2.408
Average minibatch loss at step 4126: 2.408
Average minibatch loss at

KeyboardInterrupt: 

torch.save(encoder.state_dict(), 'encoder')
torch.save(decoder.state_dict(), 'decoder')