In [110]:
%matplotlib inline

import numpy as np
from matplotlib import pyplot as plt
import time
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
# from torchsummaryX import summary
from tests_hw4 import test_prediction, test_generation
from tqdm import tqdm
import gc 
gc.collect()

11

In [111]:
! nvidia-smi
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

Sat Dec  3 04:24:32 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            On   | 00000000:00:1E.0 Off |                    0 |
| N/A   56C    P0    29W /  70W |  12399MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [267]:
# TODO: define other hyperparameters here
NUM_EPOCHS = 100
BATCH_SIZE = 30
SEQ_LEN = 50
EMB_DIM = 200
HIDDEN_SIZE = 200
LR = 0.001
SEQ_LEN_PROB = 0.95
SEQ_LEN_STD = 5
LSTM_DROPOUT = 0.3
LSTM_LAYERS = 3

In [268]:
# load all that we need

dataset = np.load('../dataset/wiki.train.npy', allow_pickle=True)
devset = np.load('../dataset/wiki.valid.npy', allow_pickle=True)
fixtures_pred = np.load('../fixtures/prediction.npz')  # dev
fixtures_gen = np.load('../fixtures/generation.npy')  # dev
fixtures_pred_test = np.load('../fixtures/prediction_test.npz')  # test
fixtures_gen_test = np.load('../fixtures/generation_test.npy')  # test
vocab = np.load('../dataset/vocab.npy')

In [269]:
# data loader

class DataLoaderForLanguageModeling(DataLoader):
    """
        TODO: Define data loader logic here
    """
    def __init__(self, dataset, batch_size, seq_len, shuffle=True):
        self.dataset = dataset
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.shuffle = shuffle
        self.num_batches = (len(np.concatenate(dataset))-1)//batch_size


    def __len__(self):
      return int(self.num_batches // self.seq_len)
      
    def __iter__(self):
        """
            You may implement some of the techniques in https://arxiv.org/pdf/1708.02182.pdf
            example: Variable length backpropagation sequences (Section 4.1)
        """
        ## dataset = Array of articles; article = array of ints
        # 1. Randomly shuffle all the articles from the WikiText-2 dataset.
        if(self.shuffle):
            np.random.shuffle(self.dataset)
        # 2. Concatenate all text in one long string.
        data = np.concatenate(self.dataset)
        # 3. Group the sequences into batches.
        self.num_batches = (len(data) - 1) // self.batch_size # One less since need offset for label
        inputs  = data[0:self.num_batches * self.batch_size].reshape(self.batch_size,-1)
        targets = data[1:self.num_batches * self.batch_size + 1].reshape(self.batch_size,-1)
        inputs = torch.from_numpy(inputs).to(dtype=torch.long)
        targets = torch.from_numpy(targets).to(dtype=torch.long)
        # 4. Run a loop that returns a tuple of (input, label) on every iteration with yield.
        offset = 0
        cur_seq_len = self.seq_len
        while((offset + cur_seq_len) < inputs.shape[1]):
            input_ = inputs[:, offset : offset+cur_seq_len]
            target = targets[:, offset : offset+cur_seq_len]
            offset += cur_seq_len
            yield (input_, target)
            # Update cur_seq_len
            cur_seq_len = self.seq_len if (np.random.rand() < SEQ_LEN_PROB) else self.seq_len//2
            cur_seq_len_temp = int(np.random.normal(cur_seq_len, SEQ_LEN_STD))
            cur_seq_len = cur_seq_len_temp if (cur_seq_len_temp > 0) else cur_seq_len

# # TEST       
# test = DataLoaderForLanguageModeling(dataset, BATCH_SIZE, SEQ_LEN)
# for i,(test_inputs, test_targets) in enumerate(test.__iter__()):
#     print('---------')
#     print('iter: ', i)
#     print('shape: ', test_inputs.shape)
#     print('type: ', test_inputs.dtype, ', ', test_targets.dtype)
#     for batch_idx in range(0, test.batch_size):
#         tmpstr1 = ['    ']
#         tmpstr2 = ['    ']
#         for seq_idx in range(0, test.seq_len):
#             tmpstr1.append(vocab[test_inputs[batch_idx, seq_idx]])
#             tmpstr2.append(vocab[test_targets[batch_idx, seq_idx]])
#         print(' '.join(tmpstr1))
#         print(' '.join(tmpstr2))
#         print()
#     if(i > 3):
#         break

In [270]:
# model

class Model(nn.Module):
    """
        TODO: Define your model here
    """
    def __init__(self, vocab_size:int, embedding_dim:int, hidden_size:int):
        super(Model, self).__init__()
        # Embedding: vocab_size -> embedding_dim
        # LSTM: embedding_dim -> hidden_size
        # Classifier: hidden_size -> vocab_size
        self.embedding = nn.Sequential(
            nn.Embedding(vocab_size, embedding_dim),
        )
        self.lstm = nn.LSTM(
            input_size = embedding_dim,
            hidden_size = hidden_size,
            num_layers = LSTM_LAYERS,
            # bidirectional = True,
            dropout = LSTM_DROPOUT,
            batch_first=True)
        self.classifier = nn.Sequential(
            torch.nn.Linear(hidden_size * 1, vocab_size),
        )


    def forward(self, x, h_in = None):
        # Feel free to add extra arguments to forward (like an argument to pass in the hiddens)
        out = self.embedding(x)
        out, h_out = self.lstm(out, h_in) if h_in else self.lstm(out)
        out = self.classifier(out)
        return out, h_out

# # TEST
# model = Model(len(vocab), EMB_DIM, HIDDEN_SIZE)
# test_input = torch.randint(0, len(vocab), (BATCH_SIZE, SEQ_LEN), dtype=torch.long)
# test_output, test_hidden = model(test_input)
# print('Input : ', test_input.shape, ', ', test_input.dtype)
# print('Output: ', test_output.shape, ', ', test_output.dtype) # (batch, seq_len, vocab_size)
# # summary(model, test_input)

In [271]:
# model trainer

class Trainer:
    def __init__(self, model, loader, max_epochs=1, run_id='exp'):
        """
            Use this class to train your model
        """
        # feel free to add any other parameters here
        self.model = model
        self.loader = loader
        self.train_losses = []
        self.val_losses = []
        self.predictions = []
        self.predictions_test = []
        self.generated_logits = []
        self.generated = []
        self.generated_logits_test = []
        self.generated_test = []
        self.epochs = 0
        self.max_epochs = max_epochs
        self.run_id = run_id
        
        # TODO: Define your optimizer and criterion here
        # feel free to define a learning rate scheduler as well if you want
        self.optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
        self.criterion = nn.CrossEntropyLoss() # Correct???

    def train(self):
        self.model.train() # set to training mode
        epoch_loss = 0
        num_batches = 0
        batch_bar = tqdm(total=len(self.loader), dynamic_ncols=True, leave=False, position=0, desc='Batch')
        for batch_num, (inputs, targets) in enumerate(self.loader):
            epoch_loss += self.train_batch(inputs, targets)
            batch_bar.update()
        epoch_loss = epoch_loss / (batch_num + 1)
        print('[TRAIN]  Epoch [%d/%d]   Loss: %.4f'
                      % (self.epochs + 1, self.max_epochs, epoch_loss))
        self.train_losses.append(epoch_loss)

    def train_batch(self, inputs, targets):
        """ 
            TODO: Define code for training a single batch of inputs
            
            :return 
                    (float) loss value
        """
        self.optimizer.zero_grad()
        # Forwards
        inputs = inputs.to(device)
        outputs, _ = self.model(inputs)
        # Compute loss
        targets = targets.to(device)
        loss = self.criterion(
            outputs.reshape(-1, outputs.shape[2]), # (instances, classes)
            targets.reshape(-1) # (instances, )
        )
        # Backwards
        loss.backward()
        self.optimizer.step()
        return loss

    
    def test(self):
        # don't change these
        self.model.eval() # set to eval mode
        predictions = TestLanguageModel.predict(fixtures_pred['inp'], self.model) # get predictions
        self.predictions.append(predictions)
        generated_logits = TestLanguageModel.generate(fixtures_gen, 10, self.model) # generated predictions for 10 words
        generated_logits_test = TestLanguageModel.generate(fixtures_gen_test, 10, self.model)
        nll = test_prediction(predictions, fixtures_pred['out'])
        generated = test_generation(fixtures_gen, generated_logits, vocab)
        generated_test = test_generation(fixtures_gen_test, generated_logits_test, vocab)
        self.val_losses.append(nll)
        
        self.generated.append(generated)
        self.generated_test.append(generated_test)
        self.generated_logits.append(generated_logits)
        self.generated_logits_test.append(generated_logits_test)
        
        # generate predictions for test data
        predictions_test = TestLanguageModel.predict(fixtures_pred_test['inp'], self.model) # get predictions
        self.predictions_test.append(predictions_test)
            
        print('[VAL]  Epoch [%d/%d]   Loss: %.4f'
                      % (self.epochs + 1, self.max_epochs, nll))
        self.epochs += 1

        return nll

    def save(self):
        # don't change these
        model_path = os.path.join('experiments', self.run_id, 'model-{}.pkl'.format(self.epochs))
        torch.save({'state_dict': self.model.state_dict()},
            model_path)
        np.save(os.path.join('experiments', self.run_id, 'predictions-{}.npy'.format(self.epochs)), self.predictions[-1])
        np.save(os.path.join('experiments', self.run_id, 'predictions-test-{}.npy'.format(self.epochs)), self.predictions_test[-1])
        np.save(os.path.join('experiments', self.run_id, 'generated_logits-{}.npy'.format(self.epochs)), self.generated_logits[-1])
        np.save(os.path.join('experiments', self.run_id, 'generated_logits-test-{}.npy'.format(self.epochs)), self.generated_logits_test[-1])
        with open(os.path.join('experiments', self.run_id, 'generated-{}.txt'.format(self.epochs)), 'w') as fw:
            fw.write(self.generated[-1])
        with open(os.path.join('experiments', self.run_id, 'generated-{}-test.txt'.format(self.epochs)), 'w') as fw:
            fw.write(self.generated_test[-1])


In [272]:
class TestLanguageModel:
    def predict(inp, model):
        """
            TODO: write prediction code here
            
            :param inp:
            :return: a np.ndarray of logits
        """
        outputs, _ = model(torch.Tensor(inp).long().to(device))
        # predictions = torch.argmax(outputs[:,-1,:], dim=1).unsqueeze(0)
        predictions = outputs[:,-1,:]
        return predictions.cpu().detach().numpy()

        
    def generate(inp, forward, model):
        """
            TODO: write generation code here

            Generate a sequence of words given a starting sequence.
            :param inp: Initial sequence of words (batch size, length)
            :param forward: number of additional words to generate
            :return: generated words (batch size, forward)
        """        
        new_words = []
        hidden = None
        cur_inp = torch.clone(torch.Tensor(inp).long())
        for i in range(0, forward):
          out, hidden = model(cur_inp.to(device), hidden)
          cur_new_words = torch.argmax(out, dim=2)[:,-1] # Only grab last word per sequence for each batch
          new_words.append(cur_new_words)
          cur_inp = torch.unsqueeze(cur_new_words, dim=1) # (batch,) -> (batch,seq)
        new_words = torch.stack(new_words, dim=1) # (batch, forward)
        return new_words.cpu().detach().numpy()

# # TEST
# test_input = torch.randint(0, len(vocab), (BATCH_SIZE, SEQ_LEN))
# test_model = Model(len(vocab), EMB_DIM, HIDDEN_SIZE)
# test_output = TestLanguageModel.predict(test_input, test_model)
# print('Test predict : ', test_output.shape)
# test_output = TestLanguageModel.generate(test_input, 20, test_model)
# print('Test generate: ', test_output.shape)

In [273]:
run_id = str(int(time.time()))
if not os.path.exists('./experiments'):
    os.mkdir('./experiments')
os.mkdir('./experiments/%s' % run_id)
print("Saving models, predictions, and generated words to ./experiments/%s" % run_id)

Saving models, predictions, and generated words to ./experiments/1670055271


In [274]:
model = Model(len(vocab), embedding_dim=EMB_DIM, hidden_size=HIDDEN_SIZE).to(device)

loader = DataLoaderForLanguageModeling(
    dataset=dataset, 
    batch_size=BATCH_SIZE,
    seq_len=SEQ_LEN,
    shuffle=True
)
trainer = Trainer(
    model=model, 
    loader=loader, 
    max_epochs=NUM_EPOCHS, 
    run_id=run_id
)

In [275]:
# best_nll = 1e30 
print('best_nll: ', best_nll)
for epoch in range(NUM_EPOCHS):
    print('Epoch: ', epoch+1, '/', NUM_EPOCHS)
    trainer.train()
    nll = trainer.test()
    if nll < best_nll:
        best_nll = nll
        print("Saving model, predictions and generated output for epoch "+str(epoch)+" with NLL: "+ str(best_nll))
        trainer.save()
    

best_nll:  4.586572
Epoch:  1 / 100


                                                                                                      

[TRAIN]  Epoch [1/100]   Loss: 7.0412
[VAL]  Epoch [1/100]   Loss: 6.1084
Epoch:  2 / 100


                                                                                                      

[TRAIN]  Epoch [2/100]   Loss: 6.1670
[VAL]  Epoch [2/100]   Loss: 5.4204
Epoch:  3 / 100


                                                                                                      

[TRAIN]  Epoch [3/100]   Loss: 5.7264
[VAL]  Epoch [3/100]   Loss: 5.1344
Epoch:  4 / 100


                                                                                                      

[TRAIN]  Epoch [4/100]   Loss: 5.4759
[VAL]  Epoch [4/100]   Loss: 4.9271
Epoch:  5 / 100


                                                                                                      

[TRAIN]  Epoch [5/100]   Loss: 5.2789
[VAL]  Epoch [5/100]   Loss: 4.8629
Epoch:  6 / 100


                                                                                                      

[TRAIN]  Epoch [6/100]   Loss: 5.1128
[VAL]  Epoch [6/100]   Loss: 4.7262
Epoch:  7 / 100


                                                                                                      

[TRAIN]  Epoch [7/100]   Loss: 4.9697
[VAL]  Epoch [7/100]   Loss: 4.6850
Epoch:  8 / 100


                                                                                                      

[TRAIN]  Epoch [8/100]   Loss: 4.8496
[VAL]  Epoch [8/100]   Loss: 4.6215
Epoch:  9 / 100


                                                                                                      

[TRAIN]  Epoch [9/100]   Loss: 4.7442
[VAL]  Epoch [9/100]   Loss: 4.6385
Epoch:  10 / 100


                                                                                                      

[TRAIN]  Epoch [10/100]   Loss: 4.6520
[VAL]  Epoch [10/100]   Loss: 4.6460
Epoch:  11 / 100


                                                                                                      

[TRAIN]  Epoch [11/100]   Loss: 4.5735
[VAL]  Epoch [11/100]   Loss: 4.5928
Epoch:  12 / 100


                                                                                                      

[TRAIN]  Epoch [12/100]   Loss: 4.5009
[VAL]  Epoch [12/100]   Loss: 4.6603
Epoch:  13 / 100


                                                                                                      

[TRAIN]  Epoch [13/100]   Loss: 4.4352
[VAL]  Epoch [13/100]   Loss: 4.6125
Epoch:  14 / 100


                                                                                                      

[TRAIN]  Epoch [14/100]   Loss: 4.3782
[VAL]  Epoch [14/100]   Loss: 4.6148
Epoch:  15 / 100


                                                                                                      

[TRAIN]  Epoch [15/100]   Loss: 4.3207
[VAL]  Epoch [15/100]   Loss: 4.6484
Epoch:  16 / 100


                                                                                                      

[TRAIN]  Epoch [16/100]   Loss: 4.2669
[VAL]  Epoch [16/100]   Loss: 4.5758
Saving model, predictions and generated output for epoch 15 with NLL: 4.575823
Epoch:  17 / 100


                                                                                                      

[TRAIN]  Epoch [17/100]   Loss: 4.2200
[VAL]  Epoch [17/100]   Loss: 4.6956
Epoch:  18 / 100


                                                                                                      

[TRAIN]  Epoch [18/100]   Loss: 4.1740
[VAL]  Epoch [18/100]   Loss: 4.6989
Epoch:  19 / 100


                                                                                                      

[TRAIN]  Epoch [19/100]   Loss: 4.1323
[VAL]  Epoch [19/100]   Loss: 4.6569
Epoch:  20 / 100


                                                                                                      

[TRAIN]  Epoch [20/100]   Loss: 4.0926
[VAL]  Epoch [20/100]   Loss: 4.6908
Epoch:  21 / 100


                                                                                                      

[TRAIN]  Epoch [21/100]   Loss: 4.0532
[VAL]  Epoch [21/100]   Loss: 4.6378
Epoch:  22 / 100


                                                                                                      

[TRAIN]  Epoch [22/100]   Loss: 4.0182
[VAL]  Epoch [22/100]   Loss: 4.6411
Epoch:  23 / 100


                                                                                                      

[TRAIN]  Epoch [23/100]   Loss: 3.9864
[VAL]  Epoch [23/100]   Loss: 4.6563
Epoch:  24 / 100


                                                                                                      

[TRAIN]  Epoch [24/100]   Loss: 3.9518
[VAL]  Epoch [24/100]   Loss: 4.6949
Epoch:  25 / 100


                                                                                                      

[TRAIN]  Epoch [25/100]   Loss: 3.9219
[VAL]  Epoch [25/100]   Loss: 4.7608
Epoch:  26 / 100


                                                                                                      

[TRAIN]  Epoch [26/100]   Loss: 3.8897
[VAL]  Epoch [26/100]   Loss: 4.6707
Epoch:  27 / 100


                                                                                                      

[TRAIN]  Epoch [27/100]   Loss: 3.8637
[VAL]  Epoch [27/100]   Loss: 4.6778
Epoch:  28 / 100


                                                                                                      

[TRAIN]  Epoch [28/100]   Loss: 3.8358
[VAL]  Epoch [28/100]   Loss: 4.7814
Epoch:  29 / 100


                                                                                                      

[TRAIN]  Epoch [29/100]   Loss: 3.8069
[VAL]  Epoch [29/100]   Loss: 4.7571
Epoch:  30 / 100


                                                                                                      

[TRAIN]  Epoch [30/100]   Loss: 3.7847
[VAL]  Epoch [30/100]   Loss: 4.6969
Epoch:  31 / 100


                                                                                                      

[TRAIN]  Epoch [31/100]   Loss: 3.7617
[VAL]  Epoch [31/100]   Loss: 4.7675
Epoch:  32 / 100


                                                                                                      

[TRAIN]  Epoch [32/100]   Loss: 3.7380
[VAL]  Epoch [32/100]   Loss: 4.7360
Epoch:  33 / 100


                                                                                                      

[TRAIN]  Epoch [33/100]   Loss: 3.7139
[VAL]  Epoch [33/100]   Loss: 4.7502
Epoch:  34 / 100


                                                                                                      

[TRAIN]  Epoch [34/100]   Loss: 3.6935
[VAL]  Epoch [34/100]   Loss: 4.8309
Epoch:  35 / 100


                                                                                                      

[TRAIN]  Epoch [35/100]   Loss: 3.6739
[VAL]  Epoch [35/100]   Loss: 4.8241
Epoch:  36 / 100


                                                                                                      

[TRAIN]  Epoch [36/100]   Loss: 3.6561
[VAL]  Epoch [36/100]   Loss: 4.7863
Epoch:  37 / 100


                                                                                                      

[TRAIN]  Epoch [37/100]   Loss: 3.6354
[VAL]  Epoch [37/100]   Loss: 4.8172
Epoch:  38 / 100


                                                                                                      

[TRAIN]  Epoch [38/100]   Loss: 3.6127
[VAL]  Epoch [38/100]   Loss: 4.8042
Epoch:  39 / 100


                                                                                                      

[TRAIN]  Epoch [39/100]   Loss: 3.5956
[VAL]  Epoch [39/100]   Loss: 4.7753
Epoch:  40 / 100


                                                                                                      

[TRAIN]  Epoch [40/100]   Loss: 3.5770
[VAL]  Epoch [40/100]   Loss: 4.8421
Epoch:  41 / 100


                                                                                                      

[TRAIN]  Epoch [41/100]   Loss: 3.5629
[VAL]  Epoch [41/100]   Loss: 4.8355
Epoch:  42 / 100


                                                                                                      

[TRAIN]  Epoch [42/100]   Loss: 3.5437
[VAL]  Epoch [42/100]   Loss: 4.7410
Epoch:  43 / 100


                                                                                                      

[TRAIN]  Epoch [43/100]   Loss: 3.5313
[VAL]  Epoch [43/100]   Loss: 4.8146
Epoch:  44 / 100


                                                                                                      

[TRAIN]  Epoch [44/100]   Loss: 3.5130
[VAL]  Epoch [44/100]   Loss: 4.8825
Epoch:  45 / 100


                                                                                                      

[TRAIN]  Epoch [45/100]   Loss: 3.4956
[VAL]  Epoch [45/100]   Loss: 4.8490
Epoch:  46 / 100


                                                                                                      

[TRAIN]  Epoch [46/100]   Loss: 3.4817
[VAL]  Epoch [46/100]   Loss: 4.8802
Epoch:  47 / 100


                                                                                                      

[TRAIN]  Epoch [47/100]   Loss: 3.4667
[VAL]  Epoch [47/100]   Loss: 4.8610
Epoch:  48 / 100


                                                                                                      

[TRAIN]  Epoch [48/100]   Loss: 3.4483
[VAL]  Epoch [48/100]   Loss: 4.9207
Epoch:  49 / 100


                                                                                                      

[TRAIN]  Epoch [49/100]   Loss: 3.4360
[VAL]  Epoch [49/100]   Loss: 4.8911
Epoch:  50 / 100


                                                                                                      

[TRAIN]  Epoch [50/100]   Loss: 3.4223
[VAL]  Epoch [50/100]   Loss: 4.9158
Epoch:  51 / 100


                                                                                                      

[TRAIN]  Epoch [51/100]   Loss: 3.4077
[VAL]  Epoch [51/100]   Loss: 4.9233
Epoch:  52 / 100


                                                                                                      

[TRAIN]  Epoch [52/100]   Loss: 3.3968
[VAL]  Epoch [52/100]   Loss: 4.9485
Epoch:  53 / 100


                                                                                                      

[TRAIN]  Epoch [53/100]   Loss: 3.3787
[VAL]  Epoch [53/100]   Loss: 5.0109
Epoch:  54 / 100


                                                                                                      

[TRAIN]  Epoch [54/100]   Loss: 3.3672
[VAL]  Epoch [54/100]   Loss: 4.9321
Epoch:  55 / 100


                                                                                                      

[TRAIN]  Epoch [55/100]   Loss: 3.3592
[VAL]  Epoch [55/100]   Loss: 4.8482
Epoch:  56 / 100


                                                                                                      

[TRAIN]  Epoch [56/100]   Loss: 3.3434
[VAL]  Epoch [56/100]   Loss: 4.9889
Epoch:  57 / 100


                                                                                                      

[TRAIN]  Epoch [57/100]   Loss: 3.3351
[VAL]  Epoch [57/100]   Loss: 5.0060
Epoch:  58 / 100


                                                                                                      

[TRAIN]  Epoch [58/100]   Loss: 3.3187
[VAL]  Epoch [58/100]   Loss: 4.9630
Epoch:  59 / 100


                                                                                                      

[TRAIN]  Epoch [59/100]   Loss: 3.3102
[VAL]  Epoch [59/100]   Loss: 5.0375
Epoch:  60 / 100


                                                                                                      

[TRAIN]  Epoch [60/100]   Loss: 3.2986
[VAL]  Epoch [60/100]   Loss: 4.9619
Epoch:  61 / 100


                                                                                                      

[TRAIN]  Epoch [61/100]   Loss: 3.2905
[VAL]  Epoch [61/100]   Loss: 4.9891
Epoch:  62 / 100


                                                                                                      

[TRAIN]  Epoch [62/100]   Loss: 3.2754
[VAL]  Epoch [62/100]   Loss: 5.0192
Epoch:  63 / 100


                                                                                                      

[TRAIN]  Epoch [63/100]   Loss: 3.2621
[VAL]  Epoch [63/100]   Loss: 4.9754
Epoch:  64 / 100


                                                                                                      

[TRAIN]  Epoch [64/100]   Loss: 3.2537
[VAL]  Epoch [64/100]   Loss: 4.9835
Epoch:  65 / 100


                                                                                                      

[TRAIN]  Epoch [65/100]   Loss: 3.2404
[VAL]  Epoch [65/100]   Loss: 5.1331
Epoch:  66 / 100


                                                                                                      

[TRAIN]  Epoch [66/100]   Loss: 3.2334
[VAL]  Epoch [66/100]   Loss: 5.0445
Epoch:  67 / 100


                                                                                                      

[TRAIN]  Epoch [67/100]   Loss: 3.2219
[VAL]  Epoch [67/100]   Loss: 5.0058
Epoch:  68 / 100


                                                                                                      

[TRAIN]  Epoch [68/100]   Loss: 3.2186
[VAL]  Epoch [68/100]   Loss: 5.0326
Epoch:  69 / 100


                                                                                                      

[TRAIN]  Epoch [69/100]   Loss: 3.2035
[VAL]  Epoch [69/100]   Loss: 4.9400
Epoch:  70 / 100


                                                                                                      

[TRAIN]  Epoch [70/100]   Loss: 3.1948
[VAL]  Epoch [70/100]   Loss: 5.0962
Epoch:  71 / 100


                                                                                                      

[TRAIN]  Epoch [71/100]   Loss: 3.1854
[VAL]  Epoch [71/100]   Loss: 5.0296
Epoch:  72 / 100


                                                                                                      

[TRAIN]  Epoch [72/100]   Loss: 3.1785
[VAL]  Epoch [72/100]   Loss: 4.9731
Epoch:  73 / 100


                                                                                                      

[TRAIN]  Epoch [73/100]   Loss: 3.1690
[VAL]  Epoch [73/100]   Loss: 5.0374
Epoch:  74 / 100


                                                                                                      

[TRAIN]  Epoch [74/100]   Loss: 3.1603
[VAL]  Epoch [74/100]   Loss: 5.0738
Epoch:  75 / 100


                                                                                                      

[TRAIN]  Epoch [75/100]   Loss: 3.1527
[VAL]  Epoch [75/100]   Loss: 5.0719
Epoch:  76 / 100


                                                                                                      

[TRAIN]  Epoch [76/100]   Loss: 3.1408
[VAL]  Epoch [76/100]   Loss: 5.1459
Epoch:  77 / 100


                                                                                                      

[TRAIN]  Epoch [77/100]   Loss: 3.1335
[VAL]  Epoch [77/100]   Loss: 4.9898
Epoch:  78 / 100


                                                                                                      

[TRAIN]  Epoch [78/100]   Loss: 3.1260
[VAL]  Epoch [78/100]   Loss: 5.0524
Epoch:  79 / 100


                                                                                                      

[TRAIN]  Epoch [79/100]   Loss: 3.1198
[VAL]  Epoch [79/100]   Loss: 4.9784
Epoch:  80 / 100


                                                                                                      

[TRAIN]  Epoch [80/100]   Loss: 3.1068
[VAL]  Epoch [80/100]   Loss: 5.0554
Epoch:  81 / 100


                                                                                                      

[TRAIN]  Epoch [81/100]   Loss: 3.1008
[VAL]  Epoch [81/100]   Loss: 5.0683
Epoch:  82 / 100


                                                                                                      

[TRAIN]  Epoch [82/100]   Loss: 3.0952
[VAL]  Epoch [82/100]   Loss: 5.1030
Epoch:  83 / 100


                                                                                                      

[TRAIN]  Epoch [83/100]   Loss: 3.0859
[VAL]  Epoch [83/100]   Loss: 5.0606
Epoch:  84 / 100


                                                                                                      

[TRAIN]  Epoch [84/100]   Loss: 3.0755
[VAL]  Epoch [84/100]   Loss: 5.0748
Epoch:  85 / 100


                                                                                                      

[TRAIN]  Epoch [85/100]   Loss: 3.0642
[VAL]  Epoch [85/100]   Loss: 5.0690
Epoch:  86 / 100


                                                                                                      

[TRAIN]  Epoch [86/100]   Loss: 3.0604
[VAL]  Epoch [86/100]   Loss: 5.0562
Epoch:  87 / 100


                                                                                                      

[TRAIN]  Epoch [87/100]   Loss: 3.0516
[VAL]  Epoch [87/100]   Loss: 5.1928
Epoch:  88 / 100


                                                                                                      

[TRAIN]  Epoch [88/100]   Loss: 3.0445
[VAL]  Epoch [88/100]   Loss: 5.0466
Epoch:  89 / 100


                                                                                                      

[TRAIN]  Epoch [89/100]   Loss: 3.0394
[VAL]  Epoch [89/100]   Loss: 5.0837
Epoch:  90 / 100


                                                                                                      

[TRAIN]  Epoch [90/100]   Loss: 3.0332
[VAL]  Epoch [90/100]   Loss: 5.1188
Epoch:  91 / 100


                                                                                                      

[TRAIN]  Epoch [91/100]   Loss: 3.0277
[VAL]  Epoch [91/100]   Loss: 5.0954
Epoch:  92 / 100


                                                                                                      

[TRAIN]  Epoch [92/100]   Loss: 3.0178
[VAL]  Epoch [92/100]   Loss: 5.2573
Epoch:  93 / 100


                                                                                                      

[TRAIN]  Epoch [93/100]   Loss: 3.0122
[VAL]  Epoch [93/100]   Loss: 5.1831
Epoch:  94 / 100


                                                                                                      

[TRAIN]  Epoch [94/100]   Loss: 3.0061
[VAL]  Epoch [94/100]   Loss: 5.1663
Epoch:  95 / 100


                                                                                                      

[TRAIN]  Epoch [95/100]   Loss: 2.9987
[VAL]  Epoch [95/100]   Loss: 5.1860
Epoch:  96 / 100


                                                                                                      

[TRAIN]  Epoch [96/100]   Loss: 2.9956
[VAL]  Epoch [96/100]   Loss: 5.1192
Epoch:  97 / 100


                                                                                                      

[TRAIN]  Epoch [97/100]   Loss: 2.9833
[VAL]  Epoch [97/100]   Loss: 5.1521
Epoch:  98 / 100


                                                                                                      

[TRAIN]  Epoch [98/100]   Loss: 2.9786
[VAL]  Epoch [98/100]   Loss: 5.1490
Epoch:  99 / 100


                                                                                                      

[TRAIN]  Epoch [99/100]   Loss: 2.9737
[VAL]  Epoch [99/100]   Loss: 5.2295
Epoch:  100 / 100


                                                                                                      

[TRAIN]  Epoch [100/100]   Loss: 2.9629
[VAL]  Epoch [100/100]   Loss: 5.1313


In [276]:
# Don't change these
# plot training curves
# plt.figure()
# plt.plot(range(1, trainer.epochs + 1), trainer.train_losses, label='Training losses')
# plt.plot(range(1, trainer.epochs + 1), trainer.val_losses, label='Validation losses')
# plt.xlabel('Epochs')
# plt.ylabel('NLL')
# plt.legend()
# plt.show()

In [277]:
# see generated output
print (trainer.generated[-1]) # get last generated output

Input | Output #0: while the group was en route , but only three were ultimately able to attack . None of them were | not considered to be a great deal of <unk> and
Input | Output #1: <unk> , where he remained on loan until 30 June 2010 . <eol> = = = Return to Manchester United | World Cup = = = <eol> In the summer of
Input | Output #2: 25 April 2013 , denoting shipments of 500 @,@ 000 copies . <eol> The song became One Direction 's fourth | single , and the first track from the album ,
Input | Output #3: , and Bruce R. ) one daughter ( Wendy J. <unk> ) and two grandchildren , died in <unk> , | symbolising <unk> 515 to <unk> <unk> <unk> . <eol> =
Input | Output #4: Warrior were examples of this type . Because their armor was so heavy , they could only carry a single | @-@ up for the first time in which the Chinese
Input | Output #5: the embassy at 1 : 49 and landed on Guam at 2 : 23 ; twenty minutes later , Ambassador | the <unk> 's <unk> . <eol> = = = <unk>
Input | Output #6: <unk> , 