In [1]:
%matplotlib inline

import numpy as np
from matplotlib import pyplot as plt
import time
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from tests import test_prediction, test_generation

In [2]:
# Load everything the notebook needs: corpora, evaluation fixtures, vocabulary.
# All paths are relative to the notebook's working directory.
# NOTE(review): allow_pickle=True unpickles arbitrary objects on load -- only use
# it on files from a trusted source.

dataset = np.load('../dataset/wiki.train.npy', allow_pickle=True)  # training corpus, fed to LanguageModelDataLoader
devset = np.load('../dataset/wiki.valid.npy', allow_pickle=True)  # validation corpus (not referenced by the cells visible here)
fixtures_pred = np.load('../fixtures/prediction.npz')  # dev: 'inp' / 'out' entries are read in LanguageModelTrainer.test
fixtures_gen = np.load('../fixtures/generation.npy')  # dev: prompts passed to TestLanguageModel.generation
fixtures_pred_test = np.load('../fixtures/prediction_test.npz')  # test: only 'inp' is read
fixtures_gen_test = np.load('../fixtures/generation_test.npy')  # test: prompts for generation
vocab = np.load('../dataset/vocab.npy')  # len(vocab) sizes the model; also passed to test_generation

In [3]:
# data loader

class LanguageModelDataLoader(DataLoader):
    def __init__(self, dataset, batch_size, shuffle=True):
        
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.seq_len = 50 #window size


    def __iter__(self):
        # concatenate your articles and build into batches
        if self.shuffle:
            np.random.shuffle(self.dataset)
        dataset= np.concatenate(self.dataset)
        seq_remainder = len(dataset) % self.seq_len
        if seq_remainder == 0:
            seq_remainder += 1
        
        
        
        X_list = dataset[:-seq_remainder].reshape(-1, self.seq_len)
        
        
        Y_list = dataset[1:-seq_remainder + 1].reshape(-1, self.seq_len)
        for i in range(len(X_list)//self.batch_size):
            yield (torch.LongTensor(X_list[i*self.batch_size:(i+1)* self.batch_size]), torch.LongTensor(Y_list[i*self.batch_size:(i+1)* self.batch_size])) 

        
        

In [4]:
# model

class LanguageModel(nn.Module):
    """Embedding -> multi-layer LSTM -> linear projection onto the vocabulary.

    The attribute names ``embed``, ``rnn`` and ``prob_out`` are the keys used
    in saved ``state_dict`` checkpoints and must not be renamed.
    """

    def __init__(self, vocab_size, embed_dim=512, hidden_size=1024, num_layers=3):
        """
        :param vocab_size: number of distinct tokens (embedding rows and output logits)
        :param embed_dim: size of each token embedding
        :param hidden_size: LSTM hidden state size
        :param num_layers: number of stacked LSTM layers
        """
        super(LanguageModel, self).__init__()

        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.LSTM(input_size=embed_dim, hidden_size=hidden_size,
                           num_layers=num_layers, batch_first=True)
        self.prob_out = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hiddens=None):
        """Compute per-position vocabulary logits.

        :param x: LongTensor of token ids, shape (batch, seq_len)
        :param hiddens: optional ``(h, c)`` LSTM state to continue from;
                        ``None`` starts from a zero state
        :return: ``(logits, hidden)`` where logits has shape
                 (batch, seq_len, vocab_size) and hidden is the final
                 ``(h, c)`` LSTM state
        """
        embed = self.embed(x)
        # nn.LSTM treats hx=None as "start from zeros", so no branching is needed.
        out, hidden = self.rnn(embed, hiddens)
        return self.prob_out(out), hidden
    


In [5]:
# model trainer

class LanguageModelTrainer:
    """Train a language model, evaluate it on the fixtures, and save artifacts.

    Per-epoch results are appended to the list attributes created in
    ``__init__`` (losses, predictions, generated text and logits).
    """

    def __init__(self, model, loader, max_epochs=1, run_id='exp'):
        """
        :param model: module whose forward returns ``(logits, hidden)``
        :param loader: iterable yielding ``(inputs, targets)`` tensor pairs
        :param max_epochs: total number of epochs, used only in log messages
        :param run_id: name of the sub-directory of ``experiments`` used by ``save``
        """
        self.model = model
        self.loader = loader
        self.train_losses = []
        self.val_losses = []
        self.predictions = []
        self.predictions_test = []
        self.generated_logits = []
        self.generated = []
        self.generated_logits_test = []
        self.generated_test = []
        self.epochs = 0
        self.max_epochs = max_epochs
        self.run_id = run_id

        self.optimizer = torch.optim.Adam(self.model.parameters(), 1e-3, weight_decay=1e-6)
        self.criterion = nn.CrossEntropyLoss()

    def train(self):
        """Run one epoch over ``self.loader`` and record the mean batch loss.

        :raises ValueError: if the loader yields no batches
        """
        self.model.train()  # set to training mode
        epoch_loss = 0.0
        num_batches = 0
        for inputs, targets in self.loader:
            epoch_loss += self.train_batch(inputs, targets)
            num_batches += 1
        if num_batches == 0:
            raise ValueError('loader produced no batches; cannot train an epoch')
        epoch_loss = epoch_loss / num_batches
        self.epochs += 1
        print('[TRAIN]  Epoch [%d/%d]   Loss: %.4f'
                      % (self.epochs, self.max_epochs, epoch_loss))
        self.train_losses.append(epoch_loss)

    def train_batch(self, inputs, targets):
        """Run one optimisation step and return the batch loss as a Python float.

        :param inputs: LongTensor of token ids, shape (batch, seq_len)
        :param targets: LongTensor of next-token ids, same shape as ``inputs``
        :return: the cross-entropy loss of this batch (``float``)
        """
        # Follow the model's device instead of assuming a GPU is present.
        device = next(self.model.parameters()).device
        inputs, targets = inputs.to(device), targets.to(device)

        self.optimizer.zero_grad()
        outputs = self.model(inputs)[0]
        # CrossEntropyLoss expects the class dimension second: (batch, vocab, seq_len).
        loss = self.criterion(outputs.permute(0, 2, 1), targets)
        loss.backward()
        self.optimizer.step()
        # Return a plain float so accumulated losses neither stay on the device
        # nor end up as tensors in train_losses (which cannot be plotted).
        return loss.item()

    def test(self):
        """Evaluate on the dev/test fixtures, record the results, and return the dev NLL."""
        # don't change these
        self.model.eval() # set to eval mode
        predictions = TestLanguageModel.prediction(fixtures_pred['inp'], self.model) # get predictions
        self.predictions.append(predictions)
        generated_logits = TestLanguageModel.generation(fixtures_gen, 10, self.model) # generated predictions for 10 words
        generated_logits_test = TestLanguageModel.generation(fixtures_gen_test, 10, self.model)
        nll = test_prediction(predictions, fixtures_pred['out'])
        generated = test_generation(fixtures_gen, generated_logits, vocab)
        generated_test = test_generation(fixtures_gen_test, generated_logits_test, vocab)
        self.val_losses.append(nll)

        self.generated.append(generated)
        self.generated_test.append(generated_test)
        self.generated_logits.append(generated_logits)
        self.generated_logits_test.append(generated_logits_test)

        # generate predictions for test data
        predictions_test = TestLanguageModel.prediction(fixtures_pred_test['inp'], self.model) # get predictions
        self.predictions_test.append(predictions_test)

        print('[VAL]  Epoch [%d/%d]   Loss: %.4f'
                      % (self.epochs, self.max_epochs, nll))
        return nll

    def save(self):
        """Write the model weights and the latest evaluation outputs for this epoch.

        Files go to ``experiments/<run_id>/`` and are suffixed with the current
        epoch count. ``test`` must have been called at least once beforehand,
        because the most recent entry of each result list is saved.
        """
        run_dir = os.path.join('experiments', self.run_id)
        # Create the directory if needed so save() does not rely on an earlier cell.
        os.makedirs(run_dir, exist_ok=True)

        model_path = os.path.join(run_dir, 'model-{}.pkl'.format(self.epochs))
        torch.save({'state_dict': self.model.state_dict()},
            model_path)
        np.save(os.path.join(run_dir, 'predictions-{}.npy'.format(self.epochs)), self.predictions[-1])
        np.save(os.path.join(run_dir, 'predictions-test-{}.npy'.format(self.epochs)), self.predictions_test[-1])
        np.save(os.path.join(run_dir, 'generated_logits-{}.npy'.format(self.epochs)), self.generated_logits[-1])
        np.save(os.path.join(run_dir, 'generated_logits-test-{}.npy'.format(self.epochs)), self.generated_logits_test[-1])
        with open(os.path.join(run_dir, 'generated-{}.txt'.format(self.epochs)), 'w') as fw:
            fw.write(self.generated[-1])
        with open(os.path.join(run_dir, 'generated-{}-test.txt'.format(self.epochs)), 'w') as fw:
            fw.write(self.generated_test[-1])


In [6]:
class TestLanguageModel:
    """Inference helpers: next-token logits and greedy multi-token generation.

    Both helpers are static and are called on the class itself, e.g.
    ``TestLanguageModel.prediction(inp, model)``.
    """

    @staticmethod
    def _model_device(model):
        """Return the device of the model's parameters (falls back to CUDA if available, else CPU)."""
        first_param = next(model.parameters(), None)
        if first_param is None:
            return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        return first_param.device

    @staticmethod
    def prediction(inp, model):
        """Return the model's logits for the token following each input sequence.

            The model's train/eval mode is left as the caller set it.

            :param inp: integer token ids, shape (batch size, length)
            :param model: module whose forward returns ``(logits, hidden)``
            :return: a np.ndarray of logits, shape (batch size, vocab size)
        """
        device = TestLanguageModel._model_device(model)
        # No gradients are needed for inference; skipping them saves memory.
        with torch.no_grad():
            tokens = torch.LongTensor(inp).to(device)
            output = model(tokens)[0]
        return output[:, -1].detach().cpu().numpy()

    @staticmethod
    def generation(inp, forward, model):
        """Greedily generate a sequence of words given a starting sequence.

            Switches the model to eval mode as a side effect.

            :param inp: Initial sequence of words (batch size, length)
            :param forward: number of additional words to generate
            :param model: module whose forward accepts ``(tokens, hidden)`` and
                          returns ``(logits, hidden)``
            :return: generated words (batch size, forward) as an np.ndarray
        """
        model.eval()
        device = TestLanguageModel._model_device(model)
        with torch.no_grad():
            tokens = torch.LongTensor(inp).to(device)
            if forward <= 0:
                # Nothing requested: honour the documented (batch size, forward) shape.
                return np.empty((tokens.shape[0], 0), dtype=np.int64)

            # Consume the whole prompt once, then feed back one token at a time
            # while carrying the recurrent state forward.
            output, hidden = model(tokens)
            curr_word = torch.argmax(output[:, -1, :], dim=1, keepdim=True)
            generated_words = [curr_word]
            for _ in range(forward - 1):
                output, hidden = model(curr_word, hidden)
                curr_word = torch.argmax(output[:, -1, :], dim=1, keepdim=True)
                generated_words.append(curr_word)

            generated_words = torch.cat(generated_words, dim=1)
        return generated_words.detach().cpu().numpy()
            
        
        

In [7]:

# Training hyperparameters, read by the loader/trainer setup and the training loop below.
NUM_EPOCHS = 12  # number of train + evaluate passes; also passed to the trainer as max_epochs
BATCH_SIZE = 64  # number of windows per batch, passed to LanguageModelDataLoader


In [8]:
# Name this run after the current Unix timestamp (whole seconds).
run_id = str(int(time.time()))
# makedirs(exist_ok=True) creates ./experiments when missing and does not fail if
# this cell is re-run within the same second (os.mkdir raised FileExistsError).
os.makedirs('./experiments/%s' % run_id, exist_ok=True)
print("Saving models, predictions, and generated words to ./experiments/%s" % run_id)

Saving models, predictions, and generated words to ./experiments/1610157072


In [9]:
# Build the model on the GPU (Module.to returns the module itself), then wire
# the batch loader and the trainer around it.
model = LanguageModel(len(vocab)).to('cuda')
loader = LanguageModelDataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
trainer = LanguageModelTrainer(
    model=model,
    loader=loader,
    max_epochs=NUM_EPOCHS,
    run_id=run_id,
)

In [None]:
# Train for NUM_EPOCHS epochs. After each epoch, evaluate and write a checkpoint
# only when the validation NLL beats the best value seen so far.
# Note: the epoch number printed here is zero-based, while the trainer's own
# [TRAIN]/[VAL] log lines are one-based.
best_nll = 1e30
for epoch in range(NUM_EPOCHS):
    trainer.train()
    nll = trainer.test()
    if not (nll < best_nll):
        continue  # no improvement: keep the previous checkpoint
    best_nll = nll
    print("Saving model, predictions and generated output for epoch "+str(epoch)+" with NLL: "+ str(best_nll))
    trainer.save()
    

[TRAIN]  Epoch [1/12]   Loss: 7.4509
[VAL]  Epoch [1/12]   Loss: 6.2043
Saving model, predictions and generated output for epoch 0 with NLL: 6.2043266
[TRAIN]  Epoch [2/12]   Loss: 6.3008
[VAL]  Epoch [2/12]   Loss: 5.2565
Saving model, predictions and generated output for epoch 1 with NLL: 5.2564735
[TRAIN]  Epoch [3/12]   Loss: 5.7506
[VAL]  Epoch [3/12]   Loss: 4.9647
Saving model, predictions and generated output for epoch 2 with NLL: 4.9647474


In [None]:
# Don't change these
# Plot the training curves: one point per completed epoch (1..trainer.epochs)
# for the losses recorded by trainer.train() and trainer.test().
plt.figure()
plt.plot(range(1, trainer.epochs + 1), trainer.train_losses, label='Training losses')
plt.plot(range(1, trainer.epochs + 1), trainer.val_losses, label='Validation losses')
plt.xlabel('Epochs')
plt.ylabel('NLL')
plt.legend()
plt.show()

In [None]:
# Show the text generated after the most recent evaluation.
print(trainer.generated[-1])