In [1]:
import numpy as np 
import torch 
import torchtext

from lstm import HILanguageModelDataset, LanguageModel

# LSTM-Based Neural Language Model

&nbsp;

### This model is trained from scratch; it is much smaller than the GPT2 model and slower to train. Nonetheless, it requires far fewer resources to train: during training, it should consume at most 3GB of GPU memory.

In [2]:
# Raw text corpus, given relative to the notebook's working directory.
path = "../data/hi_all_text.txt"

# One torchtext Field is shared by the dataset, its splits and the vocabulary.
TEXT = torchtext.data.Field()
nlm_data = HILanguageModelDataset(text_field=TEXT, path=path)

# Ratios [0.9, 0.05, 0.05] are unpacked as train / validation / test.
train, val, test = nlm_data.split(
    text_field=TEXT,
    split_ratio=[0.9, 0.05, 0.05],
)

# The vocabulary is built from the training split only.
TEXT.build_vocab(train)



In [3]:
# One BPTT iterator per split; batch_size and bptt_len are the same 20 and 35
# that the training loop and zero_states() rely on.
train_iter, val_iter, test_iter = torchtext.data.BPTTIterator.splits(
    (train, val, test),
    batch_size=20,
    bptt_len=35,
    repeat=False,
)



In [4]:
# NOTE(review): the meaning of the positional arguments is defined by
# lstm.LanguageModel, which is not visible here. 650 and 2 match the state
# shape produced by zero_states(); the remaining two are presumably an
# initialisation range and a dropout rate -- TODO confirm.
model = LanguageModel(
    650,
    len(TEXT.vocab.itos),  # one entry per vocabulary item
    2,
    0.05,
    0.5,
)
model = model.cuda()

In [5]:
def zero_states(num_layers=2, batch_size=20, hidden_size=650, device="cuda"):
    """Return a fresh pair of all-zero state tensors for the model.

    Called with no arguments this yields the same result as before: two
    zero tensors of shape ``(2, 20, 650)`` on the GPU. The deprecated
    ``torch.autograd.Variable`` wrapper is no longer used, and the tensors
    are allocated directly on ``device`` instead of on the CPU first.

    Args:
        num_layers: size of the first dimension of each state tensor
            (default 2, the value also passed to ``LanguageModel``).
        batch_size: size of the second dimension (default 20, the
            ``batch_size`` given to the BPTT iterators).
        hidden_size: size of the third dimension (default 650).
        device: device on which the tensors are allocated.

    Returns:
        A tuple of two distinct zero tensors of shape
        ``(num_layers, batch_size, hidden_size)``; presumably the LSTM's
        initial hidden and cell state -- confirm against ``LanguageModel``.
    """
    shape = (num_layers, batch_size, hidden_size)
    return (torch.zeros(shape, device=device),
            torch.zeros(shape, device=device))

In [6]:
# Optimisation hyper-parameters.
lr = 1.0          # initial SGD learning rate
epochs = 39       # number of passes made by the training loop
decay_rate = 0.8  # base of the exponential decay computed in set_lr
max_epochs = 6    # set_lr keeps the multiplier at 1 while epoch + 1 <= max_epochs

# Cross-entropy summed (not averaged) over all target tokens of a batch.
loss = torch.nn.CrossEntropyLoss(reduction='sum').cuda()
# Use the declared `lr` rather than repeating the literal, so the constant
# above is the single place to tune the initial learning rate.
op = torch.optim.SGD(model.parameters(), lr=lr)

def set_lr(epoch):
    """Return the learning-rate multiplier for ``epoch``.

    The multiplier is ``decay_rate`` raised to the number of epochs by which
    ``epoch + 1`` exceeds ``max_epochs`` (never a negative exponent), so it
    is 1 until that threshold is passed and shrinks geometrically afterwards.
    """
    epochs_over_threshold = max(epoch + 1 - max_epochs, 0.0)
    return decay_rate ** epochs_over_threshold

# Each scheduler step sets the optimiser's learning rate to its initial value
# multiplied by set_lr(<scheduler's epoch counter>).
lr_schedule = torch.optim.lr_scheduler.LambdaLR(optimizer=op, lr_lambda=set_lr)

In [None]:
import sys
import timeit


def _run_epoch(label, epoch, data_iter, training):
    """Make one pass over ``data_iter`` and return the mean per-token loss.

    Args:
        label: prefix written at the start of the progress line
            (``'Training: '`` or ``'Validation:'``).
        epoch: zero-based epoch index; only used in the progress line.
        data_iter: iterator yielding batches with ``text`` and ``target``.
        training: when True, gradients are computed, clipped to norm 5.0 and
            the optimiser is stepped after every batch. When False the pass
            runs with autograd disabled and the model is called with
            ``train=False``.

    Returns:
        The summed cross-entropy divided by the number of target tokens
        actually seen, or NaN when ``data_iter`` yields no batches.
    """
    vocab_size = len(TEXT.vocab.itos)
    n_batches = len(data_iter)
    states = zero_states()
    total_loss = 0.0   # running sum of the summed cross-entropy, as a float
    total_tokens = 0   # running count of target tokens
    mean_loss = float('nan')

    # NOTE(review): `states` is fed back exactly as the model returns it. This
    # assumes LanguageModel detaches the state internally; otherwise
    # backward() would reach into earlier batches' graphs -- confirm.
    # Autograd is switched off for validation so no graph is built or kept.
    with torch.set_grad_enabled(training):
        for step, batch in enumerate(data_iter):
            x = batch.text.cuda()
            y = batch.target.cuda().view(-1)

            if training:
                op.zero_grad()
            pred, states = model(x, states, train=training)
            # Flatten to (tokens, vocab) in both modes; a no-op if the model
            # already returns that shape.
            batch_loss = loss(pred.view(-1, vocab_size), y)
            if training:
                batch_loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
                op.step()

            # Normalise by the real token count: the final BPTT batch can be
            # shorter than 20 * 35 tokens.
            total_loss += batch_loss.item()
            total_tokens += y.numel()
            mean_loss = total_loss / total_tokens

            sys.stdout.write('\r' + label + str(epoch) + '   Progress: ' + str(step) +
                             '/' + str(n_batches) + '   Loss: ' + str(np.around(mean_loss, 3)) +
                             ' Perplexity: ' + str(np.around(np.exp(mean_loss), 3)))
    print('')
    return mean_loss


start_time = timeit.default_timer()

# `epochs` is taken from the hyper-parameter cell instead of being redefined
# here, so there is a single place to change it.
for epoch in range(epochs):
    print('Epoch:', epoch)
    # NOTE(review): PyTorch >= 1.1 expects scheduler.step() after
    # optimizer.step(); stepping first shifts the schedule by one epoch on
    # those versions. Left in place because the right position depends on the
    # installed PyTorch version -- confirm.
    lr_schedule.step()

    _run_epoch('Training: ', epoch, train_iter, training=True)
    _run_epoch('Validation:', epoch, val_iter, training=False)
    print('')
print('')
print(timeit.default_timer() - start_time)