In [1]:
import torchtext
import torch
import torch.nn as nn
import random
import numpy as np
from torchtext.vocab import Vectors

In [98]:
# Seed every random number generator the notebook touches so that runs are
# repeatable. The three generators are independent of each other.
np.random.seed(4)       # NumPy global generator
torch.manual_seed(4)    # PyTorch generator
random.seed(4)          # Python standard-library generator

In [99]:
# Hyper-parameters and run configuration for the language model.
BATCH_SIZE = 32          # batch size passed to the iterators and to init_hidden
EMBEDDING_SIZE = 100     # word-embedding dimension
HIDDEN_SIZE = 100        # LSTM hidden-state dimension
MAX_VOCAB_SIZE = 20000   # max_size limit passed to TEXT.build_vocab
BPTT_LEN = 50            # bptt_len passed to the BPTT iterators
# The constant was originally misspelled "BRTT_LEN" while the iterator cell
# reads BPTT_LEN; keep the old spelling as an alias so nothing else breaks.
BRTT_LEN = BPTT_LEN
device = torch.device("cpu")

In [101]:
# Lower-casing text field shared by all three splits.
TEXT = torchtext.data.Field(lower=True)

# Load the train/validation/test splits from the local data directory.
# A forward slash is used instead of ".\data": the backslash form contains the
# invalid escape sequence "\d" and only resolves as a directory on Windows,
# whereas "./data" works on every platform.
train, val, test = torchtext.datasets.LanguageModelingDataset.splits(
    path="./data",
    train="text8.train.txt",
    validation="text8.dev.txt",
    test="text8.test.txt",
    text_field=TEXT,
)

In [102]:
# Build the vocabulary from the training split, passing MAX_VOCAB_SIZE as the
# max_size limit.
TEXT.build_vocab(train,max_size=MAX_VOCAB_SIZE)
# Show the first ten index-to-string entries as the cell output.
TEXT.vocab.itos[:10]

['<unk>', '<pad>', 'the', 'of', 'and', 'one', 'in', 'a', 'to', 'zero']

In [104]:
# Create one BPTT iterator per split, all sharing the same batch size,
# sequence length and device. BPTT_LEN must be defined by the configuration
# cell before this cell runs.
train_iter, val_iter, test_iter = torchtext.data.BPTTIterator.splits(
    (train, val, test),
    batch_size=BATCH_SIZE,
    bptt_len=BPTT_LEN,
    device=device,
    repeat=False,
    shuffle=True,
)

In [107]:
# Pull a single batch from the training iterator for inspection.
it = iter(train_iter)
batch = next(it)
# Display the token-id tensor of the batch as the cell output.
# NOTE(review): presumably laid out (seq_len, batch) — confirm against BPTTIterator.
batch.text

tensor([[4815,   50,    6,  ..., 9116,   33,    7],
        [3143, 2748,  495,  ...,  893,  277,  317],
        [  13,    8,  850,  ...,  664,  824, 1602],
        ...,
        [   8,   34,  522,  ..., 5237,    3,   12],
        [3628, 1266,  968,  ...,    3,    2,    6],
        [   2,   54,   78,  ...,   12,  185, 3027]])

In [109]:
" ".join(TEXT.vocab.itos[i] for i in batch.text[:,0].data)
" ".join(TEXT.vocab.itos[i] for i in batch.target[:,0].data)

'originated as a term of abuse first used against early working class radicals including the <unk> of the english revolution and the sans <unk> of the french revolution whilst the term is still used in a pejorative way to describe any act that used violent means to destroy the organization'

In [126]:
class LSTMModel(nn.Module):
    """Language model: embedding -> single-layer LSTM -> linear decoder.

    Args:
        vocab_size: size of the embedding table and width of the output logits.
        embed_size: embedding dimension fed into the LSTM.
        hidden_size: dimension of the LSTM hidden and cell states.
    """

    def __init__(self, vocab_size, embed_size, hidden_size):
        super(LSTMModel, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size)
        self.linear = nn.Linear(hidden_size, vocab_size)
        self.hidden_size = hidden_size

    def forward(self, text, hidden):
        """Run one chunk of token ids through the model.

        Args:
            text: integer tensor of token ids. The LSTM is built without
                ``batch_first``, so the layout is (seq_len, batch).
            hidden: ``(h, c)`` state tuple, e.g. from :meth:`init_hidden`.

        Returns:
            ``(logits, hidden)`` where ``logits`` has the first two dimensions
            of the LSTM output and a last dimension of ``vocab_size``, and
            ``hidden`` is the updated LSTM state.
        """
        emd = self.embed(text)
        output, hidden = self.lstm(emd, hidden)
        # Flatten the first two dims so the decoder sees a 2-D matrix ...
        output_vocab = self.linear(output.view(-1, output.shape[2]))
        # ... then restore them around the vocabulary dimension.
        output_vocab = output_vocab.view(output.shape[0], output.shape[1], output_vocab.shape[-1])
        return output_vocab, hidden

    def init_hidden(self, bsz, reqiures_grad=True):
        """Create zero-filled initial ``(h, c)`` states of shape (1, bsz, hidden_size).

        Args:
            bsz: batch size of the sequences that will be fed to the model.
            reqiures_grad: whether the returned tensors track gradients. The
                misspelled name is kept because callers pass it by keyword.
                The flag used to be ignored (always ``True``); it is now
                honored.

        Returns:
            Tuple ``(h0, c0)`` with the dtype and device of the model's first
            parameter.
        """
        weight = next(self.parameters())
        state_shape = (1, bsz, self.hidden_size)
        return (weight.new_zeros(state_shape, requires_grad=reqiures_grad),
                weight.new_zeros(state_shape, requires_grad=reqiures_grad))

In [127]:
# Instantiate the model and move it to the same device the data iterators
# were created on, so changing `device` in the config cell does not lead to a
# device mismatch between inputs and parameters (a no-op for the CPU setting).
model = LSTMModel(vocab_size=len(TEXT.vocab),
                  embed_size=EMBEDDING_SIZE,
                  hidden_size=HIDDEN_SIZE).to(device)
print(next(model.parameters()))

# Cross-entropy over the vocabulary, Adam optimizer, and a scheduler that
# multiplies the learning rate by 0.5 each time scheduler.step() is called.
loss_fn = nn.CrossEntropyLoss()
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5)

Parameter containing:
tensor([[ 0.0608, -2.1596,  0.1694,  ..., -0.1983,  0.2163, -1.6990],
        [ 0.0036, -0.3936,  0.0605,  ...,  0.3735, -0.3195, -0.2003],
        [ 0.4077, -0.0182, -0.2772,  ...,  0.2706, -0.9632, -0.6868],
        ...,
        [ 1.1100, -0.0937, -0.2067,  ..., -0.9200,  1.5634, -3.0697],
        [ 0.6796, -1.5666,  0.4953,  ...,  1.6620, -1.2221, -0.0389],
        [ 0.1453,  0.1320,  0.2410,  ..., -0.3433, -0.8504, -0.0663]],
       requires_grad=True)


In [128]:
def repackage_hidden(h):
    """Detach hidden state(s) from the autograd graph.

    A tensor is returned detached. Any other value is treated as an iterable
    whose elements are processed recursively and collected into a tuple.
    """
    if not isinstance(h, torch.Tensor):
        return tuple(map(repackage_hidden, h))
    return h.detach()

In [129]:
# Number of entries in the built vocabulary; used when flattening the model's
# logits to (-1, VOCAB_SIZE) before computing the loss.
VOCAB_SIZE = len(TEXT.vocab)

In [134]:
def evaluate(model, data):
    """Compute the token-weighted average loss of ``model`` over ``data``.

    The model is switched to eval mode for the pass and back to train mode
    before returning. Gradients are disabled throughout.

    Args:
        model: module exposing ``init_hidden(bsz, reqiures_grad=...)`` and
            returning ``(logits, hidden)`` when called with ``(text, hidden)``.
        data: iterable of batches with ``.text`` and ``.target`` tensors.

    Returns:
        0-dim tensor: sum over batches of (batch loss * tokens in batch),
        divided by the total number of tokens.

    Raises:
        ZeroDivisionError: if ``data`` yields no batches.
    """
    model.eval()
    total_loss = 0
    total_count = 0
    with torch.no_grad():
        hidden = model.init_hidden(BATCH_SIZE, reqiures_grad=False)
        for batch in data:
            # Distinct name: the loop must not shadow the `data` argument.
            text, target = batch.text, batch.target
            hidden = repackage_hidden(hidden)
            output, hidden = model(text, hidden)

            loss = loss_fn(output.view(-1, VOCAB_SIZE), target.view(-1))
            # Accumulate across batches. Plain assignment here used to
            # discard everything except the final batch.
            n_tokens = text.numel()
            total_loss = total_loss + loss * n_tokens
            total_count += n_tokens
    loss = total_loss / total_count
    model.train()
    return loss

In [135]:
NUM_EPOCHS = 2
GRAD_CLIP = 5.
LOG_INTERVAL = 10       # print the training loss every this many iterations
EVAL_INTERVAL = 10000   # run validation every this many iterations

# Validation losses seen so far; used to decide when to checkpoint.
val_losses = []
for epoch in range(NUM_EPOCHS):
    model.train()
    hidden = model.init_hidden(BATCH_SIZE)
    for i, batch in enumerate(train_iter):
        data, target = batch.text, batch.target
        # Carry the state values over from the previous chunk, but cut the
        # autograd graph so backprop stops at the chunk boundary.
        hidden = repackage_hidden(hidden)
        output, hidden = model(data, hidden)
        loss = loss_fn(output.view(-1, VOCAB_SIZE), target.view(-1))
        optimizer.zero_grad()
        loss.backward()

        # Clip the global gradient norm before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)

        optimizer.step()
        # Explicit `== 0`: the bare `if i % 10:` was truthy for every
        # iteration that is NOT a multiple of 10, flooding the output.
        if i % LOG_INTERVAL == 0:
            print("epoch", epoch, "iteration", i, "loss", loss.item())

        if i % EVAL_INTERVAL == 0:
            val_loss = evaluate(model, val_iter)
            if len(val_losses) == 0 or val_loss < min(val_losses):
                # Best validation loss so far: checkpoint the weights.
                torch.save(model.state_dict(), "lm.pth")
                print("save")
            else:
                # No improvement: decay the learning rate.
                scheduler.step()
            val_losses.append(val_loss)

KeyboardInterrupt: 

In [None]:
from torchtext import data

SEED = 1234

torch.manual_seed(SEED)
n