<a href="https://colab.research.google.com/github/marofmar/TIL/blob/master/2019_12_20_Fri_LSTM_tagging.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt 

import numpy as np

%matplotlib inline

In [2]:
training_data = [
                 ('I am a boy'.lower().split(), ['N','V','D','N']),
                 ('You are a girl'.lower().split(), ['N', 'V','D','N']),
                 ('He loves elephants'.lower().split(), ['N', 'V', 'N']),
                 ('Jacky wears a glasses'.lower().split(), ['N','V','D','N']),
                 ('The cat loves fish'.lower().split(), ['D','N','V','N'])
]
# N for noun, V for verb, D for Determinants? like the or a/an
tag2idx = {'D':0, 'N':1, 'V':2}
word2idx = {}
for sentence, tags in training_data:
    for word in sentence:
        if word not in word2idx:
            word2idx[word] = len(word2idx)
print(word2idx)

{'i': 0, 'am': 1, 'a': 2, 'boy': 3, 'you': 4, 'are': 5, 'girl': 6, 'he': 7, 'loves': 8, 'elephants': 9, 'jacky': 10, 'wears': 11, 'glasses': 12, 'the': 13, 'cat': 14, 'fish': 15}


In [0]:
def prepare_seq(sentence, dic):
    idxs = [dic[w] for w in sentence]
    idxs = np.array(idxs)
    return torch.from_numpy(idxs)

# will be used to both sentences and tags to change the words into numbers

In [0]:
class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim) 
        self.lstm = nn.LSTM(embedding_dim, hidden_dim) 
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size) 
        self.hidden = self.init_hidden() 
    def init_hidden(self):
        return (torch.zeros(1,1,self.hidden_dim),
                torch.zeros(1,1,self.hidden_dim))
    def forward(self, sentence):
        embeds = self.word_embeddings(sentence) 
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden) 
        tag_outputs = self.hidden2tag(lstm_out.view(len(sentence), -1)) 
        tag_scores = F.log_softmax(tag_outputs, dim = 1) 
        return tag_scores 




In [0]:
# set params for training
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word2idx), len(tag2idx)) 

loss_function = nn.NLLLoss() 
optimizer = optim.SGD(model.parameters(), lr = 0.01) 


In [13]:
n_epochs = 500
for i in range(n_epochs):
    epoch_loss = 0.0
    for sentence, tags in training_data:
        model.zero_grad()
        model.hidden = model.init_hidden() 
        inputs = prepare_seq(sentence, word2idx)
        target = prepare_seq(tags, tag2idx) 
        pred = model(inputs)
        loss = loss_function(pred, target) 
        epoch_loss += loss.item() 
        loss.backward()
        optimizer.step() 
    if (i%20==19):
        print("Epoch: %d, Loss: %1.5f" %(i+1, epoch_loss/len(training_data)))


Epoch: 20, Loss: 0.72428
Epoch: 40, Loss: 0.69491
Epoch: 60, Loss: 0.66565
Epoch: 80, Loss: 0.63657
Epoch: 100, Loss: 0.60757
Epoch: 120, Loss: 0.57847
Epoch: 140, Loss: 0.54906
Epoch: 160, Loss: 0.51919
Epoch: 180, Loss: 0.48876
Epoch: 200, Loss: 0.45778
Epoch: 220, Loss: 0.42636
Epoch: 240, Loss: 0.39470
Epoch: 260, Loss: 0.36314
Epoch: 280, Loss: 0.33211
Epoch: 300, Loss: 0.30218
Epoch: 320, Loss: 0.27392
Epoch: 340, Loss: 0.24780
Epoch: 360, Loss: 0.22412
Epoch: 380, Loss: 0.20297
Epoch: 400, Loss: 0.18428
Epoch: 420, Loss: 0.16786
Epoch: 440, Loss: 0.15348
Epoch: 460, Loss: 0.14090
Epoch: 480, Loss: 0.12986
Epoch: 500, Loss: 0.12016


In [14]:
test_sentence = 'I am the girl'.lower().split()
inputs = prepare_seq(test_sentence, word2idx) 
tag_scores = model(inputs)

_, predicted = torch.max(tag_scores, 1)
print(tag_scores , predicted)

tensor([[-3.3181, -0.5852, -0.8995],
        [-2.9943, -1.9148, -0.2200],
        [-0.1783, -2.6058, -2.4142],
        [-3.5550, -0.1299, -2.3728]], grad_fn=<LogSoftmaxBackward>) tensor([1, 2, 0, 1])
