In [23]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [11]:
training_data = [
    ("The cat ate the cheese".lower().split(), ["DET", "NN", "V", "DET", "NN"]),
    ("She read that book".lower().split(), ["NN", "V", "DET", "NN"]),
    ("The dog loves art".lower().split(), ["DET", "NN", "V", "NN"]),
    ("The elephant answers the phone".lower().split(), ["DET", "NN", "V", "DET", "NN"])
]

word2idx = {}
for line, tags in training_data:
    for word in line:
        if word not in word2idx:
           word2idx[word] = len(word2idx)

tag2idx = {"DET": 0, "NN": 1, "V": 2}

In [4]:
print(word2idx)

{'the': 0, 'cat': 1, 'ate': 2, 'cheese': 3, 'she': 4, 'read': 5, 'that': 6, 'book': 7, 'dog': 8, 'loves': 9, 'art': 10, 'elephant': 11, 'answers': 12, 'phone': 13}


In [7]:
# helper function to convert sequence of words into tensor of correspondig indexes
def prepare_sequence(seq, to_idx):
    idxs = np.array([to_idx[w] for w in seq])
    return torch.from_numpy(idxs)


In [9]:
print(prepare_sequence(["the", "dog", "answers", "the", "phone"], word2idx))

tensor([ 0,  8, 12,  0, 13])


In [24]:
class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, target_size):
        super(LSTMTagger, self).__init__()

        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, target_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        return (torch.zeros(1, 1, self.hidden_dim), torch.zeros(1, 1, self.hidden_dim))

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)

        tag_outputs = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_outputs, dim=1)
        return tag_scores

In [25]:
# the embedding dimension defines the size of our word vectors
# for our simple vocabulary and training set, we will keep these small
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word2idx), len(tag2idx))

loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [26]:
# test model BEFORE training to see later that it learns something and performs better
test_sentense = "The cheese loves the elephant".lower().split()

inputs = prepare_sequence(test_sentense, word2idx)
tag_scores = model(inputs)
print(tag_scores)

tensor([[-1.1056, -1.0358, -1.1583],
        [-1.2082, -1.0120, -1.0854],
        [-1.1566, -0.9996, -1.1475],
        [-1.1029, -1.0392, -1.1573],
        [-1.0837, -1.0457, -1.1705]], grad_fn=<LogSoftmaxBackward>)


In [27]:
_, predicted_tags = torch.max(tag_scores, dim=1)
print(predicted_tags)

tensor([1, 1, 1, 1, 1])


In [29]:
n_epochs = 300

for epoch in range(n_epochs):
    epoch_loss = 0.0

    for sentence, tags in training_data:
        model.zero_grad()
        model.hidden = model.init_hidden()

        sentence_in = prepare_sequence(sentence, word2idx)
        targets = prepare_sequence(tags, tag2idx)

        tag_scores = model(sentence_in)

        loss = loss_function(tag_scores, targets)
        epoch_loss += loss.item()

        loss.backward()
        optimizer.step()
    if epoch%20==19:
        print("Epoch {} Loss {}".format(epoch, epoch_loss/len(training_data)))



Epoch 19 Loss 0.032280809711664915
Epoch 39 Loss 0.02875902969390154
Epoch 59 Loss 0.02587804989889264
Epoch 79 Loss 0.023482127115130424
Epoch 99 Loss 0.021461260970681906
Epoch 119 Loss 0.019735854817554355
Epoch 139 Loss 0.018247031373903155
Epoch 159 Loss 0.01695034746080637
Epoch 179 Loss 0.015811806311830878
Epoch 199 Loss 0.014804745558649302
Epoch 219 Loss 0.013908273307606578
Epoch 239 Loss 0.013105457415804267
Epoch 259 Loss 0.012382772751152515
Epoch 279 Loss 0.011729001766070724
Epoch 299 Loss 0.01113503216765821


In [30]:
# test model with same test sentence as before
test_sentense = "The cheese loves the elephant".lower().split()

inputs = prepare_sequence(test_sentense, word2idx)
tag_scores = model(inputs)
print(tag_scores)

tensor([[-6.9385e-03, -4.9743e+00, -1.3731e+01],
        [-6.2323e+00, -2.4220e-03, -7.6969e+00],
        [-1.2311e+01, -4.0028e+00, -1.8438e-02],
        [-3.6149e-03, -5.6370e+00, -1.0009e+01],
        [-6.6501e+00, -9.4885e-02, -2.4166e+00]], grad_fn=<LogSoftmaxBackward>)


In [33]:
_, predicted_tags = torch.max(tag_scores, dim=1)
print(predicted_tags)

tensor([0, 1, 2, 0, 1])
