In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Seed PyTorch's global RNG so the torch.randn draws in the following cells
# are repeatable when the notebook is run top to bottom on a fresh kernel.
torch.manual_seed(1)

<torch._C.Generator at 0x20362b26cd0>

In [3]:
# nn.LSTM(input_size=3, hidden_size=3): 3 input features, 3 hidden units.
lstm = nn.LSTM(3,3)
# Toy sequence of length 5; each element is a random tensor of shape (1, 3).
inputs = [torch.randn(1,3) for _ in range(5)]

In [4]:
# Display the five (1, 3) sequence elements.
inputs

[tensor([[-0.5525,  0.6355, -0.3968]]),
 tensor([[-0.6571, -1.6428,  0.9803]]),
 tensor([[-0.0421, -0.8206,  0.3133]]),
 tensor([[-1.1352,  0.3773, -0.2824]]),
 tensor([[-2.5667, -1.4303,  0.5009]])]

In [5]:
# Initialize the hidden state. nn.LSTM takes it as a tuple (h_0, c_0): the
# initial hidden state and the initial cell state. Both are drawn at random
# here with shape (1, 1, 3); the last dimension matches the LSTM's hidden size.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))

In [10]:
# Scratch check of what torch.randn returns for a (2, 3) request.
# Note: this draws from the same global RNG as the other cells.
torch.randn(2,3)

tensor([[-0.6933, -0.1668, -0.9999],
        [-1.6476,  0.8098,  0.0554]])

In [11]:
# Inspect the (h_0, c_0) tuple before it is passed to the LSTM.
hidden

(tensor([[[ 0.5438, -0.4057,  1.1341]]]),
 tensor([[[-1.1115,  0.3501, -0.7703]]]))

In [12]:
# Step through the sequence one element at a time. Each (1, 3) element is
# reshaped to (1, 1, 3) before the call, and the state returned by the LSTM is
# rebound to `hidden` so it is fed back in on the next iteration.
for i in inputs:
    out, hidden = lstm(i.view(1,1,-1), hidden)

In [13]:
# Show what the per-step reshape produces: (1, 3) -> (1, 1, 3).
inputs[0].view(1,1,-1)

tensor([[[-0.5525,  0.6355, -0.3968]]])

In [14]:
# Concatenate the list of (1, 3) tensors and reshape to (len(inputs), 1, 3) so
# the whole sequence can be passed to the LSTM in a single call.
# NOTE: this rebinds `inputs` from a list of tensors to a single tensor.
inputs = torch.cat(inputs).view(len(inputs), 1, -1)

In [15]:
# Draw a fresh random (h_0, c_0); `hidden` was overwritten by the
# step-by-step loop earlier in the notebook.
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))

In [16]:
# Run the whole sequence through the LSTM in one call. `out` receives one
# entry per sequence element; `hidden` is rebound to the returned state tuple.
out, hidden = lstm(inputs, hidden)

In [19]:
# Per-step LSTM outputs for the 5-element sequence.
print(out)

tensor([[[-0.3688, -0.0674, -0.2511]],

        [[-0.5567, -0.0197, -0.5766]],

        [[-0.4100, -0.0230, -0.4181]],

        [[-0.1916,  0.0364, -0.3571]],

        [[-0.3634,  0.0865, -0.3080]]], grad_fn=<StackBackward>)


In [20]:
# Final state tuple. In the recorded output its first tensor equals the last
# entry of `out`.
print (hidden)

(tensor([[[-0.3634,  0.0865, -0.3080]]], grad_fn=<StackBackward>), tensor([[[-1.1089,  0.3915, -0.3779]]], grad_fn=<StackBackward>))


In [21]:
def prepare_sequence(seq, to_ix):
    """Convert a sequence of keys into a 1-D ``torch.long`` index tensor.

    Args:
        seq: Iterable of keys (e.g. tokens or tag labels).
        to_ix: Mapping from key to integer index.

    Returns:
        Tensor of dtype ``torch.long`` holding ``to_ix[key]`` for every key
        in ``seq``, in order.

    Raises:
        KeyError: If a key in ``seq`` is missing from a plain-dict ``to_ix``.
    """
    indices = []
    for key in seq:
        indices.append(to_ix[key])
    return torch.tensor(indices, dtype=torch.long)

In [22]:
# Toy corpus: a list of (tokens, tags) pairs, one tag per token.
# The tag labels used are "DET", "NN" and "V".
training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]

In [23]:
# Display the tokenised sentences next to their tag lists.
training_data

[(['The', 'dog', 'ate', 'the', 'apple'], ['DET', 'NN', 'V', 'DET', 'NN']),
 (['Everybody', 'read', 'that', 'book'], ['NN', 'V', 'DET', 'NN'])]

In [24]:
# Model sizes for the toy example.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

# Tag label -> class index.
tag_to_ix = {"DET": 0, "NN": 1, "V": 2}

# Vocabulary: every distinct token gets the next free index, in first-seen
# order. setdefault leaves tokens that already have an index untouched.
word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        word_to_ix.setdefault(word, len(word_to_ix))

print(word_to_ix)

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}


In [28]:
class LSTMTageer(nn.Module):
    """LSTM sequence tagger.

    Embeds a 1-D tensor of token indices, runs the embeddings through an LSTM
    with a batch dimension of 1, projects every LSTM output to tag space and
    returns per-token log-probabilities.

    Args:
        embedding_dim: Size of each token embedding (also the LSTM input size).
        hidden_dim: LSTM hidden size.
        vocab_size: Number of rows in the embedding table.
        tagset_size: Number of output classes.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super().__init__()
        self.hidden_dim = hidden_dim
        # Sub-modules are created in embedding -> LSTM -> linear order.
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hiddent2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        """Return log-softmax tag scores of shape (len(sentence), tagset_size)."""
        seq_len = len(sentence)
        # Insert a batch dimension of 1: (seq_len, 1, embedding_dim).
        token_vectors = self.word_embeddings(sentence).view(seq_len, 1, -1)
        hidden_states, _ = self.lstm(token_vectors)
        # Drop the batch dimension again before the linear projection.
        logits = self.hiddent2tag(hidden_states.view(seq_len, -1))
        return F.log_softmax(logits, dim=1)

In [29]:
# Build the tagger, the loss and the optimizer.
model = LSTMTageer(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
# NLLLoss expects log-probabilities, which is what the model's forward() returns.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Baseline: scores for the first training sentence before any optimizer step.
# no_grad() because this is inspection only; no backward pass follows.
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print(tag_scores)

tensor([[-1.0576, -1.3281, -0.9474],
        [-1.0085, -1.4165, -0.9347],
        [-1.0357, -1.3481, -0.9537],
        [-1.0511, -1.3358, -0.9480],
        [-1.1008, -1.2607, -0.9573]])


In [30]:
# Show the corpus again right before the training cell iterates over it.
training_data

[(['The', 'dog', 'ate', 'the', 'apple'], ['DET', 'NN', 'V', 'DET', 'NN']),
 (['Everybody', 'read', 'that', 'book'], ['NN', 'V', 'DET', 'NN'])]

In [33]:
# Train for a single pass over the toy corpus, one sentence per optimizer step.
for epoch in range(1):
    for sentence, tags in training_data:
        # Gradients accumulate across backward() calls, so clear them first.
        model.zero_grad()

        # Turn the tokens and the tags into index tensors.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)
        print(sentence_in)
        print(targets)

        # Forward pass, loss, backward pass, parameter update.
        tag_scores = model(sentence_in)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()


tensor([0, 1, 2, 3, 4])
tensor([0, 1, 2, 0, 1])
tensor([5, 6, 7, 8])
tensor([1, 2, 0, 1])


In [None]:
# Re-score the first training sentence once the training cell has run, for
# comparison with the scores printed before training. no_grad() because this
# is inspection only.
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    
    print(tag_scores)