In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [14]:
# Expects all of LSTM's tensors to be 3D tensors.
torch.manual_seed(1)

lstm = nn.LSTM(3,3) # input_dim = 3, output_dim = 3
inputs = [torch.randn(1,3) for _ in range(5)] # sequence of length = 5

hidden = (torch.randn(1,1,3),
         torch.rand(1,1,3))

for i in inputs:
    out, hidden = lstm(i.view(1,1,-1), hidden) #view function ???????????
    
inputs = torch.cat(inputs).view(len(inputs),1,-1) #cat function torch ????????
hidden = (torch.randn(1,1,3), torch.randn(1,1,3)) # cleaning hidden state
out, hidden = lstm(inputs, hidden)
print out
print hidden

tensor([[[-0.2219, -0.1523, -0.1093]],

        [[-0.5302, -0.0478, -0.0874]],

        [[-0.3949, -0.0266, -0.0578]],

        [[-0.1881,  0.0340, -0.0527]],

        [[-0.3633,  0.0779,  0.0662]]])
(tensor([[[-0.3633,  0.0779,  0.0662]]]), tensor([[[-1.1421,  0.3910,  0.0783]]]))


In [18]:
# SPEECH TAGGING USING LSTM
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)

training_data = [('The dog ate the apple'.split(),["DET","NN","V","DET","NN"]),
                 ("Everybody Read that book".split(),["NN","V","DET","NN"])]
                 
word_to_ix = {}

for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print word_to_ix

tag_to_ix = {"DET":0, "NN":1, "V":2}

EMBEDDING_DIM = 6
HIDDEN_DIM = 6


{'Everybody': 5, 'ate': 2, 'apple': 4, 'that': 7, 'Read': 6, 'dog': 1, 'book': 8, 'the': 3, 'The': 0}


In [19]:
# best way to sort a Dictionary????????

In [22]:
# Create the Model 
class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger,self).__init__()
        
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim) #word embeddings as inputs
        self.lstm = nn.LSTM(embedding_dim,hidden_dim) # maps hidden space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.hidden = self.init_hidden()
        
    def init_hidden(self):
        # (num_layers, minibatch_size, hidden_dim)
        return (torch.zeros(1,1,self.hidden_dim),
               torch.zeros(1,1,self.hidden_dim))
    
    
    def forward(self,sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence),1,-1),self.hidden)
        tag_space = self.hidden2tag(lstm_out.view(len(sentence),-1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [23]:
# INITIALISING
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [24]:
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print tag_scores

tensor([[-1.3043, -0.9473, -1.0763],
        [-1.2956, -0.9301, -1.1035],
        [-1.3964, -0.9481, -1.0078],
        [-1.3005, -0.9466, -1.0802],
        [-1.2084, -1.0743, -1.0222]])


In [27]:
# TRAINING
for epoch in range(300):
    for sentence, tags in training_data:
        model.zero_grad()
        model.hidden = model.init_hidden()
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)
        
        tag_scores = model(sentence_in)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

In [None]:
#Differnce between model.zero_grad and optimizer.zero_grad() ?????????

In [28]:
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print tag_scores

tensor([[-0.0911, -2.6834, -3.9746],
        [-3.8265, -0.0349, -4.3787],
        [-3.6292, -4.7611, -0.0357],
        [-0.0388, -4.0810, -3.8538],
        [-4.4154, -0.0127, -7.5296]])


In [41]:
values, indices = torch.max(tag_scores,1)
print indices

tensor([ 0,  1,  2,  0,  1])


In [39]:
# {"DET":0, "NN":1, "V":2}
# 0 1 2 0 1
# DET NN V DET NN