In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's random number generator so the random tensors and the
# randomly initialised layer weights created below are reproducible.
torch.manual_seed(1)

## LSTM in PyTorch
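* `nn.LSTM` expects all of its inputs to be 3D tensors.
* Semantics of the axes (with the default `batch_first=False`):
  * axis 1  :  the sequence itself (time steps)
  * axis 2  :  indexes of instances in the mini-batch
  * axis 3  :  indexes of elements of the input (features)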

In [None]:
# Single-layer LSTM: every input vector has 3 features and the hidden/cell
# state has 3 units.
lstm = nn.LSTM(input_size=3, hidden_size=3)

# A toy sequence of length 5; each element is a (1, 3) tensor.
inputs = [torch.randn(1, 3) for _ in range(5)]

# Re-seed so the initial state below is drawn from a known RNG position
# (the whole-sequence cell re-seeds the same way before drawing its state).
torch.manual_seed(1)

# Initial (hidden state, cell state), each shaped (num_layers=1, batch=1, hidden=3).
hidden = tuple(torch.randn(1, 1, 3) for _ in range(2))

# Feed the sequence one element at a time, threading the state through.
# After each step `hidden` holds the updated (hidden state, cell state) pair.
for idx, i in enumerate(inputs):
    # Reshape the (1, 3) element to (seq_len=1, batch=1, features=3).
    out, hidden = lstm(i.view(1, 1, -1), hidden)

    print("\nIteration ", idx, "\n")
    print("input : ", i, '\n', "output : ", out, "\n", "hidden : ", hidden)

In [None]:
# Alternatively, the entire sequence can be computed in a single call.
# Concatenate the (1, 3) elements into (5, 3), then add the batch axis to get
# the (seq_len, batch=1, features) layout the LSTM expects.
inputs1 = torch.cat(inputs)
inputs1 = inputs1.view(len(inputs), 1, -1)
print(inputs1.size())

# Re-seed and draw a fresh initial (hidden state, cell state) pair.
torch.manual_seed(1)
hidden = tuple(torch.randn(1, 1, 3) for _ in range(2))

# `out` holds the hidden state at every time step; `hidden` holds only the
# final (hidden state, cell state) pair.
out, hidden = lstm(inputs1, hidden)
print("output \n", out)
print("hidden layer \n" , hidden)

# LSTM Tagger for Part-of-Speech Tagging

 * 

In [None]:
# Prepare data

def prepare_sequence(seq, to_ix):
    """Convert a sequence of symbols into a 1-D tensor of integer indices.

    Args:
        seq: iterable of keys (e.g. words or tags).
        to_ix: mapping from each key to its integer index.

    Returns:
        A ``torch.long`` tensor with one index per element of ``seq``.

    Raises:
        KeyError: if an element of ``seq`` is missing from ``to_ix``.
    """
    indices = []
    for symbol in seq:
        indices.append(to_ix[symbol])
    return torch.tensor(indices, dtype=torch.long)

# Toy corpus: each entry is a (tokens, part-of-speech tags) pair of equal length.
training_data = [
    (["the", "dog", "ate", "the", "apple"], ["DET", "NN", "V", "DET", "NN"]),
    (["Everybody", "read", "that", "book"], ["NN", "V", "DET", "NN"]),
]

# Assign every distinct word the next free index, in order of first appearance.
word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        # setdefault leaves words that were already seen untouched.
        word_to_ix.setdefault(word, len(word_to_ix))
print(word_to_ix)

# Fixed tag inventory.
tag_to_ix = {"DET": 0, "NN": 1, "V": 2}

# Kept small on purpose so the effect of training is easy to inspect.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6


In [None]:
# Show the toy corpus: a list of (tokens, tags) pairs.
print(training_data)

In [None]:
# Create the model: 

class LSTMTagger(nn.Module):
    """Sequence tagger: word embedding -> single-layer LSTM -> linear -> log-softmax.

    The LSTM state lives on the instance as ``self.hidden``. Every ``forward``
    call starts from that state and overwrites it, so a caller that wants a
    sentence scored independently of the previous one must first assign
    ``model.hidden = model.init_hidden()``.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, target_size):
        """Build the layers.

        Args:
            embedding_dim: size of each word-embedding vector.
            hidden_dim: number of units in the LSTM hidden/cell state.
            vocab_size: number of distinct word indices.
            target_size: number of distinct tags to score.
        """
        super().__init__()
        self.hidden_dim = hidden_dim

        # Word index -> dense vector.
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM consumes the embeddings and emits one hidden state per word.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # Projects each hidden state onto the tag space.
        self.hidden2tag = nn.Linear(hidden_dim, target_size)

        # Start from an all-zero recurrent state.
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero (hidden state, cell state) pair.

        Each tensor is shaped ``(1, 1, hidden_dim)``.
        """
        state_shape = (1, 1, self.hidden_dim)
        return (torch.zeros(state_shape), torch.zeros(state_shape))

    def forward(self, sentence):
        """Score every tag for every word of one sentence.

        Args:
            sentence: 1-D tensor of word indices.

        Returns:
            Tensor of shape ``(len(sentence), target_size)`` holding
            log-probabilities; entry ``(i, j)`` is the score of tag ``j`` for
            word ``i``.
        """
        seq_len = len(sentence)

        # Lay the embeddings out as (seq_len, batch=1, embedding_dim).
        embedded = self.word_embeddings(sentence).view(seq_len, 1, -1)

        # Continue from the stored state and remember the new one.
        lstm_out, self.hidden = self.lstm(embedded, self.hidden)

        # Drop the batch axis before projecting onto the tag space.
        logits = self.hidden2tag(lstm_out.view(seq_len, -1))
        return F.log_softmax(logits, dim=1)
        

In [None]:
# Train the model

model = LSTMTagger(
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    vocab_size=len(word_to_ix),
    target_size=len(tag_to_ix),
)
# Negative log-likelihood pairs with the log-softmax scores the model returns.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Look at the scores before training.
# Entry (i, j) is the score of tag j for word i.
# No gradients are needed for this inspection pass.
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_score = model(inputs)

print("inputs \n")
print(inputs)
print("\noutputs \n")
print(tag_score)

In [None]:
for epoch in range(300):
    for sentence, tags in training_data:

        # Step 1 - encode the words and their gold tags as index tensors.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)

        # Step 2 - clear the gradients accumulated by the previous step and
        # detach from the previous sentence by resetting the LSTM state.
        model.zero_grad()
        model.hidden = model.init_hidden()

        # Step 3 - forward pass.
        tag_scores = model(sentence_in)

        # Step 4 - compute the loss, back-propagate, and update the parameters.
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        
# See the scores after training.
# Entry (i, j) of each printed tensor is the log-probability of tag j for word i;
# the predicted tag for a word is the column with the largest value.
with torch.no_grad():
    for i in range(len(training_data)):
        # The model carries its LSTM state across calls in `model.hidden`.
        # Without this reset the first sentence would be scored starting from
        # the state left behind by the last training step, and each later
        # sentence from the state left by the one before it. Training always
        # started from a zero state, so evaluation must do the same.
        model.hidden = model.init_hidden()

        inputs = prepare_sequence(training_data[i][0], word_to_ix)
        tag_scores = model(inputs)

        print(tag_scores)
        print(training_data[i][0])
        print(training_data[i][1])

# Tag -> column index, for reading the score matrices above.
print(tag_to_ix)
        