In [45]:
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable

torch.manual_seed(1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [46]:
train_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]

In [47]:
word2idx = {}
char2idx = {}
tag2idx = {}

def prepare_char_seq(word, char2idx):
    idxs = []
    for char in word:
        idxs.append(char2idx[char])
        
    return idxs

def prepare_seq(seq, word2idx, char2idx):
    idxs = []
    for word in seq:
        idxs.append((word2idx[word], prepare_char_seq(word, char2idx)))
    
    return idxs

def preprare_tag(tag, tag2idx):
    idxs = []
    for t in tag:
        idxs.append(tag2idx[t])
    
    return torch.LongTensor(idxs).to(device)

In [48]:
for sent, tags in train_data:
    for word in sent:
        if word not in word2idx:
            word2idx[word] = len(word2idx)
        for char in word:
            if char not in char2idx:
                char2idx[char] = len(char2idx)
                
print(word2idx)
print(char2idx)

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}
{'T': 0, 'h': 1, 'e': 2, 'd': 3, 'o': 4, 'g': 5, 'a': 6, 't': 7, 'p': 8, 'l': 9, 'E': 10, 'v': 11, 'r': 12, 'y': 13, 'b': 14, 'k': 15}


In [49]:
tag2idx = {"DET": 0, "NN": 1, "V": 2}

In [50]:
char_embedding_dim = 3
char_hidden_dim = 3
word_embedding_dim = 6
hidden_dim = 6
word_vocab_size = len(word2idx)
char_vocab_size = len(char2idx)
tagset_size = len(tag2idx)

In [57]:
class LSTMTagger(nn.Module):
    def __init__(self, word_embedding_dim, char_embedding_dim, 
                 hidden_dim, char_hidden_dim,
                word_vocab_size, char_vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.word_embedding_dim = word_embedding_dim
        self.char_embedding_dim = char_embedding_dim
        
        self.char_embedding = nn.Embedding(char_vocab_size, char_embedding_dim)
        self.char_lstm = nn.LSTM(char_embedding_dim, char_hidden_dim)
        
        self.word_embedding = nn.Embedding(word_vocab_size, word_embedding_dim)
        self.word_lstm = nn.LSTM(word_embedding_dim + char_hidden_dim, hidden_dim)
        
        self.hidden_to_tag = nn.Linear(hidden_dim, tagset_size)
        
        self.hidden = self.init_hidden(hidden_dim)
        self.char_hidden = self.init_hidden(char_hidden_dim)
        
    def init_hidden(self, dim):
        return (Variable(torch.zeros(1, 1, dim), requires_grad=True), 
                Variable(torch.zeros(1, 1, dim), requires_grad=True))
    
    def forward(self, sentence):
        
#         print("sentence:", sentence)
        
        word_idxs = []
        lstm_char_result = []
        for word in sentence:
#             print("word:", word)
#             print("word[0]:", word[0]) # word idx
#             print("word[1]:", word[1]) # char idxs
            
            word_idxs.append(word[0])
            
            char_idx = torch.LongTensor(word[1]).to(device)
            char_embed = self.char_embedding(char_idx)
#             print("char_embed:", char_embed.shape)
            
            char_embed = char_embed.view(len(word[1]), 1, char_embedding_dim)
#             print("reshaped char_embed:", char_embed.shape)
            
            lstm_char_out, self.char_hidden = self.char_lstm(char_embed, self.char_hidden)
#             print("lstm_char_out:", lstm_char_out.shape)
            
#             print("lstm_char_out[-1]:", lstm_char_out[-1])
            
            lstm_char_result.append(lstm_char_out[-1])
            
        word_idxs = torch.LongTensor(word_idxs).to(device)
#         print("word_idxs:", word_idxs.shape)
        
        lstm_char_result = torch.stack(lstm_char_result)
#         print("lstm_char_result:", lstm_char_result.shape)
        
        
        word_embed = self.word_embedding(word_idxs)
#         print("word_embed:", word_embed.shape)
        
        word_embed = word_embed.view(len(sentence), 1, self.word_embedding_dim)
#         print("reshaped word_embed:", word_embed.shape)
        
        lstm_in = torch.cat((word_embed, lstm_char_result), dim=2)
#         print("lstm_in:", lstm_in.shape)
        
        
        lstm_out, self.hidden = self.word_lstm(lstm_in, self.hidden)
        
        tag = self.hidden_to_tag(lstm_out.view(len(sentence), -1))
        out = F.log_softmax(tag)
        return out


In [58]:
model = LSTMTagger(word_embedding_dim, char_embedding_dim,
                   hidden_dim, char_hidden_dim,
                   word_vocab_size, char_vocab_size, tagset_size).to(device)
print(model)

LSTMTagger(
  (char_embedding): Embedding(16, 3)
  (char_lstm): LSTM(3, 3)
  (word_embedding): Embedding(9, 6)
  (word_lstm): LSTM(9, 6)
  (hidden_to_tag): Linear(in_features=6, out_features=3, bias=True)
)


In [59]:
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [61]:
for epoch in range(300):
    for step, (sentence, tags) in enumerate(train_data):
        
        print(step)
        model.zero_grad()
        
        inputs = prepare_seq(sentence, word2idx, char2idx)
        targets = preprare_tag(tags, tag2idx)

        output = model(inputs)
        loss = criterion(output, targets)
        print(loss)
        
        loss.backward()
        optimizer.step()

0
tensor(1.1117)




RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.