In [1]:
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable

torch.manual_seed(1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
train_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]

In [3]:
word2idx = {}
char2idx = {}

def prepare_char_seq(word, char2idx):
    idxs = []
    for char in word:
        idxs.append(char2idx[char])
        
    return idxs

def prepare_seq(seq, word2idx, char2idx):
    idxs = []
    for word in seq:
        idxs.append((word2idx[word], prepare_char_seq(word, char2idx)))
    
    return idxs

def preprare_tag(tag, tag2idx):
    idxs = []
    for t in tag:
        idxs.append(tag2idx[t])
    
    return torch.LongTensor(idxs).to(device)

In [4]:
for sent, tags in train_data:
    for word in sent:
        if word not in word2idx:
            word2idx[word] = len(word2idx)
        for char in word:
            if char not in char2idx:
                char2idx[char] = len(char2idx)
                
print(word2idx)
print(char2idx)

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}
{'T': 0, 'h': 1, 'e': 2, 'd': 3, 'o': 4, 'g': 5, 'a': 6, 't': 7, 'p': 8, 'l': 9, 'E': 10, 'v': 11, 'r': 12, 'y': 13, 'b': 14, 'k': 15}


In [5]:
tag2idx = {"DET": 0, "NN": 1, "V": 2}
idx2tag = {v: k for k, v in tag2idx.items()}

print(tag2idx)
print(idx2tag)

{'DET': 0, 'NN': 1, 'V': 2}
{0: 'DET', 1: 'NN', 2: 'V'}


In [6]:
char_embedding_dim = 3
char_hidden_dim = 3

word_embedding_dim = 6
hidden_dim = 6

word_vocab_size = len(word2idx)
char_vocab_size = len(char2idx)
tagset_size = len(tag2idx)

In [7]:
class LSTMTagger(nn.Module):
    def __init__(self, word_embedding_dim, char_embedding_dim, 
                 hidden_dim, char_hidden_dim,
                word_vocab_size, char_vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.word_embedding_dim = word_embedding_dim
        self.char_embedding_dim = char_embedding_dim
        
        self.char_embedding = nn.Embedding(char_vocab_size, char_embedding_dim)
        self.char_lstm = nn.LSTM(char_embedding_dim, char_hidden_dim)
        
        self.word_embedding = nn.Embedding(word_vocab_size, word_embedding_dim)
        self.word_lstm = nn.LSTM(word_embedding_dim + char_hidden_dim, hidden_dim)
        
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        
        self.hidden = self.init_hidden(hidden_dim)
        self.char_hidden = self.init_hidden(char_hidden_dim)
        
    def init_hidden(self, dim):
        return (torch.zeros(1, 1, dim).to(device),
                torch.zeros(1, 1, dim).to(device))
    
    def forward(self, sentence):
        word_idxs = []
        char_lstm_result = []
        for word in sentence:
            words = word[0]
            chars = word[1]
            
            # init character hidden
            self.char_hidden = self.init_hidden(char_hidden_dim)
            
            word_idxs.append(words)
            char_idx = torch.LongTensor(chars).to(device)
            
            char_emb_out = self.char_embedding(char_idx)
            char_emb_out = char_emb_out.view(len(chars), 1, self.char_embedding_dim)
            
            char_lstm_out, self.char_hidden = self.char_lstm(char_emb_out, self.char_hidden)
            char_lstm_result.append(char_lstm_out[-1])
    
        char_lstm_result = torch.stack(char_lstm_result)
            
        word_idxs = torch.LongTensor(word_idxs).to(device)
        
        word_emb_out = self.word_embedding(word_idxs)
        word_emb_out = word_emb_out.view(len(sentence), 1, self.word_embedding_dim)
        
        lstm_in = torch.cat((word_emb_out, char_lstm_result), dim=2)
        lstm_out, self.hidden = self.word_lstm(lstm_in, self.hidden)
        lstm_out = lstm_out.view(len(sentence), -1)
        
        tag = self.hidden2tag(lstm_out)
        out = F.log_softmax(tag)
        return out

In [8]:
model = LSTMTagger(word_embedding_dim, char_embedding_dim,
                   hidden_dim, char_hidden_dim,
                   word_vocab_size, char_vocab_size, tagset_size).to(device)
print(model)

LSTMTagger(
  (char_embedding): Embedding(16, 3)
  (char_lstm): LSTM(3, 3)
  (word_embedding): Embedding(9, 6)
  (word_lstm): LSTM(9, 6)
  (hidden2tag): Linear(in_features=6, out_features=3, bias=True)
)


In [9]:
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [10]:
# training

num_epochs = 1000

for epoch in range(num_epochs):
    for step, (sentence, tags) in enumerate(train_data):
        
        model.zero_grad()
        model.hidden = model.init_hidden(hidden_dim)
            
        inputs = prepare_seq(sentence, word2idx, char2idx)
        targets = preprare_tag(tags, tag2idx)

        output = model(inputs)
        loss = criterion(output, targets)
        
        loss.backward()
        optimizer.step()
    if (epoch+1)% 30 == 0:
        print("[%d/%d] loss:%.3f" % (epoch+1, num_epochs, loss.item()))



[30/1000] loss:0.962
[60/1000] loss:0.797
[90/1000] loss:0.536
[120/1000] loss:0.286
[150/1000] loss:0.159
[180/1000] loss:0.100
[210/1000] loss:0.070
[240/1000] loss:0.052
[270/1000] loss:0.041
[300/1000] loss:0.033
[330/1000] loss:0.028
[360/1000] loss:0.024
[390/1000] loss:0.021
[420/1000] loss:0.019
[450/1000] loss:0.017
[480/1000] loss:0.015
[510/1000] loss:0.014
[540/1000] loss:0.013
[570/1000] loss:0.012
[600/1000] loss:0.011
[630/1000] loss:0.010
[660/1000] loss:0.010
[690/1000] loss:0.009
[720/1000] loss:0.008
[750/1000] loss:0.008
[780/1000] loss:0.008
[810/1000] loss:0.007
[840/1000] loss:0.007
[870/1000] loss:0.007
[900/1000] loss:0.006
[930/1000] loss:0.006
[960/1000] loss:0.006
[990/1000] loss:0.006


In [11]:
# save model
torch.save(model.state_dict(), "speech_tag.pth")
print("model save complete!")

model save complete!


In [12]:
# test the model

model.eval()

test_data = "Everybody read that book".split()
test_word2idx = {}
test_char2idx = {}

for word in test_data:
    if word not in test_word2idx:
        test_word2idx[word] = len(test_word2idx)
    for char in word:
        if char not in test_char2idx:
            test_char2idx[char] = len(test_char2idx)
print(test_word2idx)
print(test_char2idx)

test_input = prepare_seq(test_data, test_word2idx, test_char2idx)
test_output = model(test_input)

print(test_output)

score, idx = torch.max(test_output, dim=1)
print(score, idx)

result = [idx2tag[i.item()] for i in idx]
print(result)


{'Everybody': 0, 'read': 1, 'that': 2, 'book': 3}
{'E': 0, 'v': 1, 'e': 2, 'r': 3, 'y': 4, 'b': 5, 'o': 6, 'd': 7, 'a': 8, 't': 9, 'h': 10, 'k': 11}
tensor([[-0.0329, -3.4346, -8.7481],
        [-6.6603, -0.0021, -7.0842],
        [-7.3668, -5.4001, -0.0052],
        [-0.0042, -6.0834, -6.2607]])
tensor(1.00000e-02 *
       [-3.2935, -0.2121, -0.5161, -0.4199]) tensor([ 0,  1,  2,  0])
['DET', 'NN', 'V', 'DET']


