# LSTM

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x2c162743a90>

In [2]:
input_dim = 3
output_dim = 3
lstm = nn.LSTM(input_dim, output_dim)
print(lstm)

LSTM(3, 3)


In [6]:
# sequence 생성
inputs = [torch.randn(1, 3) for _ in range(5)]
print(inputs)

[tensor([[-0.9999, -1.6476,  0.8098]]), tensor([[ 0.0554,  1.1340, -0.5326]]), tensor([[ 0.6592, -1.5964, -0.3769]]), tensor([[-3.1020, -0.0995, -0.7213]]), tensor([[ 1.2708, -0.0020, -1.0952]])]


In [7]:
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))

# 한번에 하나의 단어만 lstm에 투입
for i in inputs:
    print(i.shape)
    out, hidden = lstm(i.view(1, 1, -1), hidden)

torch.Size([1, 3])
torch.Size([1, 3])
torch.Size([1, 3])
torch.Size([1, 3])
torch.Size([1, 3])


In [8]:
# 한꺼번에 sequence 전체를 lstm에 투입
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
print(inputs.shape)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))

torch.Size([5, 1, 3])


In [9]:
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[-0.2763,  0.2370, -0.2530]],

        [[-0.1142,  0.3029, -0.1229]],

        [[-0.2502,  0.0923, -0.2127]],

        [[-0.0960,  0.0908, -0.1823]],

        [[-0.0137,  0.0445, -0.1324]]])
(tensor([[[-0.0137,  0.0445, -0.1324]]]), tensor([[[-0.0331,  0.1168, -0.4049]]]))


# An LSTM for Part-of-Speech Tagging

In [10]:
# prepard data
# sequence vector to indexs vector
def prepare_sequence(seq, word2idx):
    idxs = [word2idx[w] for w in seq]
    return torch.LongTensor(idxs)

In [12]:
# input: 문장의 token
# target: token들의 품사
train_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]

word2idx = {}
for sentence, tag in train_data:
    for word in sentence:
        if word not in word2idx:
            word2idx[word] = len(word2idx)
print(word2idx)

tag2idx = {"DET": 0, "NN": 1, "V": 2}

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}


In [24]:
embedding_dim = 6
hidden_dim = 6
vocab_size = len(word2idx)
tagset_size = len(tag2idx)

class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger,self).__init__()
        
        self.hidden_dim = hidden_dim
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        
        self.hidden = self.init_hidden()
    
    def init_hidden(self):
        # num_layers, minibatch_size, hidden_dim
        return (torch.zeros(1, 1, self.hidden_dim), torch.zeros(1, 1, self.hidden_dim))
    
    def forward(self, x):
#         print("Original input size:", x.shape)
        emb = self.embedding(x)
#         print("Embedding output size:", emb.shape)
        emb = emb.view(len(x), 1, -1) 
#         print("Resized embedding output size:", emb.shape)
        
        lstm_out, self.hidden = self.lstm(emb.view(len(x), 1, -1), self.hidden)
        tag_out = self.hidden2tag(lstm_out.view(len(x), -1))
        out = F.log_softmax(tag_out, dim=1)
        return out

In [25]:
model = LSTMTagger(embedding_dim, hidden_dim, vocab_size, tagset_size)
print(model)

LSTMTagger(
  (embedding): Embedding(9, 6)
  (lstm): LSTM(6, 6)
  (hidden2tag): Linear(in_features=6, out_features=3, bias=True)
)


In [26]:
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [27]:
# 학습 전의 output 확인
with torch.no_grad():
    inputs = prepare_sequence(train_data[0][0], word2idx)
    outputs = model(inputs)
    print(outputs)

tensor([[-1.0118, -1.3521, -0.9735],
        [-1.0225, -1.2918, -1.0065],
        [-1.0567, -1.2470, -1.0077],
        [-1.0860, -1.1678, -1.0459],
        [-1.0265, -1.2483, -1.0363]])


In [28]:
# 학습 과정
import numpy as np
for epoch in range(300):
    losses = []
    for sentence, tags in train_data:
        model.zero_grad()
        model.hidden = model.init_hidden()
        
        sentence = prepare_sequence(sentence, word2idx)
        tags = prepare_sequence(tags, tag2idx)
        
#         print(sentence.shape)
#         print(tags.shape)
        
        outputs = model(sentence)
        loss = criterion(outputs, tags)
        loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
    print("[%d/%d] loss:%.3f" % (epoch+1, 300, np.mean(losses)))

[1/300] loss:1.153
[2/300] loss:1.141
[3/300] loss:1.130
[4/300] loss:1.120
[5/300] loss:1.112
[6/300] loss:1.105
[7/300] loss:1.098
[8/300] loss:1.092
[9/300] loss:1.087
[10/300] loss:1.082
[11/300] loss:1.078
[12/300] loss:1.074
[13/300] loss:1.071
[14/300] loss:1.068
[15/300] loss:1.065
[16/300] loss:1.063
[17/300] loss:1.060
[18/300] loss:1.058
[19/300] loss:1.056
[20/300] loss:1.054
[21/300] loss:1.052
[22/300] loss:1.050
[23/300] loss:1.049
[24/300] loss:1.047
[25/300] loss:1.046
[26/300] loss:1.044
[27/300] loss:1.043
[28/300] loss:1.041
[29/300] loss:1.040
[30/300] loss:1.038
[31/300] loss:1.037
[32/300] loss:1.036
[33/300] loss:1.034
[34/300] loss:1.033
[35/300] loss:1.031
[36/300] loss:1.030
[37/300] loss:1.028
[38/300] loss:1.027
[39/300] loss:1.026
[40/300] loss:1.024
[41/300] loss:1.023
[42/300] loss:1.021
[43/300] loss:1.019
[44/300] loss:1.018
[45/300] loss:1.016
[46/300] loss:1.015
[47/300] loss:1.013
[48/300] loss:1.011
[49/300] loss:1.010
[50/300] loss:1.008
[51/300] 

In [29]:
# 학습 후의 output 확인
with torch.no_grad():
    inputs = prepare_sequence(train_data[0][0], word2idx)
    outputs = model(inputs)
    print(outputs)

tensor([[-0.2016, -2.6027, -2.2211],
        [-4.7044, -0.0215, -4.4085],
        [-1.5090, -1.9637, -0.4486],
        [-0.0980, -4.1194, -2.5625],
        [-4.5778, -0.0152, -5.3282]])


In [48]:
# test
test_sentence = "The boy goes to high school"
test_sentence = test_sentence.split()

print(test_sentence)

word2idx = {}
for word in test_sentence:
    if word not in word2idx:
        word2idx[word] = len(word2idx)
    
tag2idx = {"DET": 0, "NN": 1, "V": 2}
idx2tag = {idx: tag for tag, idx in tag2idx.items()}

test_sentence = prepare_sequence(test_sentence, word2idx)
outputs = model(test_sentence)
scores, idxs = torch.max(outputs, dim=1)
result_tag = [idx2tag[idx.item()] for idx in idxs]
print(result_tag)

['The', 'boy', 'goes', 'to', 'high', 'school']
['DET', 'NN', 'V', 'DET', 'NN', 'NN']
