In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms

In [20]:
#定义训练数据
training_data = [
    ("The cat ate the fish".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("They read that book".split(), ["NN", "V", "DET", "NN"])
]
#定义测试数据
testing_data=[("They ate the fish".split())]

In [21]:
training_data

[(['The', 'cat', 'ate', 'the', 'fish'], ['DET', 'NN', 'V', 'DET', 'NN']),
 (['They', 'read', 'that', 'book'], ['NN', 'V', 'DET', 'NN'])]

In [22]:
word_to_ix = {} # 单词的索引字典
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print(word_to_ix)

{'The': 0, 'cat': 1, 'ate': 2, 'the': 3, 'fish': 4, 'They': 5, 'read': 6, 'that': 7, 'book': 8}


In [23]:
tag_to_ix = {"DET": 0, "NN": 1, "V": 2} # 手工设定词性标签数据字典

In [24]:
class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        
        ## 这里采用PyTorch的nn.Embedding层，把整数转换为向量，参数为（词总数，每个词的向量长度（
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.hidden = self.init_hidden()
        
    # 初始化隐含状态State及C
    def init_hidden(self):
        return (torch.zeros(1,1,self.hidden_dim),
               torch.zeros(1,1,self.hidden_dim))
    
    def forward(self, sentence):
        # 获得词嵌入矩阵embeds
        embeds = self.word_embeddings(sentence)
        # 按lstm格式，修改embeds的形状
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence),1,-1),self.hidden)
        # 修改隐含状态的形状，作为全连接层的输入
        tag_space = self.hidden2tag(lstm_out.view(len(sentence),-1))
        # 计算每个单词属于各词性的概率
        tag_scores = F.log_softmax(tag_space,dim=1)
        return tag_scores

In [25]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    tensor = torch.LongTensor(idxs)
    return tensor

In [26]:
EMBEDDING_DIM = 10
HIDDEN_DIM = 3  #这里等于词性个数

model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix),len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.1)

In [27]:
inputs = prepare_sequence(training_data[0][0],word_to_ix)
tag_scores = model(inputs)
print(training_data[0][0])
print(inputs)
print(tag_scores)
print(torch.max(tag_scores,1))

['The', 'cat', 'ate', 'the', 'fish']
tensor([0, 1, 2, 3, 4])
tensor([[-1.0310, -1.4494, -0.8949],
        [-0.9570, -1.4486, -0.9648],
        [-0.9494, -1.4988, -0.9426],
        [-0.9904, -1.5677, -0.8674],
        [-1.2239, -1.5541, -0.7041]], grad_fn=<LogSoftmaxBackward>)
torch.return_types.max(
values=tensor([-0.8949, -0.9570, -0.9426, -0.8674, -0.7041], grad_fn=<MaxBackward0>),
indices=tensor([2, 0, 2, 2, 2]))


In [28]:
for epoch in range(400): # 我们要训练400次 
    for sentence, tags in training_data:
        # 清除网络先前的梯度值
        model.zero_grad()
        #重新初始化隐藏层数据
        model.hidden = model.init_hidden()
        # 按网络要求的格式处理输入数据和真实标签数据
        sentence_in = prepare_sequence(sentence,word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)
        # 实例化模型
        tag_scores = model(sentence_in)
        # 计算损失，反向传递梯度及更新模型参数
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        
        
# 查看模型训练的结果
inputs = prepare_sequence(training_data[0][0],word_to_ix)
tag_scores = model(inputs)
print(training_data[0][0])
print(tag_scores)
print(torch.max(tag_scores,1))

['The', 'cat', 'ate', 'the', 'fish']
tensor([[-0.2750, -1.4723, -4.5065],
        [-5.3480, -0.0094, -5.3721],
        [-3.3353, -2.2446, -0.1527],
        [-0.1542, -3.8914, -2.0997],
        [-3.8327, -0.1462, -2.1686]], grad_fn=<LogSoftmaxBackward>)
torch.return_types.max(
values=tensor([-0.2750, -0.0094, -0.1527, -0.1542, -0.1462], grad_fn=<MaxBackward0>),
indices=tensor([0, 1, 2, 0, 1]))


In [29]:
test_inputs = prepare_sequence(testing_data[0], word_to_ix)
tag_scores01 = model(test_inputs)
print(testing_data[0])
print(test_inputs)
print(tag_scores01)
print(torch.max(tag_scores01,1))

['They', 'ate', 'the', 'fish']
tensor([5, 2, 3, 4])
tensor([[-7.3369, -0.0082, -4.8858],
        [-3.6112, -2.3159, -0.1343],
        [-0.1556, -3.8931, -2.0897],
        [-3.8856, -0.1464, -2.1569]], grad_fn=<LogSoftmaxBackward>)
torch.return_types.max(
values=tensor([-0.0082, -0.1343, -0.1556, -0.1464], grad_fn=<MaxBackward0>),
indices=tensor([1, 2, 0, 1]))
