In [27]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

'''准备数据'''
training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", 'V', "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]

# 给单词编码
word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print word_to_ix
tag_to_ix = {"DET":0, "NN":1, "V":2}

def prepare_sequence(seq, to_ix):
    '''把seq转换成ix列表，Variable格式。
    Args:
        seq: 句子，单词序列或者词性序列
        to_ix: 是word_to_ix, 或者tag_to_ix
    Returns:
        res: Variable, size = len(seq)
    '''
    idxs = [to_ix[word] for word in seq]
    res = Variable(torch.LongTensor(idxs))
    return res
    
'''超参设置'''
# 一般32,64
EMBEDDING_DIM = 6
HIDDEN_DIM = 6
LEARNING_RATE = 0.1

{'Everybody': 5, 'ate': 2, 'apple': 4, 'that': 7, 'read': 6, 'dog': 1, 'book': 8, 'the': 3, 'The': 0}


In [33]:
'''网络模型'''
class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        # (词汇总数量, embedding维度)
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        # embedding as input, output hidden_dim 
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        # linear hidden state to tag  space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.hidden = self.init_hidden()
    
    def init_hidden(self):
        '''init hidden, h0 and c0'''
        # (num_layers*num_directions, minibatch_size, hidden_dim)
        h0 = Variable(torch.zeros(1, 1, self.hidden_dim))
        c0 = Variable(torch.zeros(1, 1, self.hidden_dim))
        return (h0, c0)
    
    def forward(self, sentence):
        ''' 前向计算
        Args:
            sentence: 单词列表，用id表示。Variable格式
        Returns:
            标签得分
        '''
        embeds = self.word_embeddings(sentence)
        lstm_out, self.hidden =  self.lstm(
            embeds.view(len(sentence), 1, -1), self.hidden)
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space)
        return tag_scores
 
'''定义loss和优化器'''
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))

loss_func = nn.NLLLoss()    # it is useful when you have an unbalanced training set
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)
print training_data[0][0]
inputs = prepare_sequence(training_data[0][0], word_to_ix)
#print inputs.data
tag_scores = model(inputs)
print tag_scores

'''训练网络'''
for epoch in range(100):
    for sentence, tags in training_data:
        # 1. 清空梯度和初始化参数
        model.zero_grad()
        model.hidden = model.init_hidden()
        # 2. 准备数据
        sentence_in = prepare_sequence(sentence, word_to_ix)
        tags_in = prepare_sequence(tags, tag_to_ix)
        # 3. 前向计算
        tag_scores = model(sentence_in)
        # 4. 计算误差，梯度，更新参数
        loss = loss_func(tag_scores, tags_in)
        loss.backward()
        optimizer.step()

inputs = prepare_sequence(training_data[0][0], word_to_ix)
#print inputs.data
tag_scores = model(inputs)
print tag_scores

['The', 'dog', 'ate', 'the', 'apple']
Variable containing:
-0.9724 -1.1028 -1.2382
-0.9942 -1.1470 -1.1636
-0.9147 -1.1315 -1.2844
-0.9943 -1.1123 -1.1999
-0.9829 -1.0992 -1.2289
[torch.FloatTensor of size 5x3]

Variable containing:
-0.3184 -1.7903 -2.2464
-1.7638 -0.5437 -1.3943
-1.3609 -0.7046 -1.3892
-0.4327 -1.4474 -2.1536
-1.8662 -0.3600 -1.9133
[torch.FloatTensor of size 5x3]



**API总结**

In [13]:
# num_embeddings: vocab_size, embedding_dim: single embedding vector dim
embedding = nn.Embedding(num_embeddings = 10, embedding_dim = 3)
inputs = Variable(torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]])) 
print embedding(inputs).data.size()
# 一共5个词，5个向量，每个向量3维
print embedding(Variable(torch.LongTensor([1, 2, 3, 4, 5])))

torch.Size([2, 4, 3])
Variable containing:
 0.5355 -1.1695  0.9449
-2.2623  0.3127  0.1137
 1.3729 -1.6990  0.0285
-0.1547  0.2261 -0.6028
-0.4340 -1.3596 -0.4329
[torch.FloatTensor of size 5x3]

