In [1]:
# 使用BiLSTM简单实现，实现给定一个长句子，预测下一个单词
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import numpy as np


# 就一个句子，我们是要用这个句子前两个词预测第三个，前三个预测第四个,...
def make_data(sentence):
    input_data = []
    input_label = []
    words = sentence.split()
    for i in range(max_len - 1):## word_len
        input_data_tmp = [word2id[j] for j in words[:(i + 1)]]
        input_data_tmp = input_data_tmp + [0] * (max_len - len(input_data_tmp))  # padding操作
        input_label_tmp = word2id[words[i + 1]]
        input_data.append(np.eye(vocab_size)[input_data_tmp])  # 用对角矩阵初始化一下
#         np.eye()的函数，除了生成对角阵外，还可以将一个label数组，大小为(1,m)或者(m,1)的数组，转化成one-hot数组。
        input_label.append(input_label_tmp)
    return torch.Tensor(input_data), torch.LongTensor(input_label)


class BiLSTM(nn.Module):
    def __init__(self):
        super(BiLSTM, self).__init__()
        # LSTM层：input_size: 每个x的特征个数，hidden_size:隐藏层输出的维度， num_layer:lstm单元的个数
        self.lstm = nn.LSTM(input_size=vocab_size, hidden_size=n_hidden, bidirectional=True)
        self.fc = nn.Linear(n_hidden*2, vocab_size)

    def forward(self, X):
        batch_size = X.shape[0]
        the_input = X.transpose(0, 1)
        h_state = torch.randn(1*2, batch_size, n_hidden)  # 双向的就是把句子正着输入一次，再反着输入一次
        c_state = torch.randn(1*2, batch_size, n_hidden)
        the_output, (_, _) = self.lstm(the_input, (h_state, c_state))
        the_output = the_output[-1]
        res = self.fc(the_output)
        return res

In [2]:
sentence = ('GitHub Actions makes it easy to automate all your software '
            'workflows from continuous integration and delivery to issue triage and more')
vocab = list(set(sentence.split()))
word2id = {w: i for i, w in enumerate(vocab)}
# idx_to_species = dict((v,k) for k,v in species_to_idx.items()) 
# id2word = {i: w for i, w in enumerate(vocab)}##???
id2word = dict((v,k) for k,v in word2id.items()) 

vocab_size = len(vocab)
max_len = len(sentence.split())  # 去这个句子的长度，后续用来padding
n_hidden = 5

In [5]:
sentence.split()
# len(sentence.split())

['GitHub',
 'Actions',
 'makes',
 'it',
 'easy',
 'to',
 'automate',
 'all',
 'your',
 'software',
 'workflows',
 'from',
 'continuous',
 'integration',
 'and',
 'delivery',
 'to',
 'issue',
 'triage',
 'and',
 'more']

In [2]:
input_data, input_label = make_data(sentence)
dataset = Data.TensorDataset(input_data, input_label)
dataloader = Data.DataLoader(dataset, 16, True)

model = BiLSTM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练
for epoch in range(10000):
    for x, y in dataloader:
        optimizer.zero_grad()
        
        pred = model(x)
        loss = criterion(pred, y)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
        
        loss.backward()
        optimizer.step()

# 预测
predict = model(input_data).data.max(1, keepdim=True)[1]  
# keepdim 表示是否需要保持输出的维度与输入一样
print(sentence)
print([id2word[n.item()] for n in predict.squeeze()])

  return torch.Tensor(input_data), torch.LongTensor(input_label)


Epoch: 1000 cost = 1.611466
Epoch: 1000 cost = 1.725193
Epoch: 2000 cost = 1.317163
Epoch: 2000 cost = 0.655809
Epoch: 3000 cost = 1.036820
Epoch: 3000 cost = 0.503574
Epoch: 4000 cost = 0.704783
Epoch: 4000 cost = 0.950300
Epoch: 5000 cost = 0.517302
Epoch: 5000 cost = 1.224710
Epoch: 6000 cost = 0.501025
Epoch: 6000 cost = 0.974599
Epoch: 7000 cost = 0.664167
Epoch: 7000 cost = 0.126774
Epoch: 8000 cost = 0.563299
Epoch: 8000 cost = 0.422852
Epoch: 9000 cost = 0.540960
Epoch: 9000 cost = 0.410004
Epoch: 10000 cost = 0.473942
Epoch: 10000 cost = 0.597568
GitHub Actions makes it easy to automate all your software workflows from continuous integration and delivery to issue triage and more
['to', 'to', 'to', 'Actions', 'Actions', 'automate', 'all', 'your', 'software', 'from', 'workflows', 'continuous', 'integration', 'and', 'delivery', 'to', 'issue', 'triage', 'and', 'more']
