In [10]:
import torch
import torch.nn as nn 
import torch.nn.functional as F 
import numpy as np
import torch.utils.data as Data
import torch.optim as optim
dtype = torch.FloatTensor

In [2]:
sentences = ["i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
vocab = list(set(word_list))
word2idx = {w: i for i, w in enumerate(vocab)}
idx2word = {i: w for i, w in enumerate(vocab)}
n_class = len(vocab)

In [4]:
batch_size = 2
n_step = 2
n_hidden = 5

def make_data(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word2idx[n] for n in word[:-1]]
        target = word2idx[word[-1]]

        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)

    return input_batch, target_batch

input_batch, target_batch = make_data(sentences)
input_batch, target_batch = torch.Tensor(input_batch), torch.LongTensor(target_batch)
dataset = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(dataset, batch_size, True)

In [5]:
print(input_batch)
print(input_batch.shape)
print(np.eye(7)[[1, 2, 5]])

tensor([[[0., 0., 0., 0., 0., 0., 1.],
         [1., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 1., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 1., 0., 0.]]])
torch.Size([3, 2, 7])
[[0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]]


In [18]:
# torch要实现rnn输出需要再加一层fc，RNN模块仅实现了本身的embedding
class TextRNN(nn.Module):
    def __init__(self):
        super(TextRNN, self).__init__()
        # output：(seq_len, batch_size, hidden_size) 为每个时间步得到的hidden_state
        # hn：(num_layers, batch_size, hidden_size) 为最后一个时间步的hidden_state
        self.rnn = nn.RNN(input_size = n_class, hidden_size = n_hidden)
        # input_size 指的是每个单词用多少维的向量去编码
        # hidden_size 指的是输出维度是多少
        # fully connected layer
        self.fc = nn.Linear(n_hidden, n_class)

    def forward(self, hidden, X):
        #print(X.shape)
        # x: [batch_size, n_step, n_class] [2, 2, 7]
        X = X.transpose(0,1)
        #print(X.shape)
        # x: [n_step, batch_size, n_class] [2, 2, 7] 2个字符，每个字符的embedding为n_class维度， batchsize大小为2
        # hidden : [num_layers * num_directions, batch, hidden_size]
        out, hidden = self.rnn(X, hidden)
        # [2, 2, 5] [1, 2, 5] # 字符数,batchsize,维度
        #print(out.shape,hidden.shape)
        out = out[-1]
        model = self.fc(out)
        return model

model = TextRNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [19]:
# Training
for epoch in range(5000):
    for x, y in loader:
      # hidden : [num_layers * num_directions, batch, hidden_size]
      hidden = torch.zeros(1, x.shape[0], n_hidden) # h0
      
      # x : [batch_size, n_step, n_class]
      pred = model(hidden, x)
      #break

      # pred : [batch_size, n_class], y : [batch_size] (LongTensor, not one-hot)
      loss = criterion(pred, y)
      if (epoch + 1) % 1000 == 0:
          print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    #break

Epoch: 1000 cost = 0.083223
Epoch: 1000 cost = 0.074151
Epoch: 2000 cost = 0.013754
Epoch: 2000 cost = 0.010926
Epoch: 3000 cost = 0.003711
Epoch: 3000 cost = 0.003808
Epoch: 4000 cost = 0.001386
Epoch: 4000 cost = 0.001068
Epoch: 5000 cost = 0.000435
Epoch: 5000 cost = 0.000515


In [20]:
input = [sen.split()[:2] for sen in sentences]
#   
hidden = torch.zeros(1, len(input), n_hidden)
predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]
print([sen.split()[:2] for sen in sentences], '->', [idx2word[n.item()] for n in predict.squeeze()])

[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']
