In [1]:
import torch
import numpy as np
import torch.utils.data as Data
import torch.nn as nn 
import torch.optim as optim

In [2]:
# Character-level vocabulary: the 26 lowercase ASCII letters, indexed 0..25.
char_arr = list('abcdefghijklmnopqrstuvwxyz')
idx2word = dict(enumerate(char_arr))                    # index -> character
word2idx = {ch: idx for idx, ch in idx2word.items()}    # character -> index
n_class = len(word2idx)  # number of classes (= vocabulary size)

# Toy corpus: for each word, the leading characters are the model input and
# the final character is the prediction target.
seq_data = [
    'make', 'need', 'coal', 'word', 'love',
    'hate', 'live', 'home', 'hash', 'star',
]

# TextLSTM hyper-parameters
n_step = len(seq_data[0]) - 1  # number of input characters per word (= 3)
n_hidden = 128                 # size of the LSTM hidden state

def make_data(seq_data, vocab=None):
    """Convert words into one-hot input sequences and class-index targets.

    For every word, all characters except the last are one-hot encoded and
    used as the input sequence; the last character's index is the target.

    Args:
        seq_data: iterable of non-empty strings. All strings must have the
            same length so the inputs can be stacked into one tensor.
        vocab: optional mapping from character to integer index; the indices
            must lie in ``range(len(vocab))``. Defaults to the module-level
            ``word2idx`` table.

    Returns:
        A tuple ``(inputs, targets)`` where ``inputs`` is a float32 tensor of
        shape ``[len(seq_data), word_length - 1, len(vocab)]`` and ``targets``
        is an int64 tensor of shape ``[len(seq_data)]``.

    Raises:
        KeyError: if a word contains a character that is not in ``vocab``.
    """
    if vocab is None:
        vocab = word2idx  # fall back to the notebook's global vocabulary

    # Row i of the identity matrix is the one-hot vector for index i.
    # Built once here instead of once per word.
    one_hot_rows = np.eye(len(vocab))

    input_batch, target_batch = [], []
    for seq in seq_data:
        char_ids = [vocab[ch] for ch in seq[:-1]]  # e.g. 'm', 'a', 'k' are the input
        input_batch.append(one_hot_rows[char_ids])
        target_batch.append(vocab[seq[-1]])        # e.g. 'e' is the target

    # Collapse the list of arrays into a single ndarray first: constructing a
    # tensor directly from a list of ndarrays is very slow and emits a warning.
    inputs = torch.tensor(np.array(input_batch), dtype=torch.float32)
    targets = torch.tensor(target_batch, dtype=torch.long)
    return inputs, targets



# Encode the corpus once, then serve it in shuffled mini-batches of 3 words.
input_batch, target_batch = make_data(seq_data)
dataset = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(dataset=dataset, batch_size=3, shuffle=True)



- 输入数据包括input, (h_0, c_0):
> input: shape = [seq_length, batch_size, input_size]的张量

> h_0: shape = [num_layers * num_directions, batch_size, hidden_size]的张量，它包含了当前batch中每个句子的初始隐藏状态。num_layers是LSTM的层数；如果bidirectional = True，则num_directions = 2，否则为1，表示只有一个方向

> c_0: 与h_0的形状相同，它包含的是当前batch中每个句子的初始细胞状态。h_0, c_0如果不提供，则默认为全0

- 输出数据包括output, (h_t, c_t):

> output.shape = [seq_length, batch_size, num_directions * hidden_size]

> 它包含LSTM最后一层在每个时间步t的输出特征(h_t)，t的取值范围是0到seq_length - 1.

> h_t.shape = [num_directions * num_layers, batch_size, hidden_size]，这里的h_t是每一层在最后一个时间步的隐藏状态(即PyTorch文档中的h_n)

> c_t.shape = h_t.shape


In [7]:
class TextLSTM(nn.Module):
    """Single-layer LSTM that predicts the next character of a sequence.

    The input sequence is run through an ``nn.LSTM``; the output at the last
    time step is projected by a linear layer to one score per class.
    """

    def __init__(self, num_classes=None, hidden_size=None):
        """Build the LSTM and the output projection.

        Args:
            num_classes: size of each input feature vector and number of
                output scores. Defaults to the module-level ``n_class``.
            hidden_size: dimension of the LSTM hidden state. Defaults to the
                module-level ``n_hidden``.
        """
        super(TextLSTM, self).__init__()
        if num_classes is None:
            num_classes = n_class
        if hidden_size is None:
            hidden_size = n_hidden

        # input_size: feature dimension of each input step
        # hidden_size: dimension of the LSTM hidden layer
        # num_layers is left at its default
        self.lstm = nn.LSTM(input_size=num_classes, hidden_size=hidden_size)
        # Fully connected layer mapping the hidden state to class scores
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, X):
        """Compute class scores for a batch of sequences.

        Args:
            X: tensor of shape ``[batch_size, n_step, num_classes]``.

        Returns:
            Tensor of shape ``[batch_size, num_classes]`` with unnormalised
            scores for the character following each sequence.
        """
        batch_size = X.shape[0]
        seq = X.transpose(0, 1)  # [n_step, batch_size, num_classes]

        # Zero initial states, allocated with the same dtype and device as the
        # input so the module keeps working after .to(device) / .double().
        # Shape: [num_layers(=1) * num_directions(=1), batch_size, hidden_size]
        state_shape = (1, batch_size, self.lstm.hidden_size)
        hidden_state = X.new_zeros(state_shape)
        cell_state = X.new_zeros(state_shape)

        outputs, _ = self.lstm(seq, (hidden_state, cell_state))
        last_step = outputs[-1]  # [batch_size, hidden_size]
        return self.fc(last_step)

In [8]:
# Network, loss function and optimiser used by the training loop.
model = TextLSTM()
criterion = nn.CrossEntropyLoss()  # applied to the model's scores and the target class indices
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [9]:
# Training: 1000 passes over the loader, one optimiser step per mini-batch.
for epoch in range(1000):
    # Only report on every 100th epoch; the loss of each mini-batch is printed.
    should_log = (epoch + 1) % 100 == 0

    for x, y in loader:
        pred = model(x)
        loss = criterion(pred, y)
        if should_log:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

        # Clear stale gradients, back-propagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

Epoch: 0100 cost = 0.044431
Epoch: 0100 cost = 0.020772
Epoch: 0100 cost = 0.009116
Epoch: 0100 cost = 0.310088
Epoch: 0200 cost = 0.004284
Epoch: 0200 cost = 0.002145
Epoch: 0200 cost = 0.025762
Epoch: 0200 cost = 0.001766
Epoch: 0300 cost = 0.006433
Epoch: 0300 cost = 0.001788
Epoch: 0300 cost = 0.004324
Epoch: 0300 cost = 0.002038
Epoch: 0400 cost = 0.002484
Epoch: 0400 cost = 0.000199
Epoch: 0400 cost = 0.003649
Epoch: 0400 cost = 0.001134
Epoch: 0500 cost = 0.002063
Epoch: 0500 cost = 0.000298
Epoch: 0500 cost = 0.001528
Epoch: 0500 cost = 0.000287
Epoch: 0600 cost = 0.000237
Epoch: 0600 cost = 0.001984
Epoch: 0600 cost = 0.000206
Epoch: 0600 cost = 0.000701
Epoch: 0700 cost = 0.000731
Epoch: 0700 cost = 0.000760
Epoch: 0700 cost = 0.000178
Epoch: 0700 cost = 0.000505
Epoch: 0800 cost = 0.000067
Epoch: 0800 cost = 0.000708
Epoch: 0800 cost = 0.000519
Epoch: 0800 cost = 0.000041
Epoch: 0900 cost = 0.000408
Epoch: 0900 cost = 0.000105
Epoch: 0900 cost = 0.000413
Epoch: 0900 cost = 0

In [10]:
# Predict the last character of every word from its leading characters.
# The slice mirrors the one used in make_data, so the printed prefixes stay
# correct if the word length changes.
inputs = [sen[:-1] for sen in seq_data]

# Inference only: disable autograd instead of reaching for the legacy `.data`.
with torch.no_grad():
    # argmax over the class dimension gives a 1-D tensor of class indices,
    # which stays iterable even when the batch holds a single word.
    predict = model(input_batch).argmax(dim=1)

print(inputs, '->', [idx2word[n] for n in predict.tolist()])

['mak', 'nee', 'coa', 'wor', 'lov', 'hat', 'liv', 'hom', 'has', 'sta'] -> ['e', 'd', 'l', 'd', 'e', 'e', 'e', 'e', 'h', 'r']
