# 简单的RNN实作

## 程式参考来源：
- https://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html
- https://pytorch.org/docs/stable/generated/torch.nn.RNN.html#torch.nn.RNN
- https://pytorch.org/text/stable/vocab.html
- https://pytorch.org/text/stable/functional.html#to-tensor
- https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html


## 载入相关套件

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchtext
import numpy as np

## 嵌入层测试

In [2]:
x = torch.LongTensor([[0,1,2], [3,4,5]])
embeds = nn.Embedding(6, 5) 
print(embeds(x))

tensor([[[-0.2505,  1.4970,  0.7773, -0.8844,  0.8700],
         [ 0.5507, -0.6831,  0.0107, -0.4041,  0.6599],
         [ 1.0529, -0.1430,  0.7113, -1.0951, -0.0185]],

        [[-2.1144, -1.1461,  0.5346,  1.4119, -0.4880],
         [-0.4536, -0.7272, -0.0827, -0.7152, -0.0144],
         [-1.1321,  0.8856, -0.0487, -0.7464,  0.0360]]],
       grad_fn=<EmbeddingBackward0>)


In [3]:
embeds.weight

Parameter containing:
tensor([[-0.2505,  1.4970,  0.7773, -0.8844,  0.8700],
        [ 0.5507, -0.6831,  0.0107, -0.4041,  0.6599],
        [ 1.0529, -0.1430,  0.7113, -1.0951, -0.0185],
        [-2.1144, -1.1461,  0.5346,  1.4119, -0.4880],
        [-0.4536, -0.7272, -0.0827, -0.7152, -0.0144],
        [-1.1321,  0.8856, -0.0487, -0.7464,  0.0360]], requires_grad=True)

In [4]:
x = torch.LongTensor([[1,2,3], [4,5,6]])
embeds = nn.Embedding(7, 5) 
print(embeds(x))

tensor([[[-0.2907,  0.2514, -1.0432,  0.5131,  2.2346],
         [ 0.0395,  0.3973,  0.2307,  1.0515,  0.6651],
         [-0.4278, -0.4222, -0.1849,  2.3283,  0.0529]],

        [[ 0.5288,  0.4761, -0.1157, -0.7658,  0.2662],
         [ 0.1298, -0.6534,  0.4405, -0.5044,  0.1633],
         [-1.5434, -0.9046, -0.1215,  2.0839, -0.6903]]],
       grad_fn=<EmbeddingBackward0>)


In [5]:
embeds = nn.Embedding(6, 5) 
x1 = torch.LongTensor([[0,1,2]])
x2 = torch.LongTensor([[3,4]])
print(embeds(x1))
print(embeds(x2))
embeds.weight

tensor([[[-0.1974, -0.2802, -0.1226, -1.1460, -1.0106],
         [ 0.7972, -0.8367,  0.1772,  0.6812, -0.2185],
         [ 1.4565, -1.0132,  1.6636,  0.0494, -1.0966]]],
       grad_fn=<EmbeddingBackward0>)
tensor([[[ 0.3134,  1.0122, -0.4550, -0.0074, -0.6427],
         [ 1.1022, -1.5268, -0.4586,  1.0043, -0.2670]]],
       grad_fn=<EmbeddingBackward0>)


Parameter containing:
tensor([[-0.1974, -0.2802, -0.1226, -1.1460, -1.0106],
        [ 0.7972, -0.8367,  0.1772,  0.6812, -0.2185],
        [ 1.4565, -1.0132,  1.6636,  0.0494, -1.0966],
        [ 0.3134,  1.0122, -0.4550, -0.0074, -0.6427],
        [ 1.1022, -1.5268, -0.4586,  1.0043, -0.2670],
        [-1.3189, -0.8618,  1.8304, -0.4719,  1.2202]], requires_grad=True)

In [6]:
embeds = nn.Embedding(6, 5, 5) 
x1 = torch.LongTensor([[0,1,2]])
x2 = torch.LongTensor([[3,4]])
x3 = torch.LongTensor([[3,4]])
print(embeds(x1))
print(embeds(x2))
print(embeds(x3))
embeds.weight

tensor([[[-0.9749, -0.4108,  0.3783, -0.5760,  0.7223],
         [-0.3931,  1.4548, -1.4096,  1.6366,  0.5608],
         [-0.5268,  1.1778,  0.3954, -0.4554, -1.0281]]],
       grad_fn=<EmbeddingBackward0>)
tensor([[[ 0.5586,  0.2118,  0.5157, -0.0731, -1.0896],
         [ 0.1027, -0.1333,  0.0766,  0.7858, -1.2786]]],
       grad_fn=<EmbeddingBackward0>)
tensor([[[ 0.5586,  0.2118,  0.5157, -0.0731, -1.0896],
         [ 0.1027, -0.1333,  0.0766,  0.7858, -1.2786]]],
       grad_fn=<EmbeddingBackward0>)


Parameter containing:
tensor([[-0.9749, -0.4108,  0.3783, -0.5760,  0.7223],
        [-0.3931,  1.4548, -1.4096,  1.6366,  0.5608],
        [-0.5268,  1.1778,  0.3954, -0.4554, -1.0281],
        [ 0.5586,  0.2118,  0.5157, -0.0731, -1.0896],
        [ 0.1027, -0.1333,  0.0766,  0.7858, -1.2786],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000]], requires_grad=True)

In [8]:
# 测试资料
word_to_ix = {"hello": 0, "world": 1}
# 词汇表(vocabulary)含2个单字, 转换为5维的向量
embeds = nn.Embedding(2, 5) 
# 测试 hello
lookup_tensor = torch.LongTensor([word_to_ix["hello"]])
hello_embed = embeds(lookup_tensor)
print(hello_embed)

tensor([[0.2347, 0.0490, 0.1800, 0.6384, 0.4259]],
       grad_fn=<EmbeddingBackward0>)


## RNN层测试

In [348]:
torch.randn(5, 3, 10).shape

torch.Size([5, 3, 10])

In [349]:
# 测试资料
input = torch.randn(5, 10)
# 建立 RNN 物件
rnn = nn.RNN(10, 20, 2)
# RNN 处理
output, hn = rnn(input)
# 显示输出及隐藏层的维度
print(output.shape, hn.shape)

torch.Size([5, 20]) torch.Size([2, 20])


In [350]:
# 测试资料
input = torch.randn(5, 4, 10)
# 建立 RNN 物件
rnn = nn.RNN(10, 20, 2)
# RNN 处理
output, hn = rnn(input)
# 显示输出及隐藏层的维度
print(output.shape, hn.shape)

torch.Size([5, 4, 20]) torch.Size([2, 4, 20])


In [351]:
# 测试资料
input = torch.randn(5, 3, 10)
# 建立 RNN 物件
rnn = nn.RNN(10, 20, 2)
# 隐藏层的输入
h0 = torch.randn(2, 3, 20)
# RNN 处理
output, hn = rnn(input, h0)
# 显示输出及隐藏层的维度
print(output.shape, hn.shape)

torch.Size([5, 3, 20]) torch.Size([2, 3, 20])


## 分词

In [352]:
from torchtext.data.utils import get_tokenizer

tokenizer = get_tokenizer('basic_english')

text = 'Could have done better.'        
tokenizer(text)

['could', 'have', 'done', 'better', '.']

## 词汇表处理

In [353]:
from torchtext.vocab import vocab
from collections import Counter, OrderedDict

# BOW 统计
counter = Counter(tokenizer(text))
# 依出现次数降幂排列
sorted_by_freq_tuples = sorted(counter.items(), 
                       key=lambda x: x[1], reverse=True)
# 建立词汇字典
ordered_dict = OrderedDict(sorted_by_freq_tuples)

# 建立词汇表物件，并加一个未知单字(unknown)的索引值
vocab_object = torchtext.vocab.vocab(ordered_dict, specials=["<unk>"])
# 设定词汇表预设值为未知单字(unknown)的索引值
vocab_object.set_default_index(vocab_object["<unk>"])

# 测试
vocab_object['done']

3

In [354]:
vocab_object.get_itos()

['<unk>', 'could', 'have', 'done', 'better', '.']

In [355]:
vocab_object.__len__()

6

In [356]:
import string

string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [357]:
import string

def create_vocabulary(text_list):
    # 取得标点符号
    stopwords = list(string.punctuation)
    
    # 去除标点符号
    clean_text_list = []
    clean_tokens_list = []
    for text in text_list:
        tokens = tokenizer(text) 
        clean_tokens = []
        for w in tokens:
            if w not in stopwords:
                clean_tokens.append(w)
        clean_tokens_list += clean_tokens
        clean_text_list.append(' '.join(clean_tokens)) 
        
    # 建立词汇表物件
    counter = Counter(clean_tokens_list)    
    sorted_by_freq_tuples = sorted(counter.items(), 
                                   key=lambda x: x[1], reverse=True)
    ordered_dict = OrderedDict(sorted_by_freq_tuples)
    vocab_object = torchtext.vocab.vocab(ordered_dict, specials=["<unk>"])
    vocab_object.set_default_index(vocab_object["<unk>"])
    
    # 将输入字串转为索引值：自词汇表物件查询索引值
    clean_index_list = []
    for clean_tokens_list in clean_text_list:
        clean_index_list.append(
            vocab_object.lookup_indices(clean_tokens_list.split(' ')))
    
    # 输出 词汇表物件、去除标点符号的字串阵列、字串阵列的索引值
    return vocab_object, clean_text_list, clean_index_list

## 测试

In [358]:
docs = ['Well done!',
        'Good work',
        'Great effort',
        'nice work',
        'Excellent!',
        'Weak',
        'Poor effort!',
        'not good',
        'poor work',
        'Could have done better.']

vocab_object, clean_text_list, clean_index_list = create_vocabulary(docs)
vocab_object.get_itos()

['<unk>',
 'work',
 'done',
 'good',
 'effort',
 'poor',
 'well',
 'great',
 'nice',
 'excellent',
 'weak',
 'not',
 'could',
 'have',
 'better']

In [359]:
clean_text_list 

['well done',
 'good work',
 'great effort',
 'nice work',
 'excellent',
 'weak',
 'poor effort',
 'not good',
 'poor work',
 'could have done better']

In [360]:
clean_index_list

[[6, 2],
 [3, 1],
 [7, 4],
 [8, 1],
 [9],
 [10],
 [5, 4],
 [11, 3],
 [5, 1],
 [12, 13, 2, 14]]

# 整合以上功能，实作一个简单的案例，说明相关的处理程序

## 建立词汇表：整理输入语句，截长补短，使语句长度一致。

In [361]:
maxlen = 4      # 语句最大字数
# 测试资料
docs = ['Well done!',
        'Good work',
        'Great effort',
        'nice work',
        'Excellent!',
        'Weak',
        'Poor effort!',
        'not good',
        'poor work',
        'Could have done better']

vocab_object, clean_text_list, clean_index_list = create_vocabulary(docs)

# 若字串过长，删除多余单字
clean_index_list = torchtext.functional.truncate(clean_index_list, maxlen)

# 若字串长度不足，后面补 0
while len(clean_index_list[0]) < maxlen:
    clean_index_list[0] += [0]
torchtext.functional.to_tensor(clean_index_list, 0) # 0:不足补0

tensor([[ 6,  2,  0,  0],
        [ 3,  1,  0,  0],
        [ 7,  4,  0,  0],
        [ 8,  1,  0,  0],
        [ 9,  0,  0,  0],
        [10,  0,  0,  0],
        [ 5,  4,  0,  0],
        [11,  3,  0,  0],
        [ 5,  1,  0,  0],
        [12, 13,  2, 14]])

## 嵌入层转换

In [362]:
embeds = nn.Embedding(vocab_object.__len__(), 5) 
X = torchtext.functional.to_tensor(clean_index_list, 0) # 0:不足补0
embed_output = embeds(X)
print(embed_output.shape)

torch.Size([10, 4, 5])


## 加上完全连接层(Linear)

In [366]:
class RecurrentNet(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_class):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.fc = nn.Linear(embed_dim * maxlen, num_class) # 要乘以 maxlen
        self.embed_dim = embed_dim
        self.init_weights()

    def init_weights(self):
        initrange = 0.5
        self.embedding.weight.data.uniform_(-initrange, initrange)
        self.fc.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()

    def forward(self, text):
        embedded = self.embedding(text)
        out = embedded.reshape(embedded.size(0), -1) # 转换成1维
        return self.fc(out)

model = RecurrentNet(vocab_object.__len__(), 10, 1)

## 另一种写法，使用EmbeddingBag

In [363]:
class RecurrentNet(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_class):
        super().__init__()
        self.embedding = nn.EmbeddingBag(vocab_size, embed_dim)
        self.fc = nn.Linear(embed_dim, num_class)
        self.embed_dim = embed_dim
        self.init_weights()

    def init_weights(self):
        initrange = 0.5
        self.embedding.weight.data.uniform_(-initrange, initrange)
        self.fc.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()

    def forward(self, text):
        embedded = self.embedding(text)
        return self.fc(embedded)

model = RecurrentNet(vocab_object.__len__(), 10, 1)

## 模型训练

In [367]:
# 定义 10 个语句的正面(1)或负面(0)的情绪
y = torch.FloatTensor([1,1,1,1,1,0,0,0,0,0])
X = torchtext.functional.to_tensor(clean_index_list, 0) # 0:不足补0

# 指定优化器、损失函数
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

# 模型训练
for epoch in range(1000):
    outputs = model.forward(X) #forward pass
    optimizer.zero_grad() 
    loss = criterion(outputs.reshape(-1), y)
    loss.backward() 
    optimizer.step() 
    if epoch % 100 == 0:
        #print(outputs.shape)
        print(f"Epoch: {epoch}, loss: {loss.item():1.5f}")

Epoch: 0, loss: 0.72728
Epoch: 100, loss: 0.06390
Epoch: 200, loss: 0.00598
Epoch: 300, loss: 0.00140
Epoch: 400, loss: 0.00052
Epoch: 500, loss: 0.00016
Epoch: 600, loss: 0.00004
Epoch: 700, loss: 0.00001
Epoch: 800, loss: 0.00000
Epoch: 900, loss: 0.00000


## 训练资料预测

In [368]:
model.eval()
model(X)

tensor([[ 1.0000e+00],
        [ 9.9983e-01],
        [ 1.0002e+00],
        [ 9.9990e-01],
        [ 1.0000e+00],
        [-3.0272e-05],
        [-3.2104e-04],
        [ 1.9193e-05],
        [ 3.5113e-04],
        [-7.1526e-07]], grad_fn=<AddmmBackward0>)

## 测试资料预测

In [369]:
# 测试资料
test_docs = ['great effort', 'well done',
        'poor effort']

# 转成数值 
clean_index_list = []
for text in test_docs:
    clean_index_list.append(vocab_object.lookup_indices(text.split(' ')))
while len(clean_index_list[0]) < maxlen:
    clean_index_list[0] += [0]

clean_index_list = torchtext.functional.truncate(clean_index_list, maxlen)    
X = torchtext.functional.to_tensor(clean_index_list, 0) # 0:不足补0
model(X)

tensor([[ 1.0002e+00],
        [ 1.0000e+00],
        [-3.2104e-04]], grad_fn=<AddmmBackward0>)

## 使用词向量(Word2Vec)

## 读取 GloVe 50维的词向量，转换为GloVe 50维的词向量

In [302]:
# https://pytorch.org/text/stable/vocab.html#glove
examples = ['great']
vec = torchtext.vocab.GloVe(name='6B', dim=50)
ret = vec.get_vecs_by_tokens(examples, lower_case_backup=True)
ret

tensor([[-0.0266,  1.3357, -1.0280, -0.3729,  0.5201, -0.1270, -0.3543,  0.3782,
         -0.2972,  0.0939, -0.0341,  0.9296, -0.1402, -0.6330,  0.0208, -0.2153,
          0.9692,  0.4765, -1.0039, -0.2401, -0.3632, -0.0048, -0.5148, -0.4626,
          1.2447, -1.8316, -1.5581, -0.3747,  0.5336,  0.2088,  3.2209,  0.6455,
          0.3744, -0.1766, -0.0242,  0.3379, -0.4190,  0.4008, -0.1145,  0.0512,
         -0.1521,  0.2986, -0.4405,  0.1109, -0.2463,  0.6625, -0.2695, -0.4966,
         -0.4162, -0.2549]])

In [303]:
vec.vectors.size()

torch.Size([400000, 50])

In [304]:
vec.stoi['great']

353

## Embedding 不需训练，直接设定嵌入层权重

In [338]:
class RecurrentNet(nn.Module):
    def __init__(self, weights_matrix, num_embeddings, embedding_dim, num_class):
        super().__init__()
        self.embedding = nn.EmbeddingBag(num_embeddings, embedding_dim)
        # 设定嵌入层权重
        self.embedding.load_state_dict({'weight': weights_matrix})
        self.fc = nn.Linear(embedding_dim, num_class)

    def forward(self, text):
        embedded = self.embedding(text)
        return self.fc(embedded)

## 测试资料转换

In [339]:
docs = ['Well done!',
        'Good work',
        'Great effort',
        'nice work',
        'Excellent!',
        'Weak',
        'Poor effort!',
        'not good',
        'poor work',
        'Could have done better']

# 将词汇表转为词向量
clean_text_list = []
clean_tokens_list = []
for i, text in enumerate(docs):
    tokens = tokenizer(text.lower()) 
    clean_tokens = []
    for w in tokens:
        if w not in stopwords:
            clean_tokens.append(w)
    clean_tokens_list += clean_tokens   
    clean_text_list.append(clean_tokens)  
    tokens_vec = vec.get_vecs_by_tokens(clean_tokens)
vocab_list = list(set(clean_tokens_list))            
weights_matrix = vec.get_vecs_by_tokens(vocab_list)

In [340]:
# 定义 10 个语句的正面(1)或负面(0)的情绪
y = torch.FloatTensor([1,1,1,1,1,0,0,0,0,0])
X = torch.LongTensor(np.zeros((len(docs), maxlen)))
for i, item in enumerate(clean_text_list):
    for j, token in enumerate(item):
        if token in vocab_list:
            X[i, j] = vocab_list.index(token)
X

tensor([[ 9,  6,  0,  0],
        [10,  4,  0,  0],
        [13, 12,  0,  0],
        [ 0,  4,  0,  0],
        [ 7,  0,  0,  0],
        [ 8,  0,  0,  0],
        [ 2, 12,  0,  0],
        [ 5, 10,  0,  0],
        [ 2,  4,  0,  0],
        [ 3, 11,  6,  1]])

In [341]:
vocab_list

['nice',
 'better',
 'poor',
 'could',
 'work',
 'not',
 'done',
 'excellent',
 'weak',
 'well',
 'good',
 'have',
 'effort',
 'great']

In [342]:
# 建立模型物件
model = RecurrentNet(torch.FloatTensor(weights_matrix), len(vocab_list), 50, 1)

# 指定优化器、损失函数
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

# 模型训练
for epoch in range(1000):
    outputs = model.forward(X) #forward pass
    optimizer.zero_grad() 
    loss = criterion(outputs.reshape(-1), y)
    loss.backward() 
    optimizer.step() 
    if epoch % 100 == 0:
        #print(outputs.shape)
        print(f"Epoch: {epoch}, loss: {loss.item():1.5f}")

Epoch: 0, loss: 1.03547
Epoch: 100, loss: 0.09790
Epoch: 200, loss: 0.02937
Epoch: 300, loss: 0.00643
Epoch: 400, loss: 0.00280
Epoch: 500, loss: 0.00141
Epoch: 600, loss: 0.00067
Epoch: 700, loss: 0.00031
Epoch: 800, loss: 0.00013
Epoch: 900, loss: 0.00006


## 训练资料预测

In [343]:
model.eval()
model(X)

tensor([[ 1.0005e+00],
        [ 1.0003e+00],
        [ 1.0043e+00],
        [ 9.9065e-01],
        [ 1.0017e+00],
        [ 1.3793e-03],
        [-6.5045e-03],
        [-4.8908e-05],
        [ 9.2722e-03],
        [-1.0637e-04]], grad_fn=<AddmmBackward0>)

In [344]:
# 测试资料
test_docs = ['great effort', 'well done',
        'poor effort']

# 转成数值 
X = torch.LongTensor(np.zeros((len(test_docs), maxlen)))
clean_text_list = []
for i, text in enumerate(test_docs):
    tokens = tokenizer(text.lower()) 
    clean_tokens = []
    for w in tokens:
        if w not in stopwords:
            clean_tokens.append(w)
    clean_text_list.append(clean_tokens)  

for i, item in enumerate(clean_text_list):
    for j, token in enumerate(item):
        if token in vocab_list:
            X[i, j] = vocab_list.index(token)

# 预测            
model.eval()        
model(X)

tensor([[ 1.0043],
        [ 1.0005],
        [-0.0065]], grad_fn=<AddmmBackward0>)

## 将整个词向量设定为嵌入层权重

In [295]:
class RecurrentNet2(nn.Module):
    def __init__(self, vec, embedding_dim, num_class):
        super().__init__()
        # 将整个词向量设定为嵌入层权重，且嵌入层设为不训练
        self.embedding = nn.EmbeddingBag.from_pretrained(vec, freeze=True)
        self.fc = nn.Linear(embedding_dim, num_class)

    def forward(self, text):
        embedded = self.embedding(text)
        return self.fc(embedded)
    
model = RecurrentNet2(vec.vectors, vec.dim, 1)

In [296]:
# 测试资料
docs = ['Well done!',
        'Good work',
        'Great effort',
        'nice work',
        'Excellent!',
        'Weak',
        'Poor effort!',
        'not good',
        'poor work',
        'Could have done better']

# 转成数值 
X = torch.LongTensor(np.zeros((len(docs), maxlen)))

for i, text in enumerate(docs):
    tokens = tokenizer(text.lower()) 
    clean_tokens = []
    j=0
    for w in tokens:
        if w not in stopwords:
            # 转成词向量索引值 
            X[i, j] = vec.stoi[w]
            j+=1
X

tensor([[ 143,  751,    0,    0],
        [ 219,  161,    0,    0],
        [ 353,  968,    0,    0],
        [3082,  161,    0,    0],
        [4345,    0,    0,    0],
        [2690,    0,    0,    0],
        [ 992,  968,    0,    0],
        [  36,  219,    0,    0],
        [ 992,  161,    0,    0],
        [  94,   33,  751,  439]])

In [297]:
# 指定优化器、损失函数
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

# 模型训练
for epoch in range(1000):
    outputs = model.forward(X) #forward pass
    optimizer.zero_grad() 
    loss = criterion(outputs.reshape(-1), y)
    loss.backward() 
    optimizer.step() 
    if epoch % 100 == 0:
        #print(outputs.shape)
        print(f"Epoch: {epoch}, loss: {loss.item():1.5f}")

model.eval()        
model(X)

Epoch: 0, loss: 1.26617
Epoch: 100, loss: 0.12657
Epoch: 200, loss: 0.07799
Epoch: 300, loss: 0.05655
Epoch: 400, loss: 0.04253
Epoch: 500, loss: 0.03269
Epoch: 600, loss: 0.02547
Epoch: 700, loss: 0.01998
Epoch: 800, loss: 0.01569
Epoch: 900, loss: 0.01227


tensor([[ 0.8730],
        [ 0.9340],
        [ 1.0150],
        [ 0.9696],
        [ 0.9747],
        [-0.0099],
        [-0.0561],
        [ 0.2156],
        [ 0.1366],
        [-0.0677]], grad_fn=<AddmmBackward0>)

## 输入训练数据以外的单字测试

In [299]:
# 测试资料
test_docs = ['great job', 'well done',
        'poor job']

# 转成数值 
X = torch.LongTensor(np.zeros((len(test_docs), maxlen)))
for i, text in enumerate(test_docs):
    tokens = tokenizer(text.lower()) 
    clean_tokens = []
    j=0
    for w in tokens:
        if w not in stopwords:
            X[i, j] = vec.stoi[w]
            j+=1
X

tensor([[353, 664,   0,   0],
        [143, 751,   0,   0],
        [992, 664,   0,   0]])

## 训练资料预测

In [301]:
model.eval()        
model(X)

tensor([[ 0.6623],
        [ 0.8730],
        [-0.4088]], grad_fn=<AddmmBackward0>)