In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

In [2]:
sentences = [ "i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
print(word_list)
word_list = list(set(word_list))
print(word_list)
word_dict = {w:i for i,w in enumerate(word_list)}
print(word_dict)
number_dict = {i:w for i,w in enumerate(word_list)}
print(number_dict)
n_class = len(word_dict)
print(n_class)

['i', 'like', 'dog', 'i', 'love', 'coffee', 'i', 'hate', 'milk']
['coffee', 'love', 'dog', 'like', 'milk', 'hate', 'i']
{'coffee': 0, 'love': 1, 'dog': 2, 'like': 3, 'milk': 4, 'hate': 5, 'i': 6}
{0: 'coffee', 1: 'love', 2: 'dog', 3: 'like', 4: 'milk', 5: 'hate', 6: 'i'}
7


In [3]:
n_step = 2  # 考虑前两个词
n_hidden = 2
m = 2

def make_batch(sentences):
    input_batch = []
    target_batch = []
    
    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]
        target = word_dict[word[-1]]
        
        input_batch.append(input)
        target_batch.append(target)
        
    return input_batch, target_batch

In [4]:
input_batch, target_batch = make_batch(sentences)

print('input_batch:', input_batch)
print('target_batch:', target_batch)

input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

print('input_batch:', input_batch)
print('target_batch:', target_batch)


input_batch: [[6, 3], [6, 1], [6, 5]]
target_batch: [2, 0, 4]
input_batch: tensor([[6, 3],
        [6, 1],
        [6, 5]])
target_batch: tensor([2, 0, 4])


In [5]:
input_size = n_step * m # 2 * 2
hidden_size = n_hidden  # 2 隐藏层单元数为 2

class NNLM(nn.Module):
    def __init__(self, input_size, hidden_size, n_class):
        super(NNLM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_class = n_class
        self.emb = nn.Embedding(n_class, m)  # 7 * 2 词库有7个词，每个词2维
        # num_embeddings: 嵌入层字典的大小（单词本里单词个数） embedding_dim: 每个产出向量的大小
        # pytorch的nn.Embedding层讲 one-hot矩阵 和 C 封装起来了
        
        self.fc1 = nn.Linear(self.input_size, self.hidden_size) # 4 * 2
        self.fc2 = nn.Linear(self.hidden_size, self.n_class) # 2 * 7
    
    def forward(self, x):
        x = self.emb(x) #     # (3,2,7) * (7,2) ==> (3,2,2)
#         print('第一层维度:', x.size())    
        # (3,2,2) 第一个元素是 batchsize的维度，这里的训练集数目是 3；
        # 第二个元素是序列长度，即前n-1个词，这里是2；
        # 第三个元素是每个词的向量维度，这里是2.
        
        x = x.view(-1, self.input_size)  # (3,2,2) ==> (3,4)
#         print('第二层维度:', x.size())   # (3,4) 
        
        x = self.fc1(x)             # (3,4) * (4,2) ==> (3,4)
#         print('第三层维度:', x.size())  # (3,2) 
        
        x = torch.tanh(x)            # (3,2) ==> (3,2) 非线性激活
#         print('第四层维度:', x.size())  # (3,2)
        
        output = self.fc2(x)       # (3,2) * (2,7) ==> (3,7) 全连接层
#         print('第五层维度:', output.size())  # (3,7) 
        return output

model = NNLM(input_size, hidden_size, n_class)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print(model)

NNLM(
  (emb): Embedding(7, 2)
  (fc1): Linear(in_features=4, out_features=2, bias=True)
  (fc2): Linear(in_features=2, out_features=7, bias=True)
)


In [6]:
for epoch in range(1000):
    output = model(input_batch)
    loss = criterion(output, target_batch)
    
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    


Epoch: 1000 cost = 0.590239


In [7]:
predict = model(input_batch).data.max(1, keepdim=True)[1]
print(predict)

print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()]) 


tensor([[2],
        [4],
        [4]])
[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'milk', 'milk']


In [8]:
print('输入：',input_batch)
print('embedding之后的结果：',model.emb(input_batch))

输入： tensor([[6, 3],
        [6, 1],
        [6, 5]])
embedding之后的结果： tensor([[[-1.0330, -0.8919],
         [-0.2665,  2.4664]],

        [[-1.0330, -0.8919],
         [ 1.0460, -2.5226]],

        [[-1.0330, -0.8919],
         [-0.5616, -1.8973]]], grad_fn=<EmbeddingBackward>)


In [9]:
print('embedding之后的结果：',model.emb.parameters())
for embed in model.emb.parameters():
    print(embed)  
# nn.Embedding网络中的权重即 本例中词表中所有词汇的词向量

embedding之后的结果： <generator object Module.parameters at 0x00000207741104C8>
Parameter containing:
tensor([[-0.3432,  0.7328],
        [ 1.0460, -2.5226],
        [-0.8760,  0.0882],
        [-0.2665,  2.4664],
        [-0.4747, -0.2021],
        [-0.5616, -1.8973],
        [-1.0330, -0.8919]], requires_grad=True)
