In [62]:
# 导入必备的工具包
import torch

# 预定义的网络层torch.nn, 工具开发者已经帮助我们开发好的一些常用层,
# 比如，卷积层, lstm层, embedding层等, 不需要我们再重新造轮子.
import torch.nn as nn

# 数学计算工具包
import math

# torch中变量封装函数Variable.
from torch.autograd import Variable

In [63]:
class Embadding(nn.Module):
    """Token-embedding layer whose output is scaled by sqrt(embed_size).

    Args:
        vocab_size: number of rows in the lookup table; every index passed to
            ``forward`` must be smaller than this value.
        embed_size: width of each embedding vector.
    """

    def __init__(self, vocab_size, embed_size):
        super().__init__()
        self.embed_size = embed_size
        # Lookup table mapping integer token ids to dense vectors.
        self.lut = nn.Embedding(vocab_size, embed_size)

    def forward(self, input):
        """Look up the ids in ``input`` and multiply the vectors by sqrt(embed_size)."""
        scale = math.sqrt(self.embed_size)
        vectors = self.lut(input)
        return vectors * scale

In [64]:
# Demo: embed a batch of 3 sequences holding 4 token ids each.
embadding = Embadding(vocab_size=1000, embed_size=128)
# Every id must stay below vocab_size (999 is the largest valid index here).
input = torch.LongTensor([
    [1, 2, 4, 5],
    [4, 3, 2, 9],
    [1, 4, 999, 9],
])
x = embadding(input)

In [70]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a sequence, then apply dropout.

    The encoding table is precomputed once for ``max_len`` positions and stored
    as the non-trainable buffer ``pe`` with shape (1, max_len, embed_dim).
    Even columns hold sines and odd columns hold cosines of
    ``position * exp(-2i * ln(10000) / embed_dim)``.
    """

    def __init__(self, embed_dim, max_len=5000, dropout=0.2):
        """
        embed_dim: embedding dimension (size of the last axis of the input);
            both even and odd values are supported
        max_len: maximum sequence length for which encodings are precomputed
        dropout: dropout probability applied to the summed result
        """
        super(PositionalEncoding, self).__init__()
        self.embed_dim = embed_dim
        self.dropout = nn.Dropout(dropout)

        # Build the (max_len, embed_dim) position-encoding matrix.
        pe = torch.zeros(max_len, embed_dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, embed_dim, 2, dtype=torch.float) *
                             -(math.log(10000.0) / embed_dim))
        angles = position * div_term  # (max_len, ceil(embed_dim / 2))
        pe[:, 0::2] = torch.sin(angles)
        # With an odd embed_dim there is one fewer odd column than even column,
        # so the cosine half must be trimmed to avoid a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, :embed_dim // 2])
        # Leading axis of size 1 lets the table broadcast over the batch axis.
        pe = pe.unsqueeze(0)
        # A buffer follows the module across devices and into state_dict but is
        # never a trainable parameter.
        self.register_buffer('pe', pe)

    def forward(self, input):
        """Return dropout(input + encodings for the first input.size(1) positions).

        input: tensor whose axis 1 is the sequence axis and whose last axis has
            size embed_dim; its sequence length must not exceed max_len.
        """
        # Buffers never require grad, so no Variable wrapper is needed.
        return self.dropout(input + self.pe[:, :input.size(1)])
# Demo: add position encodings to the embedded batch `x` and display the result
# next to `x`. The module is in training mode, so dropout zeroes some entries.
ps = PositionalEncoding(embed_dim=128)
(ps(x), x)

(tensor([[[ -0.0000,  28.7924,  -0.0000,  ...,   0.0000, -17.9705,  -6.7543],
          [ 17.8949,  -0.0000,  12.1308,  ...,  10.2210, -14.2175,  -0.0000],
          [  2.3334,  15.9196,   6.3648,  ...,   0.5610,  -0.0000,  -0.0000],
          [-17.1841, -12.2876, -12.9305,  ...,  -5.4904, -19.3377,   0.0000]],
 
         [[  1.1968,  17.6898,   5.1310,  ...,   0.5610,  -8.2305, -11.1611],
          [ 12.6418,  -3.4836,  -4.0453,  ...,   5.2866,   8.0946, -12.1409],
          [ 17.9797,  -0.0000,  12.4125,  ...,  10.2210, -14.2174,  -9.5930],
          [ 11.4116,  -3.1542,  -9.1871,  ...,  29.8916,   0.0000,  -3.0731]],
 
         [[ -9.1296,   0.0000,  -4.7802,  ...,  13.1341, -17.9705,  -6.7543],
          [  2.2486,  17.1152,   6.0831,  ...,   0.0000,  -8.2304,  -0.0000],
          [ 12.2383,  -8.3959,   0.0000,  ...,  20.3719, -10.3864,  -6.4068],
          [ 11.4116,  -0.0000,  -0.0000,  ...,  29.8916,   1.0979,  -3.0731]]],
        grad_fn=<MulBackward0>),
 tensor([[[ -7.3037,  2