In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

In [2]:
# Draw a 4x4 tensor of uniform samples from [0, 1) and display it.
random_torch = torch.rand((4, 4))
print(random_torch)

tensor([[0.9738, 0.1652, 0.1956, 0.4268],
        [0.0213, 0.2562, 0.2386, 0.0656],
        [0.5477, 0.9836, 0.1516, 0.8134],
        [0.7741, 0.1835, 0.9570, 0.6306]])


In [4]:
# Dropout demo on an all-ones tensor: a freshly built nn.Dropout is in
# training mode, so kept entries are rescaled by 1 / (1 - p), i.e. 1.1111
# for p = 0.1.
tensor_test = torch.ones((5, 5))
dropout_module = nn.Dropout(0.1)
print(tensor_test)
print(dropout_module(tensor_test))

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
tensor([[1.1111, 1.1111, 1.1111, 1.1111, 1.1111],
        [1.1111, 1.1111, 1.1111, 1.1111, 1.1111],
        [1.1111, 1.1111, 1.1111, 1.1111, 1.1111],
        [1.1111, 1.1111, 1.1111, 1.1111, 1.1111],
        [1.1111, 1.1111, 1.1111, 1.1111, 1.1111]])


In [13]:
# Step-by-step build of a sinusoidal positional-encoding table with
# 10 positions (rows) and 10 dimensions (columns).
encoding = torch.zeros((10, 10))

# Position indices 0..9, printed first as integers ...
range_tensor = torch.arange(10)
print(range_tensor)
# ... then as a float column of shape (10, 1) so they broadcast against
# the per-dimension frequencies below.
range_tensor = range_tensor.float().unsqueeze(1)
print(range_tensor)

# Even dimension indices 0, 2, ..., 8 used in the frequency exponent.
_2i = torch.arange(0, 10, 2).float()
print(_2i)

# Even columns receive the sine and odd columns the cosine of the same angle.
encoding[:, 0::2] = (range_tensor / (10000 ** (_2i / 10))).sin()
encoding[:, 1::2] = (range_tensor / (10000 ** (_2i / 10))).cos()
print(encoding)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([[0.],
        [1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])
tensor([0., 2., 4., 6., 8.])
tensor([[ 0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,
          1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00],
        [ 8.4147e-01,  5.4030e-01,  1.5783e-01,  9.8747e-01,  2.5116e-02,
          9.9968e-01,  3.9811e-03,  9.9999e-01,  6.3096e-04,  1.0000e+00],
        [ 9.0930e-01, -4.1615e-01,  3.1170e-01,  9.5018e-01,  5.0217e-02,
          9.9874e-01,  7.9621e-03,  9.9997e-01,  1.2619e-03,  1.0000e+00],
        [ 1.4112e-01, -9.8999e-01,  4.5775e-01,  8.8908e-01,  7.5285e-02,
          9.9716e-01,  1.1943e-02,  9.9993e-01,  1.8929e-03,  1.0000e+00],
        [-7.5680e-01, -6.5364e-01,  5.9234e-01,  8.0569e-01,  1.0031e-01,
          9.9496e-01,  1.5924e-02,  9.9987e-01,  2.5238e-03,  1.0000e+00],
        [-9.5892e-01,  2.8366e-01,  7.1207e-01

In [17]:
# Convert input vocabulary indices into embedding vectors of the given dimension.
class TokenEmbedding(nn.Embedding):
    """Learned lookup table mapping token indices to ``d_model``-sized vectors.

    Thin wrapper around ``nn.Embedding`` that fixes the argument names used
    by this notebook and designates one index as padding.

    Args:
        vocab_size: Number of rows in the table (passed as ``num_embeddings``).
        d_model: Size of each embedding vector (passed as ``embedding_dim``).
        padding_idx: Index treated as padding by ``nn.Embedding``. Defaults
            to 1, the value that was previously hard-coded, so existing
            two-argument calls behave exactly as before. Pass ``None`` to
            disable padding handling.
    """

    def __init__(self, vocab_size, d_model, padding_idx=1):
        super().__init__(vocab_size, d_model, padding_idx=padding_idx)


class PositionalEmbeddinh(nn.Module):
    """Fixed (non-learned) sinusoidal positional encoding table.

    Row ``p`` of the table holds ``sin(p / 10000 ** (2i / d_model))`` in the
    even columns and the matching cosine in the odd columns.

    Args:
        d_model: Number of columns in the table (encoding dimension).
        max_len: Number of rows in the table (longest supported sequence).
        device: Device on which the table is initially created.
    """

    def __init__(self, d_model, max_len, device):
        super().__init__()
        # Column vector of positions, shape (max_len, 1).
        pos = torch.arange(0, max_len, device=device).float().unsqueeze(dim=1)
        # Even dimension indices 0, 2, 4, ... used in the frequency exponent.
        _2i = torch.arange(0, d_model, step=2, device=device).float()
        angles = pos / (10000 ** (_2i / d_model))

        encoding = torch.zeros(max_len, d_model, device=device)
        encoding[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns,
        # so trim the angles to the number of odd columns.
        encoding[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Register as a buffer so the table follows module.to()/.cuda() and
        # never receives gradients. persistent=False keeps it out of
        # state_dict, matching the previous plain-attribute behaviour.
        self.register_buffer("encoding", encoding, persistent=False)

    def forward(self, x):
        """Return the encodings for the first ``seq_len`` positions.

        Args:
            x: 2-D tensor of shape (batch_size, seq_len); only its shape is
                used, not its values.

        Returns:
            Tensor of shape (seq_len, d_model).

        Raises:
            ValueError: If ``seq_len`` exceeds the table's ``max_len``.
        """
        _, seq_len = x.size()
        max_len = self.encoding.size(0)
        if seq_len > max_len:
            # Slicing would silently return only max_len rows, which then
            # fails (or mis-broadcasts) when added to the token embeddings.
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {max_len}"
            )
        return self.encoding[:seq_len, :]
    
class TransformerEmbedding(nn.Module):
    """Token embedding plus positional encoding, followed by dropout.

    Args:
        vocab_size: Vocabulary size handed to the token embedding.
        d_model: Embedding dimension shared by both sub-modules.
        max_len: Maximum sequence length handed to the positional encoding.
        drop_prob: Dropout probability applied to the summed embeddings.
        device: Device forwarded to the positional encoding only.
    """

    def __init__(self, vocab_size, d_model, max_len, drop_prob, device):
        super().__init__()
        self.token_emb = TokenEmbedding(vocab_size, d_model)
        self.positional_emb = PositionalEmbeddinh(d_model, max_len, device)
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, x):
        """Embed the index tensor ``x`` and apply dropout to the result.

        ``x`` must be 2-D because the positional encoding unpacks its size
        into exactly two dimensions.
        """
        # The positional term is added to the token embeddings by broadcasting.
        summed = self.token_emb(x) + self.positional_emb(x)
        return self.dropout(summed)
        