In [6]:
import torch
from torch import nn
from torch.nn import functional as F
import math

In [7]:
# Token embedding ---> what the token is
class TokenEmbedding(nn.Embedding):
    """Learned lookup table that maps token ids to ``d_model``-dimensional vectors.

    Thin wrapper around ``nn.Embedding`` that fixes the argument order to
    ``(vocab_size, d_model)`` and designates one id as padding.

    Args:
        vocab_size: Number of rows in the embedding table.
        d_model: Size of each embedding vector.
        padding_idx: Token id treated as padding; ``nn.Embedding`` initialises
            that row to zeros and does not update it during training.
            Defaults to 1, the value that was previously hard-coded, so
            existing two-argument callers behave exactly as before.
            Pass ``None`` to disable padding handling.
    """

    def __init__(self, vocab_size, d_model, padding_idx=1):
        super().__init__(vocab_size, d_model, padding_idx=padding_idx)

In [8]:
# Positional encoding ---> where the token is
class PositionalEmbedding(nn.Module):
    """Fixed (non-learned) sinusoidal position table.

    For position ``pos`` and even column index ``2i`` the table holds::

        encoding[pos, 2i]     = sin(pos / 10000 ** (2i / d_model))
        encoding[pos, 2i + 1] = cos(pos / 10000 ** (2i / d_model))

    Args:
        d_model: Width of each position vector (even or odd).
        max_len: Number of positions pre-computed; the longest sequence
            ``forward`` can serve.
        device: Device on which the table is initially created.
    """

    def __init__(self, d_model, max_len, device):
        super().__init__()

        pos = torch.arange(0, max_len, device=device).float().unsqueeze(1)
        _2i = torch.arange(0, d_model, step=2, device=device).float()
        # One angle per (position, even column); shape (max_len, ceil(d_model / 2)).
        angles = pos / (10000 ** (_2i / d_model))

        encoding = torch.zeros(max_len, d_model, device=device)
        encoding[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even columns,
        # so drop the surplus cosine column instead of failing on a shape mismatch.
        encoding[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Registered as a buffer so that .to()/.cuda()/.cpu() move the table
        # together with the rest of the module. persistent=False keeps it out
        # of state_dict, so the set of checkpoint keys is unchanged.
        self.register_buffer("encoding", encoding, persistent=False)

    def forward(self, x):
        """Return the position vectors for the first ``seq_len`` positions.

        Args:
            x: 2-D tensor of shape ``(batch_size, seq_len)``; only its shape
                is read, its values are ignored.

        Returns:
            Tensor of shape ``(seq_len, d_model)`` sliced from the table.

        Raises:
            ValueError: If ``seq_len`` exceeds the ``max_len`` the table was
                built with.
        """
        _, seq_len = x.size()
        max_len = self.encoding.size(0)
        if seq_len > max_len:
            # Slicing would silently return only max_len rows; fail loudly instead.
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {max_len}"
            )
        return self.encoding[:seq_len, :]

In [None]:
class TransformerEmbedding(nn.Module):
    """Token embedding plus sinusoidal position encoding, followed by dropout.

    Args:
        vacab_size: Vocabulary size (number of rows in the token table).
            The spelling is kept so keyword callers keep working.
        d_model: Width of every embedding vector.
        max_len: Number of positions the positional table is built for.
        dropout: Drop probability applied to the summed embedding.
        device: Device on which both sub-layers are placed at construction.
    """

    def __init__(self,vacab_size,d_model,max_len,dropout,device):
        super().__init__()
        self.token_embedding=TokenEmbedding(vacab_size,d_model)
        if device is not None:
            # The positional table is created on `device`; put the token table
            # there as well so the module works without an extra .to(device).
            self.token_embedding.to(device)
        self.position_embedding=PositionalEmbedding(d_model,max_len,device)
        self.dropout=nn.Dropout(p=dropout)
        # Stored for reference; not read anywhere in this class.
        self.d_model=d_model

    def forward(self,x):
        """Embed a batch of token ids.

        Args:
            x: Integer tensor of token ids, shape ``(batch_size, seq_len)``.

        Returns:
            Tensor of shape ``(batch_size, seq_len, d_model)``: the token
            embedding with the ``(seq_len, d_model)`` positional slice
            broadcast over the batch dimension, after dropout.
        """
        tok_emb=self.token_embedding(x)
        # Align devices explicitly in case the positional table did not follow
        # an earlier .to() call on this module; no copy when they already match.
        pos_emb=self.position_embedding(x).to(tok_emb.device)
        return self.dropout(tok_emb+pos_emb)

In [12]:

def main():
    """Build a tiny TransformerEmbedding and print its output for a toy batch.

    The layer is left in its default training mode, so dropout is active and
    the printed values differ from run to run.
    """
    # Use the GPU when one is present; otherwise fall back to the CPU so the
    # demo does not crash on machines without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Hyper-parameters
    vocab_size = 10  # toy vocabulary of 10 tokens
    d_model = 4      # each token vector has 4 dimensions
    max_len = 6      # maximum sequence length 6
    dropout = 0.1

    # Create the TransformerEmbedding
    embedding_layer = TransformerEmbedding(vocab_size, d_model, max_len, dropout, device)
    embedding_layer = embedding_layer.to(device)

    # Build input token ids, batch_size=2, seq_len=4
    tokens = torch.tensor([
        [2, 3, 1, 7],
        [5, 1, 1, 9]
    ], dtype=torch.long,device=device)

    # Compute the final embedding
    output = embedding_layer(tokens)

    print("输入 token id:")
    print(tokens)
    print("\n最终 embedding (带位置编码):")
    print(output)
    print("\n形状:", output.shape)

# Run the demo only when this code is executed as the main module.
if __name__ == "__main__":
    main()


输入 token id:
tensor([[2, 3, 1, 7],
        [5, 1, 1, 9]], device='cuda:0')

最终 embedding (带位置编码):
tensor([[[-0.5092,  1.1175,  0.0351,  1.4108],
         [-1.2399,  0.9976,  0.2590,  1.8516],
         [ 1.0103, -0.4624,  0.0222,  1.1109],
         [ 1.4013, -0.0000,  1.2351,  3.6217]],

        [[ 0.3338,  1.7399,  0.0000,  1.1599],
         [ 0.9350,  0.6003,  0.0111,  1.1111],
         [ 1.0103, -0.4624,  0.0000,  1.1109],
         [-0.3748, -0.6811,  1.2260,  0.4909]]], device='cuda:0',
       grad_fn=<NativeDropoutBackward0>)

形状: torch.Size([2, 4, 4])
