In [16]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

# 示例输入和输出
inputs = np.array([[10, 3, 0, 6, 0, 0],
                   [2, 4, 0, 0, 7, 0]])
outputs = np.array([[3, 4, 7, 5, 8],
                    [1, 3, 6, 2, 5]])

inputs[inputs == 0] = 0  # 将0替换为100，作为缺失值占位符
inputs = torch.tensor(inputs, dtype=torch.long)
outputs = torch.tensor(outputs, dtype=torch.float32)

# 创建DataLoader
dataset = TensorDataset(inputs, outputs)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

for batch in dataloader:
    print('inputs:', batch[0].shape, batch[0])
    print('outputs:', batch[1].shape, batch[1])
    break


inputs: torch.Size([2, 6]) tensor([[ 2,  4,  0,  0,  7,  0],
        [10,  3,  0,  6,  0,  0]])
outputs: torch.Size([2, 5]) tensor([[1., 3., 6., 2., 5.],
        [3., 4., 7., 5., 8.]])


In [10]:
import math
import torch.nn as nn

def generate_positional_encoding(max_len, d_model):
    pe = torch.zeros(max_len, d_model)
    position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    pe = pe.unsqueeze(0)
    return pe


In [11]:
import torch.nn as nn

class TransformerModel(nn.Module):
    def __init__(self, input_dim, output_dim, d_model=64, nhead=4, num_layers=3, dim_feedforward=256, max_len=500):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(input_dim, d_model, padding_idx=0)
        self.pos_encoder = generate_positional_encoding(max_len, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, output_dim)
    
    def forward(self, src):
        src = self.embedding(src) + self.pos_encoder[:, :src.size(1), :].to(src.device)
        output = self.transformer_encoder(src)
        output = self.fc(output) # (batch_size, seq_len, output_dim)
        output = torch.mean(output, dim=1) # (batch_size, output_dim)
        return output


input_dim = 201  # 假设输入的数字在0到100之间，包括100作为缺失值占位符
output_dim = outputs.size(1)
model = TransformerModel(input_dim, output_dim)


In [12]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [13]:
num_epochs = 10

for epoch in range(num_epochs):
    for batch in dataloader:
        inputs, targets = batch
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/10], Loss: 23.7127
Epoch [2/10], Loss: 18.5774
Epoch [3/10], Loss: 15.6292
Epoch [4/10], Loss: 13.2802
Epoch [5/10], Loss: 11.8894
Epoch [6/10], Loss: 10.8821
Epoch [7/10], Loss: 9.9915
Epoch [8/10], Loss: 9.4551
Epoch [9/10], Loss: 8.9348
Epoch [10/10], Loss: 8.5800


In [14]:
# 模型评估
model.eval()
with torch.no_grad():
    for batch in dataloader:
        inputs, targets = batch
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        print(f'Validation Loss: {loss.item():.4f}')

# 进行预测
with torch.no_grad():
    inputs = torch.tensor([[10, 3, 0, 6, 0, 0]], dtype=torch.long)
    inputs[inputs == 0] = 100
    outputs = model(inputs)
    print(outputs)


Validation Loss: 7.9806
tensor([[1.2308, 1.5811, 3.1411, 1.5231, 2.4864]])
