In [None]:
import torch
# Leaf tensor holding the value 1.0 that autograd will track.
tensor_requires_grad = torch.ones(1, requires_grad=True)

# Forward pass: y = 2 * x; the multiplication is recorded in the autograd graph.
tensor_result = torch.mul(tensor_requires_grad, 2)

# Backward pass: the result has a single element, so backward() needs no
# explicit gradient argument.
tensor_result.backward()

# dy/dx = 2 has been accumulated into the leaf's .grad attribute.
print(tensor_requires_grad.grad)

tensor([1.], requires_grad=True)
tensor([2.], grad_fn=<MulBackward0>)
tensor([2.])


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

# 1. 定义一个简单的神经网络模型
class SimpleNN(nn.Module):
    """Two-layer fully connected network: 2 inputs -> 2 hidden units (ReLU) -> 1 output."""

    def __init__(self):
        super().__init__()
        # Layers are created in forward order, so parameters are registered
        # (and initialised) as fc1 first, then fc2.
        self.fc1 = nn.Linear(in_features=2, out_features=2)  # input layer -> hidden layer
        self.fc2 = nn.Linear(in_features=2, out_features=1)  # hidden layer -> output layer

    def forward(self, x):
        """Apply fc1, a ReLU non-linearity, then fc2.

        Args:
            x: tensor whose last dimension is 2 (the input feature count of fc1).
        Returns:
            Tensor with the same leading dimensions as ``x`` and last dimension 1.
        """
        hidden = self.fc1(x).relu()
        return self.fc2(hidden)

# 2. Create the model instance
# Seed the global RNG first so that weight initialisation and the synthetic
# data below are identical on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)
model = SimpleNN()

# 3. Define the loss function and the optimizer
criterion = nn.MSELoss()  # mean squared error loss
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

# 4. Synthetic training data X and Y
X = torch.randn(10, 2)  # 10 samples, 2 features
Y = torch.randn(10, 1)  # 10 target values

# 5. Training loop
NUM_EPOCHS = 100  # total number of full-batch updates
LOG_EVERY = 10    # report the loss once per this many epochs
for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()        # clear gradients left from the previous step
    output = model(X)            # forward pass
    loss = criterion(output, Y)  # compute the loss
    loss.backward()              # backward pass
    optimizer.step()             # update the parameters

    # Periodic progress report; the total comes from NUM_EPOCHS so the
    # message cannot drift out of sync with the loop bound.
    if (epoch + 1) % LOG_EVERY == 0:
        print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 1.2693
Epoch [20/100], Loss: 1.2239
Epoch [30/100], Loss: 1.1802
Epoch [40/100], Loss: 1.1382
Epoch [50/100], Loss: 1.0981
Epoch [60/100], Loss: 1.0598
Epoch [70/100], Loss: 1.0232
Epoch [80/100], Loss: 0.9883
Epoch [90/100], Loss: 0.9551
Epoch [100/100], Loss: 0.9234


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class STTransformer(nn.Module):
    """Transformer encoder over sequences that combine time, GPS and multimodal features.

    Each position's time index and GPS-bin index are looked up in learned
    embedding tables, the multimodal feature vector is linearly projected to
    ``d_model``, and the three are summed. A positional encoding is added, the
    sequence passes through a stack of Transformer encoder layers, and a final
    linear layer produces the output embeddings.

    Args:
        d_model: width of all embeddings and of the encoder.
        nhead: number of attention heads per encoder layer.
        num_encoder_layers: number of stacked encoder layers.
        dim_feedforward: hidden width of each encoder layer's feed-forward part.
        dropout: dropout probability used by the positional encoding and the
            encoder layers.
        max_seq_len: size of the time-embedding table and maximum length passed
            to the positional encoding.
        max_gps_bins: size of the GPS-bin embedding table.
        multimodal_dim: size of the last dimension of ``multimodal_feats``.
    """

    def __init__(
        self,
        d_model=512,
        nhead=8,
        num_encoder_layers=6,
        dim_feedforward=2048,
        dropout=0.1,
        max_seq_len=100,
        max_gps_bins=1000,
        multimodal_dim=256,
    ):
        super().__init__()

        # Embeddings for time, GPS and multimodal inputs
        self.time_embed = nn.Embedding(max_seq_len, d_model)
        self.gps_embed  = nn.Embedding(max_gps_bins, d_model)
        self.mm_proj    = nn.Linear(multimodal_dim, d_model)

        # Positional encoding for sequence positions
        self.pos_encoder = PositionalEncoding(d_model, dropout, max_len=max_seq_len)

        # Transformer encoder. batch_first=True lets the encoder consume the
        # (B, L, D) layout used everywhere else in this module, so no
        # transposes are needed in forward(); it also avoids the warning
        # nn.TransformerEncoder emits about nested tensors when the layer is
        # not batch-first. Parameter names and shapes are unaffected.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation="relu",
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_encoder_layers
        )

        # Final projection
        self.output_proj = nn.Linear(d_model, d_model)

    def forward(self, time_seq, gps_seq, multimodal_feats):
        """
        Args:
            time_seq: LongTensor of shape (batch_size, seq_len) with time indices;
                used to index an embedding table of size max_seq_len.
            gps_seq:  LongTensor of shape (batch_size, seq_len) with GPS-bin indices;
                used to index an embedding table of size max_gps_bins.
            multimodal_feats: FloatTensor of shape (batch_size, seq_len, multimodal_dim)
        Returns:
            embeddings: FloatTensor of shape (batch_size, seq_len, d_model)
        """
        # embed each modality
        t_emb = self.time_embed(time_seq)             # (B, L, D)
        g_emb = self.gps_embed(gps_seq)               # (B, L, D)
        mm_emb = self.mm_proj(multimodal_feats)       # (B, L, D)

        # sum embeddings
        x = t_emb + g_emb + mm_emb                    # (B, L, D)
        x = self.pos_encoder(x)                       # add positional encoding

        # encoder is batch-first, so the layout stays (B, L, D) throughout
        x = self.transformer_encoder(x)               # (B, L, D)

        # final projection
        return self.output_proj(x)                    # (B, L, D)

class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a batch-first sequence, then applies dropout.

    Args:
        d_model: size of the last dimension of the inputs; may be even or odd.
        dropout: dropout probability applied after the encoding is added.
        max_len: number of positions precomputed.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        # create positional encoding matrix once
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()   # (max_len, 1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model)
        )                                                          # (ceil(d_model / 2),)
        angles = position * div_term                               # (max_len, ceil(d_model / 2))

        # Even columns take the sine, odd columns the cosine.
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even column,
        # so the cosine arguments are trimmed to d_model // 2 columns. For an
        # even d_model the slice is a no-op and the values are unchanged.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        pe = pe.unsqueeze(0)  # shape (1, max_len, d_model)
        # Buffer: moves with the module across devices and is saved in the
        # state dict, but is not a trainable parameter.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        Args:
            x: FloatTensor of shape (batch_size, seq_len, d_model)
        Returns:
            FloatTensor of the same shape: ``x`` plus the encodings of the
            first ``seq_len`` positions, passed through dropout.
        """
        x = x + self.pe[:, : x.size(1), :]
        return self.dropout(x)

# Example usage: build a small STTransformer and push one random batch through it.
if __name__ == "__main__":
    batch_size = 4
    seq_len = 50
    multimodal_dim = 256
    d_model = 512
    max_gps_bins = 1000  # shared by the dummy GPS indices and the model's embedding table

    # Dummy inputs. Index upper bounds mirror the embedding-table sizes given
    # to the model below, so every index is a valid lookup.
    time_seq = torch.randint(0, seq_len, (batch_size, seq_len))
    gps_seq  = torch.randint(0, max_gps_bins, (batch_size, seq_len))
    mm_feats = torch.randn(batch_size, seq_len, multimodal_dim)

    model = STTransformer(
        d_model=d_model,
        nhead=8,
        num_encoder_layers=4,
        dim_feedforward=1024,
        dropout=0.1,
        max_seq_len=seq_len,
        max_gps_bins=max_gps_bins,
        multimodal_dim=multimodal_dim,
    )
    outputs = model(time_seq, gps_seq, mm_feats)  # (batch_size, seq_len, d_model)

    # Inline sanity check so a shape regression fails loudly on Run All.
    assert outputs.shape == (batch_size, seq_len, d_model), outputs.shape
    print("Output shape:", outputs.shape)


Output shape: torch.Size([4, 50, 512])


