<a href="https://colab.research.google.com/github/nanpolend/machine-learning/blob/master/Kaggle_Stanford_RNA_3D_Foldindeepseek_chatgpt4o.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

flowchart LR
    RawData --> Preprocess -->|轉換為 Tensor| DataLoader
    DataLoader -->|輸入批次數據| Model
    Model -->|預測結果| Loss[計算損失]
    Loss -->|反向傳播| Optimizer[更新參數]
    Optimizer -->|調整學習率| Scheduler
    Scheduler -->|動態更新| Optimizer
    Loss -->|記錄指標| TensorBoard
    Model -->|保存最佳權重| Checkpoint

In [5]:
!pip install kaggle --upgrade



In [6]:
# ========== Kaggle API 配置 ==========
import os
from kaggle_secrets import UserSecretsClient

# Kaggle Notebook 专用加载方式
try:
    user_secrets = UserSecretsClient()
    os.environ['KAGGLE_USERNAME'] = user_secrets.get_secret("KAGGLE_USERNAME")
    os.environ['KAGGLE_KEY'] = user_secrets.get_secret("KAGGLE_KEY")
except:
    pass  # 本地环境使用 ~/.kaggle/kaggle.json

# ========== 下载比赛数据 ==========
!kaggle competitions download -c stanford-rna-3d-folding -p /kaggle/working/

# ========== 解压数据 ==========
import zipfile
with zipfile.ZipFile('/kaggle/working/stanford-rna-3d-folding.zip', 'r') as zip_ref:
    zip_ref.extractall('/kaggle/working/data')

print("✅ 数据准备完成！")

ModuleNotFoundError: No module named 'kaggle_secrets'

In [1]:
# ========== 环境设置 ==========
# 安装依赖（若在 Colab 运行需先执行此命令）
# !pip install torch==2.3.0+cu121 torchvision==0.18.0+cu121 --extra-index-url https://download.pytorch.org/whl/cu121
# !pip install tensorboard

# ========== 导入库 ==========
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import GradScaler, autocast
from torch.utils.tensorboard import SummaryWriter
import numpy as np

# ========== 超参数配置 ==========
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE = 128
LR = 1e-3
EPOCHS = 50
PATIENCE = 5
MODEL_SAVE_PATH = './best_model.pth'

# ========== 数据增强与标准化 ==========
CIFAR_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR_STD = (0.2023, 0.1994, 0.2010)

train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR_MEAN, CIFAR_STD)
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR_MEAN, CIFAR_STD)
])

# ========== 数据加载 ==========
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=train_transform
)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=test_transform
)

train_loader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=2, pin_memory=True
)
test_loader = DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=False,
    num_workers=2, pin_memory=True
)

# ========== 模型定义 ==========
class EnhancedCNN(nn.Module):
    def __init__(self):
        super().__init__()
        # 特征提取层
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 32x32 → 16x16

            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 16x16 → 8x8

            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 8x8 → 4x4
        )
        # 分类层
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, 10)
        )
        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

model = EnhancedCNN().to(DEVICE)

# ========== 训练工具初始化 ==========
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
scaler = GradScaler()
writer = SummaryWriter()

# ========== 训练函数 ==========
def train_epoch(epoch):
    model.train()
    total_loss = 0.0

    for images, labels in train_loader:
        images = images.to(DEVICE, non_blocking=True)
        labels = labels.to(DEVICE, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item() * images.size(0)

    return total_loss / len(train_dataset)

# ========== 验证函数 ==========
def validate(epoch):
    model.eval()
    total_loss = 0.0
    correct = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            outputs = model(images)
            loss = criterion(outputs, labels)

            total_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()

    return total_loss / len(test_dataset), correct / len(test_dataset)

# ========== 主训练循环 ==========
best_acc = 0.0
patience_counter = 0

print("🚀 训练启动 | 设备:", DEVICE)
for epoch in range(1, EPOCHS + 1):
    train_loss = train_epoch(epoch)
    val_loss, val_acc = validate(epoch)
    scheduler.step()

    # 记录到TensorBoard
    writer.add_scalars('Loss', {'train': train_loss, 'val': val_loss}, epoch)
    writer.add_scalar('Accuracy/val', val_acc, epoch)
    writer.add_scalar('Learning Rate', optimizer.param_groups[0]['lr'], epoch)

    # 控制台输出
    print(f"Epoch {epoch:02d} | "
          f"Train Loss: {train_loss:.4f} | "
          f"Val Loss: {val_loss:.4f} | "
          f"Val Acc: {val_acc:.2%} | "
          f"LR: {optimizer.param_groups[0]['lr']:.2e}")

    # Early Stopping逻辑
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        patience_counter = 0
        print(f"💾 保存最佳模型 (准确率 {val_acc:.2%})")
    else:
        patience_counter += 1
        if patience_counter >= PATIENCE:
            print("🛑 提前停止训练!")
            break

writer.close()

# ========== 最终测试 ==========
print("\n🔍 加载最佳模型进行最终测试...")
model.load_state_dict(torch.load(MODEL_SAVE_PATH))
final_loss, final_acc = validate(0)
print(f"🏆 最终测试准确率: {final_acc:.2%}")

print("🎉 训练完成！")

100%|██████████| 170M/170M [00:05<00:00, 29.5MB/s]


🚀 训练启动 | 设备: cuda


  scaler = GradScaler()
  with autocast():


Epoch 01 | Train Loss: 1.8030 | Val Loss: 1.4584 | Val Acc: 46.69% | LR: 9.99e-04
💾 保存最佳模型 (准确率 46.69%)
Epoch 02 | Train Loss: 1.4744 | Val Loss: 1.2801 | Val Acc: 53.72% | LR: 9.96e-04
💾 保存最佳模型 (准确率 53.72%)
Epoch 03 | Train Loss: 1.3282 | Val Loss: 1.1631 | Val Acc: 59.19% | LR: 9.91e-04
💾 保存最佳模型 (准确率 59.19%)
Epoch 04 | Train Loss: 1.2198 | Val Loss: 1.0568 | Val Acc: 61.79% | LR: 9.84e-04
💾 保存最佳模型 (准确率 61.79%)
Epoch 05 | Train Loss: 1.1362 | Val Loss: 1.0127 | Val Acc: 63.91% | LR: 9.76e-04
💾 保存最佳模型 (准确率 63.91%)
Epoch 06 | Train Loss: 1.0688 | Val Loss: 0.9244 | Val Acc: 67.37% | LR: 9.65e-04
💾 保存最佳模型 (准确率 67.37%)
Epoch 07 | Train Loss: 1.0193 | Val Loss: 0.8927 | Val Acc: 68.38% | LR: 9.52e-04
💾 保存最佳模型 (准确率 68.38%)
Epoch 08 | Train Loss: 0.9763 | Val Loss: 0.8691 | Val Acc: 69.76% | LR: 9.38e-04
💾 保存最佳模型 (准确率 69.76%)
Epoch 09 | Train Loss: 0.9448 | Val Loss: 0.8387 | Val Acc: 70.68% | LR: 9.22e-04
💾 保存最佳模型 (准确率 70.68%)
Epoch 10 | Train Loss: 0.9092 | Val Loss: 0.7814 | Val Acc: 73.0