In [3]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from sklearn.preprocessing import StandardScaler
from torchvision import transforms

# --- 0. 重新执行数据准备步骤 ---
# (确保这个 Notebook 能独立运行)

# 请确保路径正确
DATA_DIR = './csiro-biomass' # 你的数据文件夹路径
IMAGE_DIR = DATA_DIR # 你的图片文件夹路径

df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
df_wide = pd.pivot_table(df, 
                         index=['image_path', 'Sampling_Date', 'State', 'Species', 'Pre_GSHH_NDVI', 'Height_Ave_cm'], 
                         columns='target_name', 
                         values='target',
                         aggfunc='mean').reset_index()
df_wide = df_wide.rename_axis(None, axis=1)
df_wide['Sampling_Date'] = pd.to_datetime(df_wide['Sampling_Date'])


# --- 1. 定义图像预处理/增强 ---
# 对于验证集，我们只做基础的尺寸调整、Tensor转换和归一化
# 对于训练集，可以加入随机翻转、颜色抖动等数据增强
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(), # 随机水平翻转
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # ImageNet 均值和标准差
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}


# --- 2. 自定义 PyTorch 数据集类 ---
class BiomassDataset(Dataset):
    def __init__(self, dataframe, image_dir, target_cols, transform=None):
        """
        Args:
            dataframe (pd.DataFrame): 包含所有信息的宽格式 DataFrame.
            image_dir (str): 图像文件所在的目录.
            target_cols (list): 目标列的列名列表.
            transform (callable, optional): 应用于图像的 torchvision 变换.
        """
        self.df = dataframe
        self.image_dir = image_dir
        self.transform = transform
        
        # !! 只提取目标值 !!
        self.targets = np.log1p(self.df[target_cols].values.astype(np.float32))

    def __len__(self):
        # 返回数据集的总样本数
        return len(self.df)

    def __getitem__(self, idx):
        # 根据索引 idx 获取单个样本
        
        # 1. 加载图像
        img_name = self.df.iloc[idx]['image_path']
        img_path = os.path.join(self.image_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        
        # 2. 应用图像变换
        if self.transform:
            image = self.transform(image)
        
        # 3. 获取对应的表格数据和目标
        tabular_row = torch.tensor(self.tabular_data[idx], dtype=torch.float)
        target_row = torch.tensor(self.targets[idx], dtype=torch.float)
        
        return image, tabular_row, target_row

print("--- PyTorch 环境和 Dataset 类定义完成 ---")
print(f"PyTorch 版本: {torch.__version__}")
print("BiomassDataset 类已准备就绪。")


--- PyTorch 环境和 Dataset 类定义完成 ---
PyTorch 版本: 2.5.1
BiomassDataset 类已准备就绪。


In [4]:
import torch
import torch.nn as nn
import torchvision.models as models

class BiModalModel(nn.Module):
    def __init__(self, num_tabular_features, num_targets=5, pretrained=True):
        """
        Args:
            num_tabular_features (int): 输入的表格特征数量.
            num_targets (int): 需要预测的目标数量 (本项目中是 5).
            pretrained (bool): 是否使用预训练的 CNN 权重.
        """
        super(BiModalModel, self).__init__()
        
        # --- 1. 图像分支 (Image Branch) ---
        # 加载一个预训练的 ResNet18 模型
        self.cnn = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None)
        
        # 获取 ResNet18 最后一层全连接层 (fc) 的输入特征数
        num_cnn_features = self.cnn.fc.in_features
        
        # 将原始的 fc 层替换为一个 Identity 层，相当于只做特征提取，不做分类
        self.cnn.fc = nn.Identity()
        
        # --- 2. 表格分支 (Tabular Branch) ---
        self.tabular_mlp = nn.Sequential(
            nn.Linear(num_tabular_features, 128),
            nn.BatchNorm1d(128), # BatchNorm 有助于稳定训练
            nn.ReLU(),
            nn.Dropout(0.3), # Dropout 防止过拟合
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3)
        )
        
        # --- 3. 融合与最终预测 (Fusion Head) ---
        # 将 CNN 特征和 MLP 特征拼接后的总维度
        total_features = num_cnn_features + 64
        
        self.fusion_head = nn.Sequential(
            nn.Linear(total_features, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_targets) # 最终输出 5 个预测值
        )

    def forward(self, image, tabular):
        # 定义数据如何流过网络
        
        # 1. 图像数据通过 CNN
        image_features = self.cnn(image)
        
        # 2. 表格数据通过 MLP
        tabular_features = self.tabular_mlp(tabular)
        
        # 3. 拼接 (Concatenate) 特征
        combined_features = torch.cat((image_features, tabular_features), dim=1)
        
        # 4. 通过融合层得到最终输出
        output = self.fusion_head(combined_features)
        
        return output

print("--- 多模态模型 BiModalModel 定义完成 ---")
# 我们可以创建一个模型实例来测试一下结构是否正确
# (这里我们先假设表格特征有 21 个，和第二阶段一样)
test_model = BiModalModel(num_tabular_features=21)
print("模型结构:")
print(test_model)



--- 多模态模型 BiModalModel 定义完成 ---
模型结构:
BiModalModel(
  (cnn): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-0

In [6]:
# =======================================================
#               纯视觉模型训练完整脚本
# =======================================================

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from torchvision import transforms
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tqdm.notebook import tqdm
import torch.optim as optim

# --- 1. 模型定义 (VisualModel) ---
class VisualModel(nn.Module):
    def __init__(self, num_targets=5, pretrained=True):
        super(VisualModel, self).__init__()
        # 加载预训练的 ResNet18
        self.cnn = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None)
        num_cnn_features = self.cnn.fc.in_features
        # 替换最后一层
        self.cnn.fc = nn.Sequential(
            nn.Linear(num_cnn_features, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_targets)
        )

    def forward(self, image):
        output = self.cnn(image)
        return output

# --- 2. 数据集定义 (BiomassDataset) ---
class BiomassDataset(Dataset):
    def __init__(self, dataframe, image_dir, target_cols, transform=None):
        self.df = dataframe
        self.image_dir = image_dir
        self.transform = transform
        # Log 变换目标值
        self.targets = np.log1p(self.df[target_cols].values.astype(np.float32))

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx]['image_path']
        img_path = os.path.join(self.image_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        target_row = torch.tensor(self.targets[idx], dtype=torch.float)
        return image, target_row

# --- 3. 准备数据 ---

IMAGE_DIR = './csiro-biomass'  
target_cols = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'GDM_g', 'Dry_Total_g']

# 划分训练/验证集
train_df, val_df = train_test_split(df_wide, test_size=0.2, random_state=42)

# 定义图像变换
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# 创建 DataLoader
train_dataset = BiomassDataset(train_df.reset_index(drop=True), IMAGE_DIR, target_cols, transform=data_transforms['train'])
val_dataset = BiomassDataset(val_df.reset_index(drop=True), IMAGE_DIR, target_cols, transform=data_transforms['val'])

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=0)

# --- 4. 初始化模型和优化器 ---
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"正在使用设备: {device}")

model = VisualModel().to(device)
criterion = nn.MSELoss()
# 使用较小的学习率进行全模型微调
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)

# --- 5. 开始训练 ---
NUM_EPOCHS = 20
best_rmse = float('inf')

print("开始训练纯视觉模型...")

for epoch in range(NUM_EPOCHS):
    # 训练阶段
    model.train()
    running_loss = 0.0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [训练]")
    
    for images, targets in pbar:
        images, targets = images.to(device), targets.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)
    
    epoch_train_loss = running_loss / len(train_dataset)

    # 验证阶段
    model.eval()
    all_preds = []
    all_targets = []
    
    with torch.no_grad():
        pbar_val = tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [验证]")
        for images, targets in pbar_val:
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            
            # 还原 log 变换
            preds_orig = np.expm1(outputs.cpu().numpy())
            targets_orig = np.expm1(targets.cpu().numpy())
            
            all_preds.append(preds_orig)
            all_targets.append(targets_orig)
            
    val_rmse = np.sqrt(mean_squared_error(np.concatenate(all_targets), np.concatenate(all_preds)))
    
    print(f"Epoch {epoch+1}/{NUM_EPOCHS} -> 训练损失: {epoch_train_loss:.4f} | 验证 RMSE: {val_rmse:.4f}")
    
    # 保存最佳模型
    if val_rmse < best_rmse:
        best_rmse = val_rmse
        torch.save(model.state_dict(), 'best_visual_model.pth')
        print(f"  -> 新的最佳模型已保存! RMSE: {best_rmse:.4f}")

print(f"\n--- 训练完成 ---\n最好的纯视觉模型 RMSE 是: {best_rmse:.4f}")


正在使用设备: cpu
开始训练纯视觉模型...


Epoch 1/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 1/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/20 -> 训练损失: 5.7119 | 验证 RMSE: 30.0886
  -> 新的最佳模型已保存! RMSE: 30.0886


Epoch 2/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 2/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 2/20 -> 训练损失: 1.5520 | 验证 RMSE: 19.3755
  -> 新的最佳模型已保存! RMSE: 19.3755


Epoch 3/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 3/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 3/20 -> 训练损失: 1.0056 | 验证 RMSE: 16.9863
  -> 新的最佳模型已保存! RMSE: 16.9863


Epoch 4/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 4/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 4/20 -> 训练损失: 0.8457 | 验证 RMSE: 19.2213


Epoch 5/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 5/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 5/20 -> 训练损失: 0.7287 | 验证 RMSE: 17.4288


Epoch 6/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 6/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 6/20 -> 训练损失: 0.6210 | 验证 RMSE: 16.9898


Epoch 7/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 7/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 7/20 -> 训练损失: 0.5995 | 验证 RMSE: 14.9775
  -> 新的最佳模型已保存! RMSE: 14.9775


Epoch 8/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 8/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 8/20 -> 训练损失: 0.5491 | 验证 RMSE: 17.7047


Epoch 9/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 9/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 9/20 -> 训练损失: 0.5836 | 验证 RMSE: 17.0306


Epoch 10/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 10/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 10/20 -> 训练损失: 0.5410 | 验证 RMSE: 15.5383


Epoch 11/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 11/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 11/20 -> 训练损失: 0.5327 | 验证 RMSE: 16.1423


Epoch 12/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 12/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 12/20 -> 训练损失: 0.4921 | 验证 RMSE: 15.3805


Epoch 13/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 13/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 13/20 -> 训练损失: 0.4751 | 验证 RMSE: 14.7648
  -> 新的最佳模型已保存! RMSE: 14.7648


Epoch 14/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 14/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 14/20 -> 训练损失: 0.4655 | 验证 RMSE: 13.2799
  -> 新的最佳模型已保存! RMSE: 13.2799


Epoch 15/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 15/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 15/20 -> 训练损失: 0.4392 | 验证 RMSE: 15.6569


Epoch 16/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 16/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 16/20 -> 训练损失: 0.4115 | 验证 RMSE: 15.7028


Epoch 17/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 17/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 17/20 -> 训练损失: 0.4100 | 验证 RMSE: 16.1350


Epoch 18/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 18/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 18/20 -> 训练损失: 0.3823 | 验证 RMSE: 15.1619


Epoch 19/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 19/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 19/20 -> 训练损失: 0.3874 | 验证 RMSE: 15.3223


Epoch 20/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 20/20 [验证]:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 20/20 -> 训练损失: 0.4026 | 验证 RMSE: 14.9275

--- 训练完成 ---
最好的纯视觉模型 RMSE 是: 13.2799


In [7]:
# =======================================================
#           基于 DINOv2 的纯视觉模型训练完整脚本
# =======================================================

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from torchvision import transforms
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tqdm.notebook import tqdm
import torch.optim as optim

# --- 1. 定义 DINOv2 VisualModel ---
class VisualModel(nn.Module):
    def __init__(self, num_targets=5):
        super(VisualModel, self).__init__()
        
        print("正在加载 DINOv2 (Small) 模型，首次运行需要下载...")
        # 从 PyTorch Hub 加载 dinov2_vits14 (Small版本，速度快效果好)
        self.backbone = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
        
        # 冻结 DINOv2 骨干网络，只作为特征提取器
        for param in self.backbone.parameters():
            param.requires_grad = False
            
        # DINOv2 Small 的输出维度是 384
        self.embed_dim = 384 
        
        # 定义回归头 (Head)
        self.head = nn.Sequential(
            nn.Linear(self.embed_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_targets)
        )

    def forward(self, image):
        # 提取特征：使用 [CLS] token
        features = self.backbone.forward_features(image)['x_norm_clstoken']
        # 预测
        output = self.head(features)
        return output

# --- 2. 数据集定义 ---
class BiomassDataset(Dataset):
    def __init__(self, dataframe, image_dir, target_cols, transform=None):
        self.df = dataframe
        self.image_dir = image_dir
        self.transform = transform
        # Log 变换目标值
        self.targets = np.log1p(self.df[target_cols].values.astype(np.float32))

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx]['image_path']
        img_path = os.path.join(self.image_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        target_row = torch.tensor(self.targets[idx], dtype=torch.float)
        return image, target_row

# --- 3. 准备数据 ---
# 假设 df_wide 已经存在于内存中 (由之前的 pivot_table 生成)
IMAGE_DIR = './' 
target_cols = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'GDM_g', 'Dry_Total_g']

# 划分数据集
train_df, val_df = train_test_split(df_wide, test_size=0.2, random_state=42)

# DINOv2 推荐的图像变换 (必须 Resize 到 14 的倍数，224 是标准)
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2), # 稍微加点颜色增强
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

train_dataset = BiomassDataset(train_df.reset_index(drop=True), IMAGE_DIR, target_cols, transform=data_transforms['train'])
val_dataset = BiomassDataset(val_df.reset_index(drop=True), IMAGE_DIR, target_cols, transform=data_transforms['val'])

# DINOv2 显存占用稍大，如果爆显存可以将 batch_size 调小为 8
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=0)

# --- 4. 初始化模型和训练组件 ---
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"正在使用设备: {device}")

model = VisualModel().to(device)
criterion = nn.MSELoss()
# 只优化 head 的参数，学习率设为 1e-3
optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3, weight_decay=1e-4)

# --- 5. 开始训练 ---
NUM_EPOCHS = 20
best_rmse = float('inf')

print("开始训练 DINOv2 纯视觉模型...")

for epoch in range(NUM_EPOCHS):
    # 训练阶段
    model.train()
    running_loss = 0.0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [训练]")
    
    for images, targets in pbar:
        images, targets = images.to(device), targets.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)
    
    epoch_train_loss = running_loss / len(train_dataset)

    # 验证阶段
    model.eval()
    all_preds = []
    all_targets = []
    
    with torch.no_grad():
        pbar_val = tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [验证]")
        for images, targets in pbar_val:
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            
            # 还原 log 变换
            preds_orig = np.expm1(outputs.cpu().numpy())
            targets_orig = np.expm1(targets.cpu().numpy())
            
            all_preds.append(preds_orig)
            all_targets.append(targets_orig)
            
    val_rmse = np.sqrt(mean_squared_error(np.concatenate(all_targets), np.concatenate(all_preds)))
    
    print(f"Epoch {epoch+1}/{NUM_EPOCHS} -> 训练损失: {epoch_train_loss:.4f} | 验证 RMSE: {val_rmse:.4f}")
    
    # 保存最佳模型
    if val_rmse < best_rmse:
        best_rmse = val_rmse
        torch.save(model.state_dict(), 'best_visual_model.pth')
        print(f"  -> 新的最佳模型已保存! RMSE: {best_rmse:.4f}")

print(f"\n--- 训练完成 ---\n最好的 DINOv2 模型 RMSE 是: {best_rmse:.4f}")


正在使用设备: cpu
正在加载 DINOv2 (Small) 模型，首次运行需要下载...


Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /Users/nisikin/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to /Users/nisikin/.cache/torch/hub/checkpoints/dinov2_vits14_pretrain.pth
100%|██████████| 84.2M/84.2M [00:20<00:00, 4.41MB/s]

开始训练 DINOv2 纯视觉模型...





Epoch 1/20 [训练]:   0%|          | 0/18 [00:00<?, ?it/s]

FileNotFoundError: [Errno 2] No such file or directory: './train/ID786365141.jpg'