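## 一、对训练集进行预测
数据划分：模型训练时我们固定了随机种子`manual_seed(8)`，因此这里使用同样的种子可以复现当时的划分：占比80%的训练集`train_data_raw`和占比20%的测试集`test_data_raw`（模型训练时没有使用测试集的数据）。本notebook对训练集`train_data_raw`进行预测，结果保存为`predictions_with_building_age_model_6_on_train_data.csv`。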

### 1.读取图片文件并分割出测试集数据

In [3]:
# 重新加载
import torch
from torch import manual_seed
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, Dataset

# Root folder of the image data; the trailing comment keeps a relative local path as an alternative.
img_root = '/root/autodl-tmp/GSV/clip' # r"../../data/GSV/clip"
# Load all images without a transform; the transform is applied later by a wrapper dataset.
all_data = datasets.ImageFolder(root=img_root)
# Split sizes: 80% for training, the remainder for testing.
train_size = int(0.8 * len(all_data))
test_size = len(all_data) - train_size
# Fix the global random seed right before splitting so the split is reproducible.
# The notebook text states that the same seed (8) was used when the model was trained.
manual_seed(8)
train_data_raw, test_data_raw = random_split(all_data, [train_size, test_size])

In [4]:
# Number of samples in the training split.
len(train_data_raw)

63642

### 2.获取类名列表

In [5]:
# Class names as exposed by the ImageFolder dataset.
class_names = all_data.classes

# Mapping from class name to integer label index.
# NOTE: in the printed mapping 'pre-1652' has index 8, so label indices are not in chronological order.
class_dict = all_data.class_to_idx
print(class_dict)

{'1653–1705': 0, '1706–1764': 1, '1765–1845': 2, '1846–1910': 3, '1911–1943': 4, '1944–1977': 5, '1978–1994': 6, '1995–2023': 7, 'pre-1652': 8}


### 3.自定义加载dataset并加载

我们需要进一步修改CustomDataset类，使其同时返回从文件名中解析出的建筑id。然后在预测循环中收集这些建筑id，并在所有预测完成后将它们与预测结果、真实标签一起保存到CSV文件中。实现步骤如下：

In [6]:
import os


class CustomDataset(Dataset):
    """Wrap a split subset to apply a transform and expose each image's building id.

    Args:
        subset: A subset object providing ``indices`` (positions in the parent
            dataset), ``dataset.imgs`` (a sequence of ``(path, label)`` pairs)
            and item access that returns ``(image, label)``.
        transform: Optional callable applied to every image before it is returned.
    """

    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform
        # (path, class_index) pairs of the parent dataset. They are indexed by
        # the parent's positions, not by positions inside the subset.
        self.imgs = subset.dataset.imgs

    @staticmethod
    def _building_id_from_path(file_path):
        """Return the second-to-last ``--``-separated field of the file name.

        Raises:
            ValueError: If the file name contains no ``--`` separator.
        """
        parts = os.path.basename(file_path).split('--')
        if len(parts) < 2:
            # Deliberately not IndexError: an IndexError escaping __getitem__
            # is interpreted by plain ``for`` iteration as "end of sequence"
            # and would silently truncate the dataset.
            raise ValueError(
                f"Cannot extract a building id from {file_path!r}: "
                "expected at least one '--' separator in the file name"
            )
        return parts[-2]

    def __getitem__(self, index):
        """Return ``(image, class_index, building_id)`` for the item at ``index``."""
        img, y = self.subset[index]  # y is the class index

        # Translate the subset position into the parent position to find the file path.
        file_path = self.imgs[self.subset.indices[index]][0]
        building_id = self._building_id_from_path(file_path)

        if self.transform is not None:
            img = self.transform(img)

        return img, y, building_id

    def __len__(self):
        """Return the number of items in the wrapped subset."""
        return len(self.subset)

### 4.定义transform并加载数据集（本notebook中为训练集）

In [7]:
# Only two steps are needed: resize to 400x400 and convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize(size=(400, 400), antialias=True),
        transforms.ToTensor(),
    ]
)

In [8]:
# Wrap the training split so each item is (transformed image, class index, building id).
train_data = CustomDataset(train_data_raw, transform=transform)

In [9]:
# #### 测试能不能获取id
# # 获取数据集中的前几个项
# for i in range(5):  # 例如，检查前5项
#     img, y, id = train_data[i]
#     print(f"Item {i}:")
#     print(f"    ID: {id}")
#     print(f"    Label: {y}")
#     # 如果图片是一个张量，您可以打印其形状
#     print(f"    Image shape: {img.shape if hasattr(img, 'shape') else 'not a tensor'}")
#     print("\n")

### 5.加载模型

In [10]:
from torchvision.models import densenet121
from torchvision.models.densenet import DenseNet121_Weights
from torch import load, cuda
import torch.nn as nn

# Pick the device first so the checkpoint can be mapped onto it while loading.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the DenseNet121 architecture without downloading the ImageNet weights:
# load_state_dict below is strict by default, so every parameter and buffer is
# replaced by the checkpoint and pretrained weights would only be discarded.
model = densenet121(weights=None)

# Replace the classification head so it outputs the 9 building-age classes.
num_features = model.classifier.in_features
model.classifier = nn.Linear(num_features, 9)

# Load the fine-tuned building-age model weights.
# map_location allows a checkpoint saved from a GPU run to load on a CPU-only machine.
model_path = '../weights_6/model_epoch_32.pth'
model.load_state_dict(load(model_path, map_location=device))

# Switch to evaluation mode (changes the behaviour of layers such as BatchNorm).
model.eval()

# Kept as the last expression so the cell still displays the model structure.
model.to(device)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [12]:
# Build the DataLoader. On a cloud RTX 4090, BATCH_SIZE = 704 with num_workers=12 worked well.
BATCH_SIZE = 128 # 704

# shuffle=False keeps the samples in dataset order.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=12)
# Number of batches.
len(train_loader)

498

### 6.进行预测

In [13]:
# Accumulators filled by the prediction loop: true and predicted class indices.
true_labels = []
pred_labels = []
# Building ids parsed from the file names, in the same order as the labels.
ids_list = []

In [None]:
from tqdm import tqdm

# Start from empty accumulators so that re-running this cell (for example after
# an interrupted run) does not append a second copy of the results.
true_labels.clear()
pred_labels.clear()
ids_list.clear()

with torch.inference_mode():
    # The loop variable is not called `id`, which would shadow the builtin for
    # the rest of the notebook session.
    for images, labels, building_ids in tqdm(train_loader, desc="Evaluating"):
        # Only the images are needed on the device; the labels are just recorded.
        images = images.to(device)

        # Forward pass
        outputs = model(images)

        # Predicted class index = position of the largest output per sample
        batch_pred_labels = outputs.argmax(dim=1)

        # Store the true labels, predictions and building ids for this batch
        true_labels.extend(labels.tolist())
        pred_labels.extend(batch_pred_labels.tolist())
        ids_list.extend(building_ids)

    # To inspect the accumulated results, print these lists:
    # print("True Labels:", true_labels)
    # print("Predicted Labels:", pred_labels)
    # print("IDs:", ids_list)

Evaluating:  91%|█████████ | 453/498 [02:29<00:14,  3.15it/s]

### 7.保存预测结果

In [None]:
import pandas as pd

# Assemble one row per sample: building id, predicted class index, true class index.
df_predictions = pd.DataFrame(
    data={
        'id': ids_list,
        'prediction': pred_labels,
        'true_label': true_labels,
    },
)

# Write the table to a CSV file without the index column.
df_predictions.to_csv(
    path_or_buf='predictions_with_building_age_model_6_on_train_data.csv',
    index=False,
)