# CNN代码实现
## Framework
input: 480 × 480 × 3 RGB image (resized to 256 × 256 by the preprocessing transform before it is fed to the network)

output: 4 × 1 vector of parameters [a, b, c, d]

model: ResNet-50 (pretrained backbone with its final fully connected layer replaced by a custom regression head)

loss function: MSE Loss

optimizer: Adam

In [1]:
# 调用相关python库

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torchvision.models as models
from torch.utils.data import Dataset
from torchvision import transforms

import os
import numpy as np
from PIL import Image


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Select the compute device: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [4]:
# CNN model definition
class BuildingParamNet(nn.Module):
    """ResNet-50 backbone followed by a small MLP head that regresses ``output_dim`` values.

    The backbone's own classification layer is replaced by ``nn.Identity`` so
    that it emits its pooled feature vector, which is then mapped to the
    requested number of outputs by ``self.fc``.

    Args:
        output_dim: Number of values predicted per image (default 4).
        pretrained: When True (default, matching the previous behaviour) the
            backbone is initialised with torchvision's ImageNet weights. Pass
            False to skip loading them, e.g. when a saved ``state_dict`` is
            loaded immediately afterwards.
    """

    def __init__(self, output_dim=4, pretrained=True):
        super().__init__()
        self.backbone = self._build_backbone(pretrained)
        num_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()  # drop the final fully connected layer

        # Regression head: backbone features -> 512 hidden units -> output_dim values
        self.fc = nn.Sequential(
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.Linear(512, output_dim),
        )

    @staticmethod
    def _build_backbone(pretrained):
        """Create the ResNet-50 backbone without using the deprecated ``pretrained`` flag when possible."""
        weights_enum = getattr(models, "ResNet50_Weights", None)
        if weights_enum is None:
            # Older torchvision releases only understand the legacy boolean flag.
            return models.resnet50(pretrained=pretrained)
        # IMAGENET1K_V1 is the checkpoint that ``pretrained=True`` used to load,
        # so results stay comparable with models trained before this change.
        weights = weights_enum.IMAGENET1K_V1 if pretrained else None
        return models.resnet50(weights=weights)

    def forward(self, x):
        """Run the backbone and the regression head on a batch of images ``x``."""
        features = self.backbone(x)
        return self.fc(features)

In [3]:
# Hyperparameters
output_dim = 4        # number of values predicted per image: [a, b, c, d]
learning_rate = 1e-4  # learning rate passed to the optimizer
num_epochs = 30       # number of passes over the training set
batch_size = 16       # samples per mini-batch

In [5]:
# data process

class CustomDataset(Dataset):
    """Dataset of JPEG images paired with same-named ``.npy`` target arrays.

    Every ``<name>.jpg`` file directly inside ``image_folder`` is one sample;
    its target is loaded from ``<name>.npy`` in the same folder.

    Args:
        image_folder: Directory that holds the images and their ``.npy`` files.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, image_folder, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible across runs and machines.
        self.image_files = sorted(
            f for f in os.listdir(image_folder) if f.endswith('.jpg')
        )

    def __len__(self):
        """Return the number of ``.jpg`` images found in the folder."""
        return len(self.image_files)

    def _param_path(self, image_name):
        """Return the path of the ``.npy`` target file that belongs to ``image_name``."""
        # Swap only the file extension. A plain str.replace on the full path
        # would also rewrite any '.jpg' that appears in a directory name.
        stem, _ = os.path.splitext(image_name)
        return os.path.join(self.image_folder, stem + '.npy')

    def __getitem__(self, idx):
        """Return ``(image, params)`` for sample ``idx``.

        ``image`` is the RGB image after the optional transform and ``params``
        is the content of the matching ``.npy`` file as a float32 tensor.
        """
        image_name = self.image_files[idx]
        image_path = os.path.join(self.image_folder, image_name)

        # convert() returns an independent image, so the underlying file can be
        # closed as soon as the conversion is done.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        # Load the target array stored next to the image
        params = np.load(self._param_path(image_name))

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(params, dtype=torch.float32)

# Dataset directories
path_to_train_dataset = '/home/sjtu_dzn/Project/data/easy/train_dataset'
path_to_test_dataset = '/home/sjtu_dzn/Project/data/easy/test_dataset'

# Image preprocessing: resize every image to 256 x 256, then convert it to a tensor
preprocessing_steps = [
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Build the datasets; both splits share the same preprocessing
train_dataset = CustomDataset(path_to_train_dataset, transform=transform)
test_dataset = CustomDataset(path_to_test_dataset, transform=transform)

# Wrap the datasets in loaders; only the training data is shuffled
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [11]:
## train stage

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the model with the output size from the hyperparameter cell and move it to the device
model = BuildingParamNet(output_dim=output_dim).to(device)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Fail early with a clear message instead of dividing by zero after the first epoch
num_batches = len(train_loader)
if num_batches == 0:
    raise ValueError("train_loader is empty: no training batches to iterate over")

print("beginning:")
# Train the model
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0  # sum of the per-batch losses of this epoch
    for images, params in train_loader:
        images, params = images.to(device), params.to(device)  # move the batch to the device
        # Drop the trailing dimension of the targets so their shape matches the outputs
        # (assumes each target is stored with a trailing singleton dimension — TODO confirm)
        params = params.squeeze(-1)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, params)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Report the mean of the per-batch losses
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/num_batches:.4f}")

    torch.cuda.empty_cache()  # release cached GPU memory at the end of every epoch

beginning:
Epoch [1/30], Loss: 1.8589
Epoch [2/30], Loss: 0.2976
Epoch [3/30], Loss: 0.1190
Epoch [4/30], Loss: 0.0703
Epoch [5/30], Loss: 0.0926
Epoch [6/30], Loss: 0.1138
Epoch [7/30], Loss: 0.0748
Epoch [8/30], Loss: 0.0534
Epoch [9/30], Loss: 0.0560
Epoch [10/30], Loss: 0.0634
Epoch [11/30], Loss: 0.0860
Epoch [12/30], Loss: 0.1044
Epoch [13/30], Loss: 0.0546
Epoch [14/30], Loss: 0.0692
Epoch [15/30], Loss: 0.0453
Epoch [16/30], Loss: 0.0583
Epoch [17/30], Loss: 0.0686
Epoch [18/30], Loss: 0.0488
Epoch [19/30], Loss: 0.0443
Epoch [20/30], Loss: 0.0494
Epoch [21/30], Loss: 0.0489
Epoch [22/30], Loss: 0.0642
Epoch [23/30], Loss: 0.0603
Epoch [24/30], Loss: 0.0610
Epoch [25/30], Loss: 0.0563
Epoch [26/30], Loss: 0.0480
Epoch [27/30], Loss: 0.0441
Epoch [28/30], Loss: 0.0331
Epoch [29/30], Loss: 0.0438
Epoch [30/30], Loss: 0.0429


In [20]:
# Evaluate the model on the test set
model.eval()
total_loss = 0.0  # sum of the per-batch losses

# Gradients are not needed for evaluation
with torch.no_grad():
    for images, params in test_loader:
        # Move the batch to the device and drop the trailing target dimension
        # so that the targets have the same shape as the outputs
        images, params = images.to(device), params.to(device).squeeze(-1)
        outputs = model(images)
        print("answer:", outputs)
        loss = criterion(outputs, params)
        total_loss = total_loss + loss.item()

# Mean of the per-batch losses
mean_test_loss = total_loss / len(test_loader)
print(f"Test Loss: {mean_test_loss:.4f}")

answer: tensor([[ 3.0336,  3.0618,  2.0173,  1.9724],
        [ 3.9022,  3.0386,  1.0618,  1.9242],
        [ 1.9409,  3.1155,  0.9223,  0.9951],
        [ 3.9559,  1.0257,  1.0752,  1.9180],
        [ 3.9497,  3.0367,  2.0532,  1.9236],
        [ 1.9916,  1.0480,  0.9545,  0.9963],
        [ 4.0121,  1.0376,  2.0935,  1.8889],
        [ 1.9893,  2.1156,  0.9444,  2.0306],
        [ 2.9330,  3.0469,  1.9742,  0.8893],
        [ 2.0240,  2.1179,  1.9953,  0.9513],
        [ 4.0518,  2.0171,  2.0520,  0.8875],
        [ 2.0558,  2.2205,  0.9325,  1.0128],
        [ 1.8926,  3.1019,  0.9325,  2.0664],
        [ 4.0403,  2.0730,  1.0188,  0.9103],
        [ 1.9082,  0.0234,  1.8715, -0.0762],
        [ 2.0174,  0.0366,  0.9140, -0.0255]], device='cuda:0')
answer: tensor([[4.0039, 2.0648, 2.0840, 1.9127],
        [3.9989, 3.0092, 2.0302, 0.8767],
        [3.0146, 1.0732, 0.9622, 0.9418],
        [2.0019, 3.1205, 1.9852, 0.9637],
        [3.0366, 1.0178, 1.9979, 0.9001],
        [1.9465, 3.1

In [None]:
## Save the trained model weights
model_pth = "/home/sjtu_dzn/Project/model/cnn.pth"
# torch.save does not create missing directories; make sure the target folder
# exists so the trained weights are not lost to a failed save.
os.makedirs(os.path.dirname(model_pth), exist_ok=True)
torch.save(model.state_dict(), model_pth)
