In [None]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from deeplearning_model import EarlyStopping, ModelSaver,train_classification_model,plot_learning_curves
from deeplearning_model import evaluate_classification_model as evaluate_model

# 加载数据并处理为tensor

In [None]:
from pathlib import Path

DATA_DIR = Path("./archive/")

# Preprocessing: the training and validation splits share one recipe --
# resize to 128x128, convert the image to a tensor, then normalize each
# channel with the ImageNet mean and standard deviation.
data_transforms = {
    split: transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    for split in ('training', 'validation')
}

# ImageFolder expects the on-disk layout root/<class>/<image file>.
train_dataset = datasets.ImageFolder(root=DATA_DIR / 'training', transform=data_transforms['training'])
test_dataset = datasets.ImageFolder(root=DATA_DIR / 'validation', transform=data_transforms['validation'])

# Report the classes exposed by the training dataset.
class_names = train_dataset.classes
print(f"类别数量: {len(class_names)}")
print(f"类别名称: {class_names}")

# Inspect the first training sample.
img, label = train_dataset[0]
print(f"图像形状: {img.shape}")  # expected to be [3, 128, 128]
print(f"标签: {label} (类别: {class_names[label]})")


# 自定义数据集类，继承ImageFolder

In [None]:
from pathlib import Path

# Root folder of the image data; the `training` and `validation` sub-folders are read from it below.
DATA_DIR = Path("./archive/")

# 自定义数据集类，继承ImageFolder
class MonkeyDataset(datasets.ImageFolder):
    """Custom dataset class built on top of ``datasets.ImageFolder``.

    It adds no behavior of its own: construction is forwarded unchanged to
    the parent class.

    Args:
        root: Directory handed to ``ImageFolder`` as its ``root``.
        transform: Optional transform handed to ``ImageFolder``.
    """

    def __init__(self, root, transform=None):
        super().__init__(root, transform=transform)
        

# Preprocessing: the training and validation splits share one recipe --
# resize to 128x128, convert the image to a tensor, then normalize per channel.
# The mean/std below are dataset-specific values, not the ImageNet constants
# (presumably the output of cal_mean_std on the training set -- verify).
data_transforms = {
    split: transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4363, 0.4328, 0.3291], std=[0.2085, 0.2032, 0.1988]),
    ])
    for split in ('training', 'validation')
}

# Load both splits through the custom MonkeyDataset class.
train_dataset = MonkeyDataset(root=DATA_DIR / 'training', transform=data_transforms['training'])
test_dataset = MonkeyDataset(root=DATA_DIR / 'validation', transform=data_transforms['validation'])

# Report the classes exposed by the training dataset.
class_names = train_dataset.classes
print(f"类别数量: {len(class_names)}")
print(f"类别名称: {class_names}")

# Inspect the first training sample.
img, label = train_dataset[0]
print(f"图像形状: {img.shape}")  # expected to be [3, 128, 128]
print(f"标签: {label} (类别: {class_names[label]})")


# 计算train_dataset各通道的均值和标准差，用于数据归一化

In [None]:
def cal_mean_std(ds):
    """Estimate the per-channel mean and standard deviation of an image dataset.

    For every sample the statistics are taken over dimensions 1 and 2 of the
    image tensor, leaving one value per entry of dimension 0, and these
    per-image values are then averaged over the whole dataset.  The returned
    ``std`` is therefore the *average of per-image standard deviations*, not
    the standard deviation of all pixels pooled together.

    Args:
        ds: A sized iterable of ``(img, label)`` pairs in which ``img`` is a
            3-D tensor (assumed layout: channels, height, width).

    Returns:
        tuple: ``(mean, std)``, each a 1-D tensor with one entry per channel.

    Raises:
        ValueError: If ``ds`` contains no samples.
    """
    num_images = len(ds)
    if num_images == 0:
        # Fail with a clear message instead of an opaque float division by zero.
        raise ValueError("cal_mean_std() requires a non-empty dataset")

    mean = 0.
    std = 0.
    for img, _ in ds:
        # dim=(1, 2) reduces over the two trailing axes, so one value per
        # channel (dimension 0) survives.
        mean += img.mean(dim=(1, 2))
        std += img.std(dim=(1, 2))
    mean /= num_images
    std /= num_images
    return mean, std
# NOTE(review): `train_dataset` as built earlier in this notebook already applies
# transforms.Normalize, so this call measures the normalized tensors rather than
# the raw pixel statistics. To derive normalization constants, run it on a
# dataset whose transform stops at ToTensor().
cal_mean_std(train_dataset)

# 创建数据加载器

In [None]:
# Build the data loaders.
batch_size = 32

# shuffle=True: the training samples are drawn in a shuffled order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# NOTE(review): val_loader and test_loader both wrap `test_dataset`, so they
# yield exactly the same samples in the same (unshuffled) order.
val_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Report dataset and batching sizes.
print(f"训练集大小: {len(train_dataset)}")
print(f"测试集大小: {len(test_dataset)}")
print(f"批次大小: {batch_size}")
print(f"训练批次数: {len(train_loader)}")


# 搭建模型，定义CNN卷积模型

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Convolutional image classifier for 3-channel 128x128 inputs.

    Architecture: three blocks of two 3x3 convolutions (32, 64 and 128
    channels; padding=1 keeps the spatial size), each block followed by a
    2x2 max pooling that halves height and width, then a 256-unit fully
    connected layer with dropout and a linear output layer.

    The first fully connected layer expects ``128 * 16 * 16`` features, i.e.
    a 16x16 feature map after the three poolings, which corresponds to
    128x128 input images.

    Args:
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")

        # Block 1: 3 -> 32 channels.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        # Shared 2x2 max pooling (no parameters), reused after every block.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Block 2: 32 -> 64 channels.
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        # Block 3: 64 -> 128 channels.
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        # Classifier head: flattened 128x16x16 feature map -> 256 -> num_classes.
        self.fc1 = nn.Linear(128 * 16 * 16, 256)
        self.dropout = nn.Dropout(0.2)  # regularization between the two linear layers
        self.fc2 = nn.Linear(256, num_classes)

        # Initialize the network weights.
        self.init_weights()

    def init_weights(self):
        """Xavier-uniform initialize conv/linear weights and zero their biases."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Batch of images; the layer sizes assume shape
               ``(batch, 3, 128, 128)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits (no
            softmax is applied).
        """
        # Block 1: two conv + ReLU layers, then pooling.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)

        # Block 2.
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool(x)

        # Block 3.
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.pool(x)

        # Flatten everything except the batch dimension.
        x = x.flatten(1)

        # Classifier head. Dropout goes through the registered module so its
        # probability is defined in exactly one place and follows train()/eval().
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)  # output layer, no activation

        return x
