In [1]:
# Maps each numeric class index to its display name.
# The names are listed in index order, so enumerate() assigns the keys 0..4.
classification_names = dict(enumerate([
    '上身衣服',  # 0: upper-body clothing
    '鞋',        # 1: shoes
    '包',        # 2: bags
    '下身衣服',  # 3: lower-body clothing
    '手表',      # 4: watches
]))

In [2]:
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, random_split  # 数据集处理模块

from PIL import Image
import re

In [3]:
# Natural ("alphanumeric") ordering for image file names.
def sorted_alphanum(img_names):
    """Return ``img_names`` sorted so that embedded numbers compare numerically.

    Each name is split into alternating text / ASCII-digit chunks; digit
    chunks are compared as integers and text chunks case-insensitively, so
    ``'2.jpg'`` sorts before ``'10.jpg'``.

    Args:
        img_names: iterable of file-name strings.

    Returns:
        A new list with the names in natural order.
    """
    def alphanum_key(img_name):
        chunks = re.split(r'([0-9]+)', img_name)
        # re.split with one capturing group always yields
        # [text, digits, text, digits, ..., text]: the captured digit runs sit
        # at the odd positions. Deciding by position instead of str.isdigit()
        # keeps non-ASCII digits (e.g. '²' or Arabic-Indic numerals), which the
        # regex never splits on, from being fed to int() or from producing an
        # int where other keys hold a str.
        return [
            int(chunk) if position % 2 else chunk.lower()
            for position, chunk in enumerate(chunks)
        ]

    return sorted(img_names, key=alphanum_key)

# Custom dataset type whose elements are (image, label) pairs.
class ImageLabelDataset(Dataset):
    """Dataset yielding ``(transformed_image, label)`` for every entry of a directory.

    Args:
        image_dir: directory whose entries are treated as the image files.
        transform: callable applied to the RGB PIL image. It may be omitted at
            construction time, but indexing the dataset without it raises
            ``ValueError``.
        label_csv: path of the CSV file providing the ``id`` and ``target``
            columns. Defaults to the path the notebook originally hard-coded.
    """

    def __init__(self, image_dir, transform=None, label_csv='../common/fashion-labels.csv'):
        self.main_dir = image_dir
        self.transform = transform
        # Every directory entry, in natural (alphanumeric) order.
        self.image_names = sorted_alphanum(os.listdir(image_dir))
        # Label table as a DataFrame, plus an id -> target lookup built from it.
        self.labels = pd.read_csv(label_csv)
        self.label_dict = dict(zip(self.labels['id'], self.labels['target']))

    def __len__(self):
        """Number of entries found in the image directory."""
        return len(self.image_names)

    def _label_for(self, idx):
        """Return the label of the image stored at position ``idx``."""
        stem = os.path.splitext(self.image_names[idx])[0]
        # NOTE(review): assumes a purely numeric file stem is the image's 'id'
        # in the label CSV -- confirm against the dataset layout. Looking the
        # label up by that id keeps labels attached to the right image even
        # when the directory listing has gaps or stray entries.
        if stem.isascii() and stem.isdigit():
            image_id = int(stem)
            if image_id in self.label_dict:
                return self.label_dict[image_id]
        # Otherwise fall back to the original positional lookup.
        return self.label_dict[idx]

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(tensor_img, label)``.

        Raises:
            ValueError: if the dataset was built without a transform.
            KeyError: if no label can be found for the image.
        """
        # 1. Build the full path of the image from its position.
        image_loc = os.path.join(self.main_dir, self.image_names[idx])
        # 2. Fail before touching the disk when there is no transform.
        if self.transform is None:
            raise ValueError("transform 参数不能为 None！")
        # 3. Open with PIL; the context manager releases the file handle as
        #    soon as the pixel data has been converted to RGB.
        with Image.open(image_loc) as raw_image:
            image = raw_image.convert('RGB')
        # 4. Apply the transform (expected to produce a tensor).
        tensor_img = self.transform(image)
        # 5. Look up the label that belongs to this image.
        label = self._label_for(idx)

        # Return (image tensor, class label).
        return tensor_img, label

In [4]:
import torchvision.transforms as T  # 图像预处理模块

# Smoke test of the main pipeline.
# Use the GPU when torch can see one, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Preprocessing: resize every image to 64x64, then convert it to a tensor.
transform = T.Compose([T.Resize((64, 64)), T.ToTensor()])

# 1. Build the dataset over the image directory and report how many items it holds.
full_dataset = ImageLabelDataset(
    image_dir='/root/datasets/imagedata/dataset',
    transform=transform,
)
print(len(full_dataset))

24853


In [5]:
# 2. Split the dataset into 75% training / 25% test subsets.
# The explicitly seeded generator makes the split reproducible: without it the
# train/test membership changes on every kernel restart, so later results
# cannot be compared between runs.
train_dataset, test_dataset = random_split(
    full_dataset,
    [0.75, 0.25],
    generator=torch.Generator().manual_seed(42),
)
print(len(train_dataset))
print(len(test_dataset))

18640
6213


In [6]:
# 3. Wrap both subsets in DataLoaders that share one batch size.
batch_size = 32
# Training batches are shuffled and the final incomplete batch is dropped.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
# The test loader keeps the DataLoader defaults for everything but the batch size.
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [7]:
# Sanity check: print the tensor shapes of the first training batch only.
for (x, y) in train_loader:
    print("x shape: ", x.shape)
    print("y shape: ", y.shape)
    break  # one batch is enough for the check

x shape:  torch.Size([32, 3, 64, 64])
y shape:  torch.Size([32])


In [8]:
import torch.nn as nn
# 定义模型
class Classifier(nn.Module):
    """Small CNN classifier: two conv + ReLU + max-pool stages, then one linear layer.

    Args:
        n_classes: number of output values produced per sample.
        input_size: spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. The default of 64
            reproduces the original ``16 * 16 * 16`` linear input width.
        verbose: when True (the default, which keeps the original output),
            ``forward`` prints the tensor shape after every stage. Pass False
            to silence the trace, e.g. inside a training loop.
    """

    def __init__(self, n_classes=5, input_size=64, verbose=True):
        super().__init__()
        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size
        self.verbose = verbose
        # Convolution layers (3x3 kernels, stride 1, padding 1 keep the spatial size).
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1)
        # Shared 2x2 pooling layer, applied after each convolution.
        self.pool = nn.MaxPool2d(2, 2)
        # Fully connected layer. The two pooling steps each halve (floor) the
        # height and width, so the flattened width is 16 channels times
        # (height // 4) * (width // 4).
        self.linear = nn.Linear(16 * (height // 4) * (width // 4), n_classes)

    def _trace(self, stage, x):
        """Print ``stage`` followed by the shape of ``x`` when tracing is on."""
        if self.verbose:
            print(stage, x.shape)

    def forward(self, x):
        """Run the network on a batch of images and return one row of scores per sample."""
        # First convolution + ReLU.
        x = torch.relu(self.conv1(x))
        self._trace("第一卷积层输出形状：", x)
        # First pooling.
        x = self.pool(x)
        self._trace("第一池化层输出形状：", x)
        # Second convolution + ReLU.
        x = torch.relu(self.conv2(x))
        self._trace("第二卷积层输出形状：", x)
        # Second pooling.
        x = self.pool(x)
        self._trace("第二池化层输出形状：", x)
        # Flatten everything except the batch dimension.
        x = x.reshape(x.shape[0], -1)
        self._trace("展开为一维向量形状：", x)
        # Fully connected layer.
        x = self.linear(x)
        self._trace("全连接层输出形状：", x)
        return x

In [9]:
# Instantiate the classifier with its default arguments and print its layer summary.
model = Classifier()
print(model)

Classifier(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear): Linear(in_features=4096, out_features=5, bias=True)
)


In [10]:
# Fetch a single batch from the training loader to test the model with.
data_iter = iter(train_loader)
x, y = next(data_iter)
print(x.shape, y.shape)

torch.Size([32, 3, 64, 64]) torch.Size([32])


In [11]:
# Forward pass on the batch held in `x`, then print the shape of the model output.
output = model(x)
print(output.shape)

第一卷积层输出形状： torch.Size([32, 8, 64, 64])
第一池化层输出形状： torch.Size([32, 8, 32, 32])
第二卷积层输出形状： torch.Size([32, 16, 32, 32])
第二池化层输出形状： torch.Size([32, 16, 16, 16])
展开为一维向量形状： torch.Size([32, 4096])
全连接层输出形状： torch.Size([32, 5])
torch.Size([32, 5])
