### 1. 原始数据读取
- 并不是把所有图像全部读进内存！
- 而是把所有图像的`路径`和`类别`归纳和梳理出来！
- train_path, test_path
- train_label, test_label

In [1]:
"""
    尝试读取 train 
    千万不要手动拼接路径！而是要增强程序的鲁棒性，用os模块拼接。
    善加使用 os.path.join() 和 os.listdir()
"""
import os

train_root = os.path.join("gesture","train")
train_paths = []
train_labels = []

In [2]:
# 列出路径下所有文件夹名 和 文件名
os.listdir(train_root)

['eight',
 'five',
 'four',
 'nine',
 'one',
 'seven',
 'six',
 'three',
 'two',
 'zero']

In [3]:
for label in os.listdir(train_root):
    label_root = os.path.join(train_root, label)
    # print(f'查看train_root下所有文件夹的路径{label_root}')
    for file in os.listdir(label_root):
        file_path = os.path.join(label_root, file)
        # print(f'每个图片的路径：{file_path}')
        # print(f'每个图片的标签：{label}')
        train_paths.append(file_path)
        train_labels.append(label)

In [4]:
"""
    尝试读取 test
"""
import os

test_root = os.path.join("gesture","test")
test_paths = []
test_labels = []

for label in os.listdir(test_root):
    label_root = os.path.join(test_root, label)
    for file in os.listdir(label_root):
        file_path = os.path.join(label_root, file)
        test_paths.append(file_path)
        test_labels.append(label)

# 验证数量
print(len(train_paths), len(train_labels))
print(len(test_paths), len(test_labels))

1662 1662
400 400


### 2. 构建 标签字典 Label Dict

In [5]:
labels = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]
label2idx = {label: idx for idx, label in enumerate(labels)}
idx2label = {idx:label for label, idx in label2idx.items()}

In [6]:
print(label2idx, "\n", idx2label)

{'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9} 
 {0: 'zero', 1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five', 6: 'six', 7: 'seven', 8: 'eight', 9: 'nine'}


### 3. 批量化打包【重要，但套路固定】
- 继承 Dataset，自定义一个数据集
- 实例化 DataLoader

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader
# Torch世界里打开图片，最好跟PIL配合
from PIL import Image
# 转张量用的包
from torchvision import transforms

In [8]:
class GestureDataset(Dataset):
    """
        自定义数据集
        __xxx__的方法是 由Python内部调用而不是人调用。
        实际上是重载了父类的方法
    """
    def __init__(self, X, y):
        """
            初始化函数
                X:图像路径
                y:图像标签
        """
        self.X = X
        self.y = y

    
    def __getitem__(self, idx):
        """
            按索引读取样本
        """
        # 获取图像路径
        img_path = self.X[idx]
        # 打开图像
        img = Image.open(fp=img_path)
        # 按照LeNet5 Input的图像大小resize
        img = img.resize((32, 32))
        # 图片还要转成张量,转换后的结构是[C, H, W]的，且数据范围是[0, 1]
        tensor_img = transforms.ToTensor()(img)
        # 将张量数据的范围从[0, 1]改成[-1, 1] => 规范化
        tensor_img = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])(tensor_img)

        # 获取图像标签
        img_label = self.y[idx]
        # 标签转id
        img_label = label2idx.get(img_label)
        # 也得转张量。但这里是数字转张量
        tensor_label = torch.tensor(data=img_label, dtype=torch.float32)
        
        return tensor_img, img_label

        
    def __len__(self):
        """
            返回该数据集 样本个数
        """
        return len(self.X)

In [9]:
# 分别实例化 train数据集和test数据集
train_dataset = GestureDataset(X=train_paths, y=train_labels)
test_dataset = GestureDataset(X=test_paths, y=test_labels)

In [10]:
# 测试__len__()
print(len(train_dataset), len(test_dataset))

1662 400


In [11]:
# 测试__getitem__()方法
train_dataset[0]

(tensor([[[0.3804, 0.3961, 0.4118,  ..., 0.3176, 0.2549, 0.2627],
          [0.3882, 0.4039, 0.4196,  ..., 0.3647, 0.3020, 0.2784],
          [0.3882, 0.4039, 0.4118,  ..., 0.3569, 0.3490, 0.3176],
          ...,
          [0.2392, 0.2627, 0.2863,  ..., 0.2314, 0.2157, 0.1765],
          [0.2235, 0.2392, 0.2627,  ..., 0.2157, 0.2000, 0.1686],
          [0.2078, 0.2235, 0.2392,  ..., 0.1922, 0.1765, 0.1451]],
 
         [[0.3647, 0.3804, 0.4039,  ..., 0.3098, 0.2314, 0.2314],
          [0.3725, 0.3882, 0.4039,  ..., 0.3490, 0.2784, 0.2471],
          [0.3725, 0.3882, 0.4118,  ..., 0.3490, 0.3333, 0.2706],
          ...,
          [0.2235, 0.2471, 0.2706,  ..., 0.2157, 0.2078, 0.1608],
          [0.2078, 0.2235, 0.2471,  ..., 0.2000, 0.1843, 0.1451],
          [0.1843, 0.2000, 0.2235,  ..., 0.1843, 0.1686, 0.1294]],
 
         [[0.3569, 0.3804, 0.4039,  ..., 0.2941, 0.1843, 0.1137],
          [0.3804, 0.3961, 0.4118,  ..., 0.3490, 0.2627, 0.1608],
          [0.3804, 0.3961, 0.4118,  ...,

In [12]:
# 测试__getitem__()方法
test_dataset[-1]

(tensor([[[-0.0745, -0.0431,  0.0431,  ..., -0.0275, -0.0745, -0.1137],
          [-0.0510,  0.0118,  0.1608,  ...,  0.0118, -0.0353, -0.0824],
          [-0.0118,  0.1451,  0.2157,  ...,  0.0588,  0.0118, -0.0431],
          ...,
          [ 0.0824,  0.1294,  0.1843,  ...,  0.0824,  0.0275, -0.0118],
          [ 0.0510,  0.0980,  0.1294,  ...,  0.0353,  0.0118, -0.0275],
          [ 0.0118,  0.0588,  0.0902,  ...,  0.0039, -0.0039, -0.0510]],
 
         [[-0.0902, -0.0588,  0.0275,  ..., -0.0431, -0.0902, -0.1294],
          [-0.0667, -0.0039,  0.1451,  ..., -0.0039, -0.0510, -0.0980],
          [-0.0275,  0.1294,  0.2000,  ...,  0.0431, -0.0039, -0.0588],
          ...,
          [ 0.0745,  0.1216,  0.1765,  ...,  0.0745,  0.0196, -0.0196],
          [ 0.0353,  0.0902,  0.1216,  ...,  0.0275, -0.0118, -0.0510],
          [-0.0118,  0.0510,  0.0824,  ..., -0.0118, -0.0353, -0.0824]],
 
         [[-0.0824, -0.0510,  0.0353,  ..., -0.0275, -0.0824, -0.1216],
          [-0.0588,  0.0039,

In [13]:
# 训练集加载器，训练时需要打乱顺序
# batch_size的设置含义： 每次加载16个图像数据，然后打乱顺序再次加载16个，循环至结束
train_dataloader = DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)

In [14]:
# 遍历dataloader的时候，数据才开始写入内存
for batch_X, batch_y in train_dataloader:
    print(batch_X.shape)
    print(batch_y.shape)
    break

torch.Size([100, 3, 32, 32])
torch.Size([100])


In [15]:
# 测试集加载器, 测试的时候不必打乱顺序
test_dataloader = DataLoader(dataset=test_dataset, batch_size=300, shuffle=False)

In [16]:
# 遍历dataloader的时候，数据才开始写入内存
for batch_X, batch_y in test_dataloader:
    print(batch_X.shape)
    print(batch_y.shape)
    break

torch.Size([300, 3, 32, 32])
torch.Size([300])


### 4. 搭建模型

In [17]:
import torch
from torch import nn
# 参考day08上午搭建的 LeNet5Cover_Gai直接拿过来

In [22]:
class LeNet5Cover_Gai(nn.Module):
    """
        自定义一个卷积神经网络模型 - 对上面的神经网络进行了改良.
        改良：做了流水线
    """
    def __init__(self, in_channels=1, output=10):
        """
            初始化函数
            
            1. Flatten参数的默认值说明
                nn.Flatten(start_dim = 1, end_dim = -1)默认值是1和-1. 意思是在[N, C, H, W]中，把从第1个维度开始到倒数第1个维度进行展平。
                也就是[N, C, H, W]展平成[N, C * H * W]
        """
        super().__init__()
        self.pipline = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=6, kernel_size=5, stride=1, padding=0),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Flatten(),
            nn.Linear(in_features=400, out_features=120),
            nn.Linear(in_features=120, out_features=84),
            nn.Linear(in_features=84, out_features=output)
        )

    def forward(self, x):
        """
            前向传播
        """
        return self.pipline(x)

### 5.训练过程

In [19]:
# 设置训练轮次
epochs = 100
# 设置学习率
learning_rate = 1e-3
# 设备
device = "cuda" if torch.cuda.is_available() else "cpu"
# 实例化模型
model = LeNet5Cover_Gai(in_channels=3)
model.to(device=device)
# 优化器
# optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
optimizer = torch.optim.AdamW(params=model.parameters(), lr=learning_rate)
# 损失函数
loss_fn = nn.CrossEntropyLoss()

In [20]:
def train():
    for epoch in range(epochs):
        total_loss = 0
        count = 0
        for batch_X, batch_y in train_dataloader:
            # 1.数据搬家
            batch_X = batch_X.to(device=device)
            batch_y = batch_y.to(device=device)
            # 2.正向传播
            y_pred = model.forward(batch_X)
            # 3.计算损失
            loss = loss_fn(y_pred, batch_y)
            # 4.反向传播
            loss.backward()
            # 5.优化一步
            optimizer.step()
            # 6.梯度清零
            optimizer.zero_grad()
            # 记录每个epoch的平均loss
            total_loss += loss.item()
            count += 1
        # 7.打印损失
        print(f"Epoch {epoch+1}, Avg Loss: {total_loss/count:.8f}")

In [21]:
train()

Epoch 1, Avg Loss: 2.14648422
Epoch 2, Avg Loss: 1.23935846
Epoch 3, Avg Loss: 0.80352082
Epoch 4, Avg Loss: 0.65865150
Epoch 5, Avg Loss: 0.54208504
Epoch 6, Avg Loss: 0.47820197
Epoch 7, Avg Loss: 0.43114320
Epoch 8, Avg Loss: 0.37498976
Epoch 9, Avg Loss: 0.34552529
Epoch 10, Avg Loss: 0.32183964
Epoch 11, Avg Loss: 0.28210302
Epoch 12, Avg Loss: 0.25835160
Epoch 13, Avg Loss: 0.23069625
Epoch 14, Avg Loss: 0.21517163
Epoch 15, Avg Loss: 0.20172513
Epoch 16, Avg Loss: 0.17752626
Epoch 17, Avg Loss: 0.15319063
Epoch 18, Avg Loss: 0.16415689
Epoch 19, Avg Loss: 0.13115902
Epoch 20, Avg Loss: 0.11875591
Epoch 21, Avg Loss: 0.11414685
Epoch 22, Avg Loss: 0.10612061
Epoch 23, Avg Loss: 0.08914827
Epoch 24, Avg Loss: 0.08714529
Epoch 25, Avg Loss: 0.08339097
Epoch 26, Avg Loss: 0.08284724
Epoch 27, Avg Loss: 0.08065183
Epoch 28, Avg Loss: 0.06231232
Epoch 29, Avg Loss: 0.04699513
Epoch 30, Avg Loss: 0.05402360
Epoch 31, Avg Loss: 0.04005197
Epoch 32, Avg Loss: 0.03167756
Epoch 33, Avg Los

In [None]:
"""
    后续应该继续做以下步骤：
        1. 过程监控（准确率accuracy）
        2. 可视化loss和accuracy曲线
        3. 早停设置（在测试集上，如果连续N=3轮没有性能提升，则停止训练）
        4. 模型的best.pt和last.pt保存
        5. 加载预训练模型 last.pt
        6. 模型加载、推理流程
"""