In [2]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms
import torchvision
import pandas as pd
import os
import re
from PIL import Image
from torchvision.models import resnet34
import time

class CIFAR10Dataset(Dataset):
    def __init__(self, file_path=[], crop_size_img=None, crop_size_label=None):
        """para:
            file_path(list): 数据和标签路径,列表元素第一个为图片路径，第二个为标签路径
        """
        # 1 正确读入图片和标签路径
        if len(file_path) != 2:
            raise ValueError("同时需要图片和标签文件夹的路径，图片路径在前")
        self.img_path = file_path[0]
        self.label_path = file_path[1]
        # 2 从路径中取出图片和标签数据的文件名保持到两个列表当中（程序中的数据来源）
        self.imgs = self.read_file(self.img_path)
        labels=pd.read_csv(file_path[1])
        self.labels = labels['y'].to_list()      


    def __getitem__(self, index):
 
        # 从文件名中读取数据（图片和标签都是png格式的图像数据）
        img = self.imgs[index]
        img = Image.open(img)
        label = self.labels[index]
        img = self.img_transform(img, label)
        return img,label

    def __len__(self):
        return len(self.imgs)

    def read_file(self, path):
        """从文件夹中读取数据"""
        files_list = os.listdir(path)
        
        file_path_list = {int(re.search(r'(?<=train\\)[0-9]+',os.path.join(path, img)).group(0))-1:os.path.join(path, img) for img in files_list}
        return file_path_list

    def img_transform(self, img, label):
        """对图片和标签做一些数值处理"""
        transform = transforms.Compose(
            [
                transforms.ToTensor(),
                # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]
        )
        img = transform(img)

        return img

tensor_dataset=CIFAR10Dataset([r'D:\书籍资料整理\kaggle\cifar-10\train',r'D:\书籍资料整理\kaggle\cifar-10\label.csv'])

In [3]:
tensor_dataloader = DataLoader(tensor_dataset,   # 封装的对象
                               batch_size=256,     # 输出的batchsize
                               shuffle=True,     # 随机输出
                               num_workers=0)    # 只有1个进程


In [5]:
def try_gpu(i=0):  #@save
    """如果存在，则返回gpu(i)，否则返回cpu()"""
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

In [6]:
EPOCH=5

In [7]:
model = torchvision.models.resnet50(pretrained=False) #使用resnet50模型[残差网络] 不进行预训练
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# 开始训练
device=try_gpu()

for epoch in range(EPOCH):
    start_time = time.time() #记录当前时间
    for i, data in enumerate(tensor_dataloader):
        # data里面包含图像数据（inputs）(tensor类型的）和标签（labels）(tensor类型）。
        inputs, labels = data
        # 将数据加载到相应设备中
        inputs, labels = inputs.to(device), labels.to(device)
        # 前向传播
        outputs = model(inputs)
        # 计算损失函数
        loss = criterion(outputs, labels)
        # 清空上一轮梯度
        optimizer.zero_grad()
        # 反向传播
        loss.backward()
        # 参数更新
        optimizer.step()
    print('epoch{} loss:{:.4f} time:{:.4f}'.format(epoch+1, loss.item(), time.time()-start_time))



epoch1 loss:1.8748 time:870.3292
epoch2 loss:1.2410 time:875.4534
epoch3 loss:1.2395 time:864.8805
epoch4 loss:1.4780 time:853.6953
epoch5 loss:0.8060 time:907.9043


In [None]:
#保存训练模型,下次可以直接加载
file_name = r'D:\书籍资料整理\kaggle\cifar-10\cifar10_resnet.pt'
torch.save(model, file_name)
print(file_name+' saved successfully!')

# 测试
model = torch.load(r'D:\书籍资料整理\kaggle\cifar-10\cifar10_resnet.pt') #加载模型
model.eval() #切换到测试模式
print(model)