In [2]:
import torch
import torch.nn as nn
from torchvision.models.resnet import *
import numpy as np
import pandas as pd
import os
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
from tqdm import tqdm

In [3]:
# Dataset location: the label CSV files live directly under `root`
# (an empty root resolves to paths relative to the working directory).
root = ""
train_file, test_file = (
    os.path.join(root, csv_name) for csv_name in ('train.csv', 'test.csv')
)
# Directory that receives the per-epoch model checkpoints.
save_path = 'models'

In [13]:
def check_image():
    """
    Check that every dataset image is a 224x224 RGB image.

    Opens ``<file_path>/<i>.jpg`` for ``i`` in ``0..27152``. ``file_path`` is
    not a parameter: it is looked up in module scope when the function runs,
    so it must be defined before calling.

    Returns:
        bool: ``False`` as soon as one image has a size other than
        ``(224, 224)`` or a mode other than ``'RGB'``; ``True`` otherwise.
    """
    for i in range(0, 27153):
        img_path = os.path.join(file_path, str(i) + '.jpg')
        # Image.open keeps the underlying file open; the context manager
        # releases the handle on every iteration, including the early return.
        with Image.open(img_path) as img:
            if img.size != (224, 224) or img.mode != 'RGB':
                return False
    return True

def pre_process(df, dictionary=None):
    """
    Convert the ``label`` column of ``df`` between class names and integer ids.

    Without ``dictionary`` (encoding the training labels):
        each distinct label, in order of first appearance, is assigned the
        next integer id starting at 0; the ``label`` column is replaced by
        those ids and a two-way lookup table is built.

    With ``dictionary`` (decoding predictions):
        every label that equals an id in ``dictionary`` is replaced by the
        class name stored for it; labels with no entry are left unchanged.

    Args:
        df: DataFrame with a ``label`` column. It is modified in place.
        dictionary: optional two-way table as returned by a previous call
            (``name -> id`` and ``id -> name`` entries in the same dict).

    Returns:
        ``df`` when ``dictionary`` is given, otherwise ``(df, dictionary)``.
    """
    if dictionary is not None:
        # Keep only the id -> name direction: entries whose value is an int
        # are the name -> id half of the table and are skipped.
        decode = {
            key: name
            for key, name in dictionary.items()
            if not isinstance(name, int)
        }
        # One pass over the column instead of one masked assignment per class;
        # building a fresh column also avoids writing strings into an
        # integer-typed column element by element.
        df['label'] = df['label'].map(lambda value: decode.get(value, value))
        return df

    encode = {}
    dictionary = {}
    for class_id, class_name in enumerate(df['label'].unique()):
        encode[class_name] = class_id
        # Same insertion order as before: name -> id, then id -> name.
        dictionary[class_name] = class_id
        dictionary[class_id] = class_name
    df['label'] = df['label'].map(encode)
    return df, dictionary

class LeavesDataset(Dataset):
    """
    Dataset of images stored as ``<file_path>/<index>.jpg``.

    * ``mode='train'``: item ``i`` is ``(image i.jpg, label i)``; a random
      horizontal flip is applied before conversion to a tensor.
    * ``mode='test'``: item ``i`` is image ``(NUM_TRAIN + i).jpg`` converted
      to a tensor, with no label.
    """

    # Number of training images; also the file index of the first test image.
    NUM_TRAIN = 18353
    # Number of test images.
    NUM_TEST = 8800

    def __init__(self, file_path, df=None, mode='train'):
        """
        Args:
            file_path: directory containing the ``<index>.jpg`` files.
            df: DataFrame whose second column (position 1) holds the integer
                labels; required when ``mode='train'``, ignored otherwise.
            mode: ``'train'`` or ``'test'``.

        Raises:
            ValueError: if ``mode`` is unknown, or ``df`` is missing in
                train mode.
        """
        if mode not in ('train', 'test'):
            raise ValueError("mode must be 'train' or 'test', got {!r}".format(mode))
        if mode == 'train' and df is None:
            raise ValueError("df with labels is required when mode='train'")

        self.file_path = file_path
        self.mode = mode
        steps = [transforms.ToTensor()]

        if mode == 'train':
            # Copy the labels into an int64 array sized from the DataFrame
            # itself rather than from a hard-coded row count.
            self.img_label = np.array(df.iloc[:, 1].values, dtype=np.int64)
            # Augmentation runs on the PIL image, so it goes before ToTensor.
            steps.insert(0, transforms.RandomHorizontalFlip(p=0.5))

        self.transform = transforms.Compose(steps)

    def __len__(self):
        if self.mode == 'train':
            return len(self.img_label)
        return self.NUM_TEST

    def __getitem__(self, index):
        # Test images are numbered after the training images on disk.
        if self.mode == 'test':
            index += self.NUM_TRAIN
        img_path = os.path.join(self.file_path, str(index) + '.jpg')
        img = Image.open(img_path)

        # Apply the (optional) augmentation and convert to a tensor.
        img = self.transform(img)

        if self.mode == 'test':
            return img
        return img, self.img_label[index]

# Optionally freeze the model's existing parameters (feature-extraction mode)
def set_parameter_requires_grad(model, feature_extracting):
    """
    Disable gradients for every parameter of ``model`` when requested.

    Args:
        model: object exposing ``parameters()``.
        feature_extracting: when truthy, ``requires_grad`` is set to ``False``
            on each parameter; when falsy, the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

# ResNet-50 model with a replaced classification head
def res_model(num_classes, feature_extract = False, pretrained=True):
    """
    Build a ResNet-50 whose final fully connected layer outputs ``num_classes``.

    Args:
        num_classes: output size of the new ``fc`` layer.
        feature_extract: forwarded to ``set_parameter_requires_grad``; when
            truthy the backbone's parameters are frozen before the new head
            is attached, so only the new ``fc`` layer keeps gradients.
        pretrained: forwarded to ``resnet50(pretrained=...)``.

    Returns:
        The modified ResNet-50 module.
    """
    backbone = resnet50(pretrained=pretrained)
    # Freeze first: the head created below is a fresh layer and is therefore
    # unaffected by the freeze.
    set_parameter_requires_grad(backbone, feature_extract)
    backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
    return backbone

def train(net, loss_fn, optimizer, train_iter, num_epoch, device, start_epoch=46):
    """
    Train ``net`` and save its ``state_dict`` after every epoch.

    Args:
        net: model to train; moved to ``device`` before training.
        loss_fn: callable ``loss_fn(output, y)`` returning a scalar loss tensor.
        optimizer: optimizer that updates ``net``'s parameters.
        train_iter: iterable of ``(X, y)`` batches that supports ``len()``.
        num_epoch: exclusive upper bound of the 0-based epoch range.
        device: device the model and each batch are moved to.
        start_epoch: first 0-based epoch index to run. Defaults to 46, the
            value that used to be hard-coded to resume an interrupted run.

    Side effects:
        Prints the mean per-batch loss and accuracy of each epoch and writes
        ``model_epoch_{epoch}.pth`` (0-based epoch index) into the directory
        named by the module-level ``save_path``.
    """
    net.to(device)
    len_iter = len(train_iter)

    # torch.save does not create missing directories.
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    for epoch in range(start_epoch, num_epoch):

        print('training epoch: ', epoch)
        net.train()
        acc_sum = 0.0
        loss_sum = 0.0
        for X, y in tqdm(train_iter):

            optimizer.zero_grad()
            X = X.to(device)
            y = y.to(device)
            output = net(X)
            loss = loss_fn(output, y)
            loss.backward()
            optimizer.step()
            acc = (output.argmax(dim=1) == y).float().mean()

            # Accumulate plain Python floats: summing the tensors themselves
            # would chain autograd history of every batch's loss together.
            acc_sum += acc.item()
            loss_sum += loss.item()
        train_loss = loss_sum / len_iter
        train_acc = acc_sum / len_iter

        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
        # Training was interrupted at the 38th epoch (epochs 0-36 finished
        # normally, epoch 37 was cut short), hence one checkpoint per epoch.
        model_path = os.path.join(save_path, 'model_epoch_{}.pth'.format(epoch))
        torch.save(net.state_dict(), model_path)


In [14]:
if __name__ == "__main__":

    # Hyperparameters
    batch_size, lr, weight_decay, num_epoch = 8, 2e-4, 0.001, 60

    # Device: first GPU when one is available, otherwise fall back to the CPU
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # Network: resnet50 with a 176-class head
    net = res_model(176, pretrained=True, feature_extract=False)
    # Resume an interrupted run from the checkpoint written for epoch 45.
    # map_location='cpu' lets a checkpoint saved from a GPU model load on any
    # machine; train() moves the model to `device` afterwards.
    net.load_state_dict(torch.load('models/model_epoch_45.pth', map_location='cpu'))

    # Label preprocessing: class names -> integer ids, plus the lookup table
    train_label = pd.read_csv(train_file)
    train_label, dictionary = pre_process(train_label)

    # Dataset and loader
    file_path = 'images'
    train_set = LeavesDataset(file_path, train_label, mode='train')
    train_iter = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=5)

    # Train
    # NOTE(review): only the model weights are restored above, so Adam starts
    # with fresh optimizer state on resume — confirm this is acceptable.
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)
    train(net, loss_fn, optimizer, train_iter, num_epoch, device)

training epoch:  46


100%|██████████| 2295/2295 [03:11<00:00, 12.01it/s]


[ Train | 047/060 ] loss = 0.27148, acc = 0.93105
training epoch:  47


100%|██████████| 2295/2295 [03:11<00:00, 12.00it/s]


[ Train | 048/060 ] loss = 0.25151, acc = 0.93813
training epoch:  48


100%|██████████| 2295/2295 [03:11<00:00, 11.97it/s]


[ Train | 049/060 ] loss = 0.25479, acc = 0.93568
training epoch:  49


100%|██████████| 2295/2295 [03:15<00:00, 11.73it/s]


[ Train | 050/060 ] loss = 0.25044, acc = 0.93660
training epoch:  50


100%|██████████| 2295/2295 [03:15<00:00, 11.71it/s]


[ Train | 051/060 ] loss = 0.24766, acc = 0.93922
training epoch:  51


100%|██████████| 2295/2295 [03:13<00:00, 11.87it/s]


[ Train | 052/060 ] loss = 0.24331, acc = 0.94194
training epoch:  52


100%|██████████| 2295/2295 [03:23<00:00, 11.25it/s]


[ Train | 053/060 ] loss = 0.24643, acc = 0.94047
training epoch:  53


100%|██████████| 2295/2295 [03:32<00:00, 10.80it/s]


[ Train | 054/060 ] loss = 0.23809, acc = 0.94047
training epoch:  54


100%|██████████| 2295/2295 [03:30<00:00, 10.92it/s]


[ Train | 055/060 ] loss = 0.23941, acc = 0.94183
training epoch:  55


100%|██████████| 2295/2295 [03:21<00:00, 11.38it/s]


[ Train | 056/060 ] loss = 0.24715, acc = 0.93949
training epoch:  56


100%|██████████| 2295/2295 [03:23<00:00, 11.30it/s]


[ Train | 057/060 ] loss = 0.23317, acc = 0.94319
training epoch:  57


100%|██████████| 2295/2295 [03:13<00:00, 11.88it/s]


[ Train | 058/060 ] loss = 0.24196, acc = 0.93965
training epoch:  58


100%|██████████| 2295/2295 [03:03<00:00, 12.53it/s]


[ Train | 059/060 ] loss = 0.23299, acc = 0.94510
training epoch:  59


100%|██████████| 2295/2295 [03:03<00:00, 12.49it/s]


[ Train | 060/060 ] loss = 0.23903, acc = 0.94346
