In [15]:
import os
import pathlib
import torch

from PIL import Image

import torchinfo
from torch import nn
from torch.nn import functional as F
from torch.utils import data
from torch.utils.data import Dataset
import torchvision.datasets
from torchvision import transforms

from d2l import torch as d2l

from typing import Tuple, Dict, List
from sklearn.model_selection import train_test_split
import cv2

import pandas as pd

import albumentations
from albumentations.pytorch.transforms import ToTensorV2

In [16]:
train = pd.read_csv("../data/train.csv")
labels = list(pd.read_csv("../data/train.csv")['label'])

labels_unique = list(set(list(labels)))  #list index--labels

label_nums = []

for i in range(len(labels)):
    label_nums.append(labels_unique.index(labels[i]))

train['number'] = label_nums

In [17]:
train

Unnamed: 0,image,label,number
0,images/0.jpg,maclura_pomifera,44
1,images/1.jpg,maclura_pomifera,44
2,images/2.jpg,maclura_pomifera,44
3,images/3.jpg,maclura_pomifera,44
4,images/4.jpg,maclura_pomifera,44
...,...,...,...
18348,images/18348.jpg,aesculus_glabra,0
18349,images/18349.jpg,liquidambar_styraciflua,69
18350,images/18350.jpg,cedrus_libani,61
18351,images/18351.jpg,prunus_pensylvanica,111


In [18]:
test = pd.read_csv("../data/test.csv")

In [19]:
train_data, eval_data = train_test_split(train, test_size=0.2, stratify=train['number'])

In [20]:
class LeafDataset(Dataset):
    """
    树叶数据集的训练集 自定义Dataset
    """

    def __init__(self, train_csv, transform=None, test=False):
        """
        train_path : 传入记录图像路径及其标号的csv文件
        transform : 对图像进行的变换
        """
        super().__init__()
        self.train_csv = train_csv
        self.image_path = list(self.train_csv['image'])  #图像所在地址记录
        self.test = test
        if not self.test:
            self.label_nums = list(self.train_csv['number'])  #图像的标号记录
        self.transform = transform

    def __getitem__(self, idx):
        """
        idx : 所需要获取的图像的索引
        return : image， label
        """
        image = cv2.imread(os.path.join("../data", self.image_path[idx]))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']
        if not self.test:
            label = self.label_nums[idx]
            return image, label
        else:
            return image

    def __len__(self):
        return len(self.image_path)

In [21]:
lr, num_epochs, batch_size = 0.1, 20, 32

In [22]:
transforms_train = albumentations.Compose(
    [
        albumentations.Resize(320, 320),
        albumentations.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ToTensorV2(p=1.0)
    ]
)

transforms_test = albumentations.Compose(
    [
        albumentations.Resize(320, 320),
        albumentations.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ToTensorV2(p=1.0)
    ]
)

In [23]:
train_iter = data.DataLoader(
    LeafDataset(train_data, transform=transforms_train),
    batch_size, shuffle=True)
eval_iter = data.DataLoader(
    LeafDataset(eval_data, transform=transforms_test),
    batch_size, shuffle=True)

In [24]:
net = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V2)
net.fc = nn.LazyLinear(176)



In [25]:
torchinfo.summary(net, input_size=[32, 3, 224, 224])

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [32, 176]                 --
├─Conv2d: 1-1                            [32, 64, 112, 112]        9,408
├─BatchNorm2d: 1-2                       [32, 64, 112, 112]        128
├─ReLU: 1-3                              [32, 64, 112, 112]        --
├─MaxPool2d: 1-4                         [32, 64, 56, 56]          --
├─Sequential: 1-5                        [32, 256, 56, 56]         --
│    └─Bottleneck: 2-1                   [32, 256, 56, 56]         --
│    │    └─Conv2d: 3-1                  [32, 64, 56, 56]          4,096
│    │    └─BatchNorm2d: 3-2             [32, 64, 56, 56]          128
│    │    └─ReLU: 3-3                    [32, 64, 56, 56]          --
│    │    └─Conv2d: 3-4                  [32, 64, 56, 56]          36,864
│    │    └─BatchNorm2d: 3-5             [32, 64, 56, 56]          128
│    │    └─ReLU: 3-6                    [32, 64, 56, 56]          --
│ 

In [26]:
device = 'cuda:0'

In [27]:
def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute the accuracy for a model on a dataset using a GPU.

    Defined in :numref:`sec_utils`"""
    if isinstance(net, nn.Module):
        net.eval()  # Set the model to evaluation mode
        if not device:
            device = next(iter(net.parameters())).device
    # No. of correct predictions, no. of predictions
    metric = d2l.Accumulator(2)

    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(X, list):
                # Required for BERT Fine-tuning (to be covered later)
                X = [x.to(device) for x in X]
            else:
                X = X.to(device)
            y = y.to(device)
            metric.add(d2l.accuracy(net(X), y), d2l.size(y))
    return metric[0] / metric[1]

In [28]:
print('training on', device)

net.to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=lr)
loss = nn.CrossEntropyLoss(reduction='mean')
# animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
#                         legend=['train loss', 'train acc', 'test acc'])

timer, num_batches = d2l.Timer(), len(train_iter)
for epoch in range(num_epochs):
    # Sum of training loss, sum of training accuracy, no. of examples
    metric = d2l.Accumulator(3)
    net.train()
    for i, (X, y) in enumerate(train_iter):
        timer.start()
        optimizer.zero_grad()
        X, y = X.to(device), y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        l.backward()
        optimizer.step()
        with torch.no_grad():
            metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
        timer.stop()
        train_l = metric[0] / metric[2]
        train_acc = metric[1] / metric[2]
        if ((i + 1)
                % (num_batches // 5) == 0 or i == num_batches - 1):
            print(epoch + (i + 1) / num_batches, (train_l, train_acc, None))
    test_acc = evaluate_accuracy_gpu(net, eval_iter)
    print(epoch + 1, (None, None, test_acc))
print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
      f'test acc {test_acc:.3f}')
print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
      f'on {str(device)}')

training on cuda:0
0.19825708061002179 (4.801209722246442, 0.059752747252747256, None)
0.39651416122004357 (3.7803937943427117, 0.18956043956043955, None)
0.5947712418300654 (3.058370834741837, 0.30597527472527475, None)
0.7930283224400871 (2.5927004491562373, 0.3916552197802198, None)
0.9912854030501089 (2.2573249193338247, 0.4596840659340659, None)
1.0 (2.244907999473897, 0.4619942787086228, None)
1 (None, None, 0.6943612094797058)
1.1982570806100217 (0.6161067266385634, 0.8135302197802198, None)
1.3965141612200436 (0.5955358280257864, 0.8197115384615384, None)
1.5947712418300655 (0.5602886621659492, 0.8310439560439561, None)
1.7930283224400871 (0.5352589124506646, 0.8395432692307693, None)
1.9912854030501088 (0.5175280952519112, 0.8441620879120879, None)
2.0 (0.5159507674362854, 0.8445715842528266, None)
2 (None, None, 0.8163988014165078)
2.1982570806100217 (0.2891595181036781, 0.9189560439560439, None)
2.3965141612200433 (0.26693198486016345, 0.9191277472527473, None)
2.59477124183

In [29]:
## Origin -- test acc: 0.958
## Resize -- test acc: 0.965 0.966
## Resize and HorizontalFlip and VerticalFlip -- test acc: 0.953 
## Resize and HorizontalFlip and VerticalFlip and Rotate and RandomBrightnessContrast and ShiftScaleRotate -- test acc 0.952