## Кривоногов Н.В., PyTorch, практическое задание № 4

- Обучите CNN (самописная) на CIFAR-100.
- Обучите CNN на CIFAR-100 через дообучение ImageNet Resnet-50.
- *Обучите CNN на CIFAR-100 через дообучение ImageNet Resnet-50 с аугментацией данных.

In [5]:
import numpy as np
import torch

from torch import nn
from torch.nn import functional as F
from PIL import Image
from torchvision import transforms, datasets
from tqdm import tqdm

from sklearn.model_selection import train_test_split

In [12]:
dataset = datasets.CIFAR100(root='data/', train=True, download=True)


class MyOwnCifar(torch.utils.data.Dataset):

    def __init__(self, init_dataset, transform=None):
        self._base_dataset = init_dataset
        self.transform = transform

    def __len__(self):
        return len(self._base_dataset)

    def __getitem__(self, idx):
        img = self._base_dataset[idx][0]
        if self.transform is not None:
            img = self.transform(img)
        return img, self._base_dataset[idx][1]

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:05<00:00, 28351862.64it/s]


Extracting data/cifar-100-python.tar.gz to data/


In [13]:
trans_actions = transforms.Compose([transforms.Resize(44),
                                    transforms.RandomCrop(32, padding=4),
                                    transforms.ToTensor()])


def train_valid_split(Xt):
    X_train, X_test = train_test_split(Xt, test_size=0.05, random_state=13)
    return X_train, X_test

train_dataset, valid_dataset = train_valid_split(dataset)

train_dataset = MyOwnCifar(train_dataset, trans_actions)
valid_dataset = MyOwnCifar(valid_dataset, transforms.ToTensor())

In [4]:
train_loader = torch.utils.data.DataLoader(train_dataset,
                          batch_size=128,
                          shuffle=True,
                          num_workers=2)
valid_loader = torch.utils.data.DataLoader(valid_dataset,
                          batch_size=128,
                          shuffle=False,
                          num_workers=1)

In [5]:
# classes = ['beaver', 'dolphin', 'otter', 'seal', 'whale',
#            'aquarium fish', 'flatfish', 'ray', 'shark', 'trout',
#            'orchids', 'poppies', 'roses', 'sunflowers', 'tulips',
#            'bottles', 'bowls', 'cans', 'cups', 'plates',
#            'apples', 'mushrooms', 'oranges', 'pears', 'sweet peppers',
#            'clock', 'computer keyboard', 'lamp', 'telephone', 'television',
#            'bed', 'chair', 'couch', 'table', 'wardrobe',
#            'bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach',
#            'bear', 'leopard', 'lion', 'tiger', 'wolf',
#            'bridge', 'castle', 'house', 'road', 'skyscraper',
#            'cloud', 'forest', 'mountain', 'plain', 'sea',
#            'camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo',
#            'fox', 'porcupine', 'possum', 'raccoon', 'skunk',
#            'crab', 'lobster', 'snail', 'spider', 'worm',
#            'baby', 'boy', 'girl', 'man', 'woman',
#            'crocodile', 'dinosaur', 'lizard', 'snake', 'turtle',
#            'hamster', 'mouse', 'rabbit', 'shrew', 'squirrel',
#            'maple', 'oak', 'palm', 'pine', 'willow',
#            'bicycle', 'bus', 'motorcycle', 'pickup truck', 'train',
#            'lawn-mower', 'rocket', 'streetcar', 'tank', 'tractor']

In [6]:
# import matplotlib.pyplot as plt


# for img, lbl in train_loader:
#     print(img.shape)
#     print(classes[lbl[0]])
#     plt.imshow(img[0].permute(1, 2, 0))
#     break

In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [8]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.dp_one = nn.Dropout(0.2)
        # self.dp_two = nn.Dropout(0.2)

        self.bn_one = torch.nn.BatchNorm2d(3)
        self.conv_one = torch.nn.Conv2d(3, 30, 3)
        self.bn_two = torch.nn.BatchNorm2d(30)
        self.conv_two = torch.nn.Conv2d(30, 60, 3)
        self.bn_three = torch.nn.BatchNorm2d(60)
        self.conv_three = torch.nn.Conv2d(60, 120, 3)
        self.bn_four = torch.nn.BatchNorm2d(120)
        self.fc1 = torch.nn.Linear(480, 200)
        # self.fc2 = torch.nn.Linear(200, 60)
        self.out = torch.nn.Linear(200, 100) #(60, 10)

    def forward(self, x):
        x = self.bn_one(x)
        x = self.conv_one(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)

        x = self.bn_two(x)
        x = self.conv_two(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)

        x = self.bn_three(x)
        x = self.conv_three(x)
        x = F.leaky_relu(x, 0.1)
        x = F.max_pool2d(x, 2)

        x = self.bn_four(x)
        x = x.view(x.size(0), -1)
        x = self.dp_one(x)
        x = self.fc1(x)
        x = F.relu(x)
        # x = self.dp_two(x)
        # x = self.fc2(x)
        # x = F.relu(x)
        return self.out(x)

net = Net().to(device)
print(net)

Net(
  (dp_one): Dropout(p=0.2, inplace=False)
  (bn_one): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_one): Conv2d(3, 30, kernel_size=(3, 3), stride=(1, 1))
  (bn_two): BatchNorm2d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_two): Conv2d(30, 60, kernel_size=(3, 3), stride=(1, 1))
  (bn_three): BatchNorm2d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_three): Conv2d(60, 120, kernel_size=(3, 3), stride=(1, 1))
  (bn_four): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=480, out_features=200, bias=True)
  (out): Linear(in_features=200, out_features=100, bias=True)
)


In [9]:
optimizer = torch.optim.Adam(net.parameters(), lr=0.001) #0.01
criterion = nn.CrossEntropyLoss()

In [10]:
from torchsummary import summary

summary(net.to(device), input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
       BatchNorm2d-1            [-1, 3, 32, 32]               6
            Conv2d-2           [-1, 30, 30, 30]             840
       BatchNorm2d-3           [-1, 30, 15, 15]              60
            Conv2d-4           [-1, 60, 13, 13]          16,260
       BatchNorm2d-5             [-1, 60, 6, 6]             120
            Conv2d-6            [-1, 120, 4, 4]          64,920
       BatchNorm2d-7            [-1, 120, 2, 2]             240
           Dropout-8                  [-1, 480]               0
            Linear-9                  [-1, 200]          96,200
           Linear-10                  [-1, 100]          20,100
Total params: 198,746
Trainable params: 198,746
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.40
Params size (MB): 0.76
Estimated T

In [11]:
num_epochs = 10 #5
net.train()

for epoch in range(num_epochs):
    running_loss, running_items, running_right = 0.0, 0.0, 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        running_items += len(labels)
        running_right += (labels == torch.max(outputs, 1)[1]).sum()

        # выводим статистику о процессе обучения
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            net.eval()

            print(f'Epoch [{epoch + 1}/{num_epochs}]. ' \
                  f'Step [{i + 1}/{len(train_loader)}]. ' \
                  f'Loss: {running_loss / running_items:.3f}. ' \
                  f'Acc: {running_right / running_items:.3f}', end='. ')
            running_loss, running_items, running_right = 0.0, 0.0, 0.0

            test_running_right, test_running_total = 0.0, 0.0
            for i, data in enumerate(valid_loader):

                test_outputs = net(data[0].to(device))
                test_running_total += len(data[1])
                test_running_right += (data[1].to(device) == torch.max(test_outputs, 1)[1]).sum()

            print(f'Test acc: {test_running_right / test_running_total:.3f}')

        net.train()

print('Training is finished!')

Epoch [1/10]. Step [1/372]. Loss: 0.036. Acc: 0.016. Test acc: 0.010
Epoch [1/10]. Step [301/372]. Loss: 0.031. Acc: 0.100. Test acc: 0.134
Epoch [2/10]. Step [1/372]. Loss: 0.028. Acc: 0.180. Test acc: 0.142
Epoch [2/10]. Step [301/372]. Loss: 0.027. Acc: 0.172. Test acc: 0.169
Epoch [3/10]. Step [1/372]. Loss: 0.026. Acc: 0.203. Test acc: 0.162
Epoch [3/10]. Step [301/372]. Loss: 0.025. Acc: 0.211. Test acc: 0.173
Epoch [4/10]. Step [1/372]. Loss: 0.023. Acc: 0.258. Test acc: 0.196
Epoch [4/10]. Step [301/372]. Loss: 0.024. Acc: 0.238. Test acc: 0.205
Epoch [5/10]. Step [1/372]. Loss: 0.022. Acc: 0.266. Test acc: 0.188
Epoch [5/10]. Step [301/372]. Loss: 0.023. Acc: 0.256. Test acc: 0.215
Epoch [6/10]. Step [1/372]. Loss: 0.022. Acc: 0.312. Test acc: 0.190
Epoch [6/10]. Step [301/372]. Loss: 0.023. Acc: 0.269. Test acc: 0.231
Epoch [7/10]. Step [1/372]. Loss: 0.022. Acc: 0.258. Test acc: 0.228
Epoch [7/10]. Step [301/372]. Loss: 0.022. Acc: 0.285. Test acc: 0.251
Epoch [8/10]. Step [

Loss уменьшается, accuracy растет - сеть обучается.

При первоначальных параметрах: Epoch [5/5]. Step [301/372]. Loss: 0.026. Acc: 0.195. Test acc: 0.200

При lr = 0.001: Epoch [5/5]. Step [301/372]. Loss: 0.023. Acc: 0.263. Test acc: 0.209

При num_epochs = 10: Epoch [10/10]. Step [301/372]. Loss: 0.021. Acc: 0.309. Test acc: 0.254

In [2]:
from torchvision import models

resnet50 = models.resnet50(pretrained=True)
print(resnet50)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
from torchsummary import summary
summary(resnet50.to(device), input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

In [8]:
for param in list(resnet50.parameters())[:]:
    param.requires_grad = False

In [9]:
resnet50.fc = nn.Linear(2048, 100)

summary(resnet50.to(device), input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 16, 16]           9,408
       BatchNorm2d-2           [-1, 64, 16, 16]             128
              ReLU-3           [-1, 64, 16, 16]               0
         MaxPool2d-4             [-1, 64, 8, 8]               0
            Conv2d-5             [-1, 64, 8, 8]           4,096
       BatchNorm2d-6             [-1, 64, 8, 8]             128
              ReLU-7             [-1, 64, 8, 8]               0
            Conv2d-8             [-1, 64, 8, 8]          36,864
       BatchNorm2d-9             [-1, 64, 8, 8]             128
             ReLU-10             [-1, 64, 8, 8]               0
           Conv2d-11            [-1, 256, 8, 8]          16,384
      BatchNorm2d-12            [-1, 256, 8, 8]             512
           Conv2d-13            [-1, 256, 8, 8]          16,384
      BatchNorm2d-14            [-1, 25

In [10]:
resnet50 = resnet50.to(device)

In [14]:
train_actions = transforms.Compose([transforms.Resize(256),
                                    transforms.RandomCrop(224, padding=4),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                         std=[0.229, 0.224, 0.225])])
valid_transforms = transforms.Compose([transforms.Resize(224),
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                            std=[0.229, 0.224, 0.225])])


train_dataset, valid_dataset = train_valid_split(dataset)

train_dataset = MyOwnCifar(train_dataset, train_actions)
valid_dataset = MyOwnCifar(valid_dataset, valid_transforms)

In [15]:
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=128,
                                           shuffle=True,
                                           num_workers=2)
valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                           batch_size=128,
                                           shuffle=False,
                                           num_workers=1)

In [16]:
params_to_update = []
for name, param in resnet50.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)


optimizer = torch.optim.Adam(params_to_update, lr=0.001)
criterion = nn.CrossEntropyLoss()

In [17]:
num_epochs = 5
resnet50.train()

for epoch in range(num_epochs):
    running_loss, running_items, running_right = 0.0, 0.0, 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].to(device), data[1].to(device)

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        running_items += len(labels)
        running_right += (labels == torch.max(outputs, 1)[1]).sum()

        # выводим статистику о процессе обучения
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            resnet50.eval()

            print(f'Epoch [{epoch + 1}/{num_epochs}]. ' \
                  f'Step [{i + 1}/{len(train_loader)}]. ' \
                  f'Loss: {running_loss / running_items:.3f}. ' \
                  f'Acc: {running_right / running_items:.3f}', end='. ')
            running_loss, running_items, running_right = 0.0, 0.0, 0.0

            test_running_right, test_running_total = 0.0, 0.0
            for i, data in enumerate(valid_loader):

                test_outputs = resnet50(data[0].to(device))
                test_running_total += len(data[1])
                test_running_right += (data[1].to(device) == torch.max(test_outputs, 1)[1]).sum()

            print(f'Test acc: {test_running_right / test_running_total:.3f}')

        resnet50.train()

print('Training is finished!')

Epoch [1/5]. Step [1/372]. Loss: 0.037. Acc: 0.000. Test acc: 0.004
Epoch [1/5]. Step [301/372]. Loss: 0.020. Acc: 0.399. Test acc: 0.520
Epoch [2/5]. Step [1/372]. Loss: 0.012. Acc: 0.594. Test acc: 0.490
Epoch [2/5]. Step [301/372]. Loss: 0.013. Acc: 0.536. Test acc: 0.543
Epoch [3/5]. Step [1/372]. Loss: 0.012. Acc: 0.523. Test acc: 0.542
Epoch [3/5]. Step [301/372]. Loss: 0.012. Acc: 0.566. Test acc: 0.553
Epoch [4/5]. Step [1/372]. Loss: 0.013. Acc: 0.516. Test acc: 0.552
Epoch [4/5]. Step [301/372]. Loss: 0.012. Acc: 0.580. Test acc: 0.561
Epoch [5/5]. Step [1/372]. Loss: 0.011. Acc: 0.594. Test acc: 0.554
Epoch [5/5]. Step [301/372]. Loss: 0.012. Acc: 0.586. Test acc: 0.579
Training is finished!


Обучение длилось долго, но результат есть.

Loss падает, accuracy растет.

Epoch [5/5]. Step [301/372]. Loss: 0.012. Acc: 0.586. Test acc: 0.579