---

<h2 style="text-align: center;"><b>Свёрточные нейронные сети: CIFAR10</b></h3>

---

Выполнил - Пермяшкин Дмитрий, гр. 20223

<h3 style="text-align: center;"><b>CIFAR10</b></h3>

<img src="https://raw.githubusercontent.com/soumith/ex/gh-pages/assets/cifar10.png" width=500, height=400>

**CIFAR10:** это набор из 60k картинок 32х32х3, 50k которых составляют обучающую выборку, и оставшиеся 10k - тестовую. Классов в этом датасете 10: `'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'`.

In [1]:
import torch
import torchvision
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F  # Functional

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import tqdm
from itertools import product, compress

In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='../pytorch_data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='../pytorch_data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../pytorch_data\cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ../pytorch_data\cifar-10-python.tar.gz to ../pytorch_data
Files already downloaded and verified


Напишем свёрточную нейросеть для предсказания на CIFAR10.

In [3]:
def layer_output_shape(layer, shape):
    return layer(torch.rand(*(shape))).data.shape

class SimpleConvNet(nn.Module):
    def __init__(self, extractor_config, pooling_layer, classificator_config, activation_func=F.relu):
        super().__init__()

        self.activation = activation_func
        base_shape = (1,3,32,32)

        self.conv = nn.ModuleList()
        self.conv.append(nn.Conv2d(in_channels=3, out_channels=extractor_config[0], kernel_size=5))
        self.pool = pooling_layer
        base_shape = layer_output_shape(self.pool, layer_output_shape(self.conv[-1],base_shape))

        for i in range(0,len(extractor_config)-1):
          self.conv.append(nn.Conv2d(in_channels=extractor_config[i], out_channels=extractor_config[i+1], kernel_size=5))
          base_shape = layer_output_shape(self.pool, layer_output_shape(self.conv[-1],base_shape))
        
        self.extractor_shape = base_shape[1]*base_shape[2]*base_shape[3]
        self.fc = nn.ModuleList()
        real_config = [self.extractor_shape] + classificator_config + [10]
        for i in range(0, len(real_config)-1):
          self.fc.append(nn.Linear(real_config[i],real_config[i+1]))


    def forward(self, x):
        for c_layer in self.conv:
          x = self.pool(self.activation(c_layer(x)))
        x = x.view(-1, self.extractor_shape) 

        for l_layer in self.fc[:-1]:
          x = self.activation(l_layer(x))
        x = self.fc[-1](x)
        return x

Функции для обучения сети и проверки через accuracy_score

In [4]:
# выбираем функцию потерь
loss_fn = torch.nn.CrossEntropyLoss()

def train(network, epoches):
  optimizer = torch.optim.Adam(network.parameters(), lr=1e-4)
  losses = []
  for epoch in tqdm.notebook.tqdm(range(epoches)):
    for X_batch, y_batch in trainloader:
      optimizer.zero_grad()

      y_pred = network(X_batch)
      loss = loss_fn(y_pred, y_batch)
      losses.append(loss.item())
      loss.backward()
      optimizer.step()

  return losses

def test(network):
  class_correct = [0.]*10
  class_total = [0.]*10
  class_result = [0.]*10

  with torch.no_grad():
    for data in testloader:
        images, labels = data
        y_pred = net(images)
        _, predicted = torch.max(y_pred, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
  
  for i in range(10):
    class_result[i] = class_correct[i] / class_total[i]
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_result[i]))
  
  return class_result

In [7]:
best = [(None,0.)]*10
the_best = (None, 0.)

# Сверточные слои

Стоит отметить, что конфигурация выбрана в сторону расширения числа канала, поскольку сейчас мы "расширяем" данные новыми признаками. 

Число эпох и возможных каналов подобрано, чтобы получить результаты на этой неделе.

In [8]:
layers = [5,10,15,20]
for l in tqdm.notebook.tqdm([list(compress(layers,w)) for w in list(product([0,1],repeat=4))[1:]]):
    print("Extraction layer channel config", l)
    try:
        net = SimpleConvNet(l,nn.MaxPool2d(kernel_size=2, stride=2), [120,84])
        losses = train(net,3)
        result = test(net)
        average = 0.
        for i in range(10):
            if result[i] > best[i][1]:
                best[i] = (net, result[i])
            average += result[i]
        average /= 10
        if average > the_best[1]:
        the_best = (net,average)
    except RuntimeError:
        print("Impossible configuration")
    finally:
        print()

IndentationError: expected an indented block (Temp/ipykernel_14944/2572364210.py, line 15)

Выведем для каждого класса лучшую сеть и с лучшей средней точностью (усреднение accuracy_score по классам равномерное - слишком малый датасет для анализа распределения классов)

In [11]:
for i in range(10):
  print("Best network for class %5s with score %2d %%" % (classes[i],100*best[i][1]))
  print(best[i][0])

print("Best network on average")
print(the_best[0])

Best network for class plane with score 77 %
SimpleConvNet(
  (conv): ModuleList(
    (0): Conv2d(3, 15, kernel_size=(5, 5), stride=(1, 1))
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): ModuleList(
    (0): Linear(in_features=2940, out_features=120, bias=True)
    (1): Linear(in_features=120, out_features=84, bias=True)
    (2): Linear(in_features=84, out_features=10, bias=True)
  )
)
Best network for class   car with score 79 %
SimpleConvNet(
  (conv): ModuleList(
    (0): Conv2d(3, 10, kernel_size=(5, 5), stride=(1, 1))
    (1): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): ModuleList(
    (0): Linear(in_features=500, out_features=120, bias=True)
    (1): Linear(in_features=120, out_features=84, bias=True)
    (2): Linear(in_features=84, out_features=10, bias=True)
  )
)
Best network for class  bird with score 52 %
SimpleConvNet(
  (c

По итогу немного неожиданно оказалось, что хоть для отдельных классов есть смысл в многослойных экстракторах, но в среднем лучше всего выступает однослойный. Скорее всего это связано с тем, что нам важны не только число различных свойств, но и также их расположение.

# Подбор типа пулинга

Можно было сделать минимум или хитрые схемы, но в большей части работ советуют либо среднее, либо пик брать. Поэтому 2 типа всего.

In [13]:
for p in tqdm.notebook.tqdm([nn.MaxPool2d(kernel_size=2, stride=2),nn.AvgPool2d(kernel_size=2, stride=2)]):
  try:
    net = SimpleConvNet([20],p, [120,84])
    losses = train(net,3)
    result = test(net)
    average = 0.
    for i in range(10):
      if result[i] > best[i][1]:
        best[i] = (net, result[i])
      average += result[i]
    average /= 10
    if average > the_best[1]:
      the_best = (net,average)
  except RuntimeError:
    print("Impossible configuration")
  finally:
    print()

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fb9a0fbc320>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fb9a0fbc320>
    if w.is_alive():
Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    assert self._parent_pid == os.getpid(), 'can only test a child process'
    self._shutdown_workers()
AssertionError: can only test a child process
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho

Accuracy of plane : 70 %
Accuracy of   car : 75 %
Accuracy of  bird : 45 %
Accuracy of   cat : 46 %
Accuracy of  deer : 48 %
Accuracy of   dog : 45 %
Accuracy of  frog : 74 %
Accuracy of horse : 61 %
Accuracy of  ship : 70 %
Accuracy of truck : 63 %



  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 62 %
Accuracy of   car : 67 %
Accuracy of  bird : 40 %
Accuracy of   cat : 41 %
Accuracy of  deer : 39 %
Accuracy of   dog : 41 %
Accuracy of  frog : 71 %
Accuracy of horse : 74 %
Accuracy of  ship : 76 %
Accuracy of truck : 57 %



# Функция активации

In [14]:
for act in tqdm.notebook.tqdm([nn.functional.relu,nn.functional.elu, torch.sigmoid, nn.functional.softsign, torch.tanh]):
  print("Activation func", str(act))
  try:
    net = SimpleConvNet([20],nn.MaxPool2d(kernel_size=2, stride=2), [120,84],activation_func=act)
    losses = train(net,3)
    result = test(net)
  except RuntimeError:
    print("Impossible configuration")
  finally:
    print()

  0%|          | 0/5 [00:00<?, ?it/s]

Activation func <function relu at 0x7fb9a185aef0>


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 60 %
Accuracy of   car : 64 %
Accuracy of  bird : 38 %
Accuracy of   cat : 56 %
Accuracy of  deer : 39 %
Accuracy of   dog : 51 %
Accuracy of  frog : 77 %
Accuracy of horse : 62 %
Accuracy of  ship : 77 %
Accuracy of truck : 71 %

Activation func <function elu at 0x7fb9a18620e0>


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 68 %
Accuracy of   car : 68 %
Accuracy of  bird : 45 %
Accuracy of   cat : 39 %
Accuracy of  deer : 43 %
Accuracy of   dog : 53 %
Accuracy of  frog : 76 %
Accuracy of horse : 69 %
Accuracy of  ship : 77 %
Accuracy of truck : 67 %

Activation func <built-in method sigmoid of type object at 0x7fba9e6d41a0>


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 46 %
Accuracy of   car : 52 %
Accuracy of  bird : 19 %
Accuracy of   cat : 15 %
Accuracy of  deer : 37 %
Accuracy of   dog : 39 %
Accuracy of  frog : 41 %
Accuracy of horse : 67 %
Accuracy of  ship : 57 %
Accuracy of truck : 45 %

Activation func <function softsign at 0x7fb9a1862710>


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 72 %
Accuracy of   car : 69 %
Accuracy of  bird : 43 %
Accuracy of   cat : 38 %
Accuracy of  deer : 48 %
Accuracy of   dog : 44 %
Accuracy of  frog : 73 %
Accuracy of horse : 69 %
Accuracy of  ship : 72 %
Accuracy of truck : 66 %

Activation func <built-in method tanh of type object at 0x7fba9e6d41a0>


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 59 %
Accuracy of   car : 67 %
Accuracy of  bird : 46 %
Accuracy of   cat : 39 %
Accuracy of  deer : 39 %
Accuracy of   dog : 44 %
Accuracy of  frog : 78 %
Accuracy of horse : 69 %
Accuracy of  ship : 81 %
Accuracy of truck : 71 %



Ну вот тут видно, что хоть функции примерно дают схожий результат (кроме сигмоиды, что логично), но ReLU теперь не лучший выбор.

Поэтому далее будет использована softsign (поправлено "за кадром", поскольку фиксы "за кадром" не копируются в ячейках, а MNIST и CIFAR10 считались одновременно).

# Число полносвязных слоев

Вот тут уже конфигурация по сужению, поскольку все признаки уже выделились на предыдущем шагу. Плюсом вся сверточность придумана как раз с целью ужатия числа нейронов в полносвязном слое, поэтому расширение размера слоя может привести к возврату на поле "с чего начинали".

Опять параметры упираются в ограничение по времени.

In [15]:
l_layers = [256,128,64,32]
for l in tqdm.notebook.tqdm([list(compress(l_layers,w)) for w in list(product([0,1],repeat=4))[1:]]):
  print("Classificator layers config", l)
  try:
    net = SimpleConvNet([20],nn.MaxPool2d(kernel_size=2, stride=2), l)
    losses = train(net,3)
    result = test(net)
    average = 0.
    for i in range(10):
      if result[i] > best[i][1]:
        best[i] = (net, result[i])
      average += result[i]
    average /= 10
    if average > the_best[1]:
      the_best = (net,average)
  except RuntimeError:
    print("Impossible configuration")
  finally:
    print()

  0%|          | 0/15 [00:00<?, ?it/s]

Classificator layers config [32]


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 62 %
Accuracy of   car : 68 %
Accuracy of  bird : 48 %
Accuracy of   cat : 41 %
Accuracy of  deer : 44 %
Accuracy of   dog : 55 %
Accuracy of  frog : 68 %
Accuracy of horse : 53 %
Accuracy of  ship : 73 %
Accuracy of truck : 53 %

Classificator layers config [64]


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 69 %
Accuracy of   car : 58 %
Accuracy of  bird : 34 %
Accuracy of   cat : 43 %
Accuracy of  deer : 53 %
Accuracy of   dog : 56 %
Accuracy of  frog : 70 %
Accuracy of horse : 60 %
Accuracy of  ship : 67 %
Accuracy of truck : 69 %

Classificator layers config [64, 32]


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 57 %
Accuracy of   car : 68 %
Accuracy of  bird : 34 %
Accuracy of   cat : 35 %
Accuracy of  deer : 46 %
Accuracy of   dog : 50 %
Accuracy of  frog : 75 %
Accuracy of horse : 72 %
Accuracy of  ship : 70 %
Accuracy of truck : 68 %

Classificator layers config [128]


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 58 %
Accuracy of   car : 73 %
Accuracy of  bird : 50 %
Accuracy of   cat : 41 %
Accuracy of  deer : 47 %
Accuracy of   dog : 46 %
Accuracy of  frog : 76 %
Accuracy of horse : 67 %
Accuracy of  ship : 81 %
Accuracy of truck : 62 %

Classificator layers config [128, 32]


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 51 %
Accuracy of   car : 63 %
Accuracy of  bird : 45 %
Accuracy of   cat : 40 %
Accuracy of  deer : 44 %
Accuracy of   dog : 61 %
Accuracy of  frog : 65 %
Accuracy of horse : 59 %
Accuracy of  ship : 78 %
Accuracy of truck : 62 %

Classificator layers config [128, 64]


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 69 %
Accuracy of   car : 72 %
Accuracy of  bird : 43 %
Accuracy of   cat : 50 %
Accuracy of  deer : 45 %
Accuracy of   dog : 33 %
Accuracy of  frog : 71 %
Accuracy of horse : 75 %
Accuracy of  ship : 81 %
Accuracy of truck : 54 %

Classificator layers config [128, 64, 32]


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 71 %
Accuracy of   car : 71 %
Accuracy of  bird : 37 %
Accuracy of   cat : 29 %
Accuracy of  deer : 56 %
Accuracy of   dog : 56 %
Accuracy of  frog : 73 %
Accuracy of horse : 70 %
Accuracy of  ship : 66 %
Accuracy of truck : 65 %

Classificator layers config [256]


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 75 %
Accuracy of   car : 71 %
Accuracy of  bird : 44 %
Accuracy of   cat : 52 %
Accuracy of  deer : 62 %
Accuracy of   dog : 48 %
Accuracy of  frog : 75 %
Accuracy of horse : 64 %
Accuracy of  ship : 67 %
Accuracy of truck : 62 %

Classificator layers config [256, 32]


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 53 %
Accuracy of   car : 82 %
Accuracy of  bird : 37 %
Accuracy of   cat : 41 %
Accuracy of  deer : 61 %
Accuracy of   dog : 63 %
Accuracy of  frog : 69 %
Accuracy of horse : 57 %
Accuracy of  ship : 75 %
Accuracy of truck : 47 %

Classificator layers config [256, 64]


  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy of plane : 71 %
Accuracy of   car : 74 %
Accuracy of  bird : 68 %
Accuracy of   cat : 39 %
Accuracy of  deer : 35 %
Accuracy of   dog : 48 %
Accuracy of  frog : 69 %
Accuracy of horse : 60 %
Accuracy of  ship : 70 %
Accuracy of truck : 67 %

Classificator layers config [256, 64, 32]


  0%|          | 0/3 [00:00<?, ?it/s]




KeyboardInterrupt: ignored

К сожалению времени подождать ещё час до конца не было, а ставить в ночь черевато тем, что капча вылезет и убъет рантайм.

In [16]:
for i in range(10):
  print("Best network for class %5s with score %2d %%" % (classes[i],100*best[i][1]))
  print(best[i][0])

print("Best network on average")
print(the_best[0])

Best network for class plane with score 78 %
SimpleConvNet(
  (conv): ModuleList(
    (0): Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1))
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): ModuleList(
    (0): Linear(in_features=3920, out_features=120, bias=True)
    (1): Linear(in_features=120, out_features=84, bias=True)
    (2): Linear(in_features=84, out_features=10, bias=True)
  )
)
Best network for class   car with score 82 %
SimpleConvNet(
  (conv): ModuleList(
    (0): Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1))
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): ModuleList(
    (0): Linear(in_features=3920, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=32, bias=True)
    (2): Linear(in_features=32, out_features=10, bias=True)
  )
)
Best network for class  bird with score 68 %
SimpleConvNet(
  (conv): ModuleList(
    (0): Conv2d(3, 20, kernel_size=(5, 5

Ну тут опять лучше всего показала базовая конфигурация классификатора, скорее всего она подбиралась более лучше, чем методом тыка.

И опять заметно, что для каких-то классов другие структуры лучше, чем базовая. И всего для пары классов 2 линейных слоя оказались лучше, но оба класса распознаются с вероятностью ниже 70%, так что возможно есть вариант 3х-слойного классификатора, который таки решает проблему.

# Итог

"Лучшая" нейросеть из рассмотренных

In [17]:
the_best[0]

SimpleConvNet(
  (conv): ModuleList(
    (0): Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1))
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): ModuleList(
    (0): Linear(in_features=3920, out_features=120, bias=True)
    (1): Linear(in_features=120, out_features=84, bias=True)
    (2): Linear(in_features=84, out_features=10, bias=True)
  )
)

Её accuracy_score

In [23]:
_ = test(the_best[0])

Accuracy of plane : 60 %
Accuracy of   car : 40 %
Accuracy of  bird :  7 %
Accuracy of   cat :  0 %
Accuracy of  deer : 42 %
Accuracy of   dog : 59 %
Accuracy of  frog :  2 %
Accuracy of horse :  0 %
Accuracy of  ship : 21 %
Accuracy of truck : 30 %


Серьезного улучшения по сравнению с базовой сетью из примера нету. Ну зато можно сделать вывод - без серьезного улучшения структуры решать данную задачу бесполезно.