# Convolutional Neural Networks (Conv2d)

## Импорт библиотек

In [2]:
import torch
import torch.nn as nn

## Формулы для расчета внутренних параметров модели

**Универсальная формула размера выхода сверточного слоя:**

$$
H_{out} = \left\lfloor \frac{H_{in} + 2P - 1*(K[0]-1)-1}{S[0]} + 1 \right\rfloor
$$

$$
W_{out} = \left\lfloor \frac{W_{in} + 2P - 1*(K[1]-1)-1}{S[1]} + 1 \right\rfloor
$$

Где:
- $H_{in}, W_{in}$ — высота и ширина входа,
- $K$ — размер ядра (kernel),
- $S$ — шаг (stride),
- $P$ — дополнение (padding),
- $\lfloor \cdot \rfloor$ — округление вниз.

**Формула для квадратных карт и ядер:**

$$
O = \left\lfloor \frac{I + 2P - K}{S} \right\rfloor + 1
$$

$(O = H_{out} = W_{out}, \quad I = H_{in} = W_{in})$

**Стандартные комбинации параметров для построения CNN**
    
$\textcolor{blue}{\ \ \ kernel = (3,3), \ \ stride = (1,1), \ \ padding = (0, 0)}$

$ \ \ \ \ \ \  H_{out} = H_{in} - 2 $

$ \ \ \ \ \ \  W_{out} = W_{in} - 2 $


$\textcolor{blue} {\ \ \ kernel = (3,3), \ \ stride = (1,1),  \ \ padding = (1, 1)}$

$ \ \ \ \ \ \  H_{out} = H_{in}$

$ \ \ \ \ \ \  W_{out} = W_{in}$


$\textcolor{blue} {\ \ \ kernel = (3,3), \ \ stride = (2,2), \ \ padding = (0, 0)}$

$ \ \ \ \ \ \  H_{out} = \frac{(H_{in} - 3 )}{2} + 1$

$ \ \ \ \ \ \  W_{out} = \frac{(W_{in} - 3 )}{2} + 1 $

## Основы и базовые архитектуры CNN

### Первый способ создания модели

In [3]:
# модель, на вход котрой можно подать тензор любого размера, так как H и W не указаны, но их нужно контролировать

model = nn.Sequential(
            nn.Conv2d(3, 32, (3, 3)),   # inp => (batch_size, 3, 28, 28), out => (batch_size, 32, 26, 26)
            nn.ReLU(),
            nn.Conv2d(32, 64, (3, 3))   # inp => (batch_size, 32, 26, 26), out => (batch_size, 64, 24, 24)
        )

model

Sequential(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
)

In [4]:
input = torch.rand([16, 3, 28, 28], dtype = torch.float32)

out = model(input)
out.shape

torch.Size([16, 64, 24, 24])

### Второй способ создания модели

In [5]:
# модель, на вход котрой можно подать тензор любого размера, так как H и W не указаны, но их нужно контролировать

model = nn.Sequential()
model.add_module('layer_1', nn.Conv2d(3, 32, (3, 3)))   # inp => (batch_size, 3, 28, 28), out => (batch_size, 32, 26, 26)
model.add_module('ReLU', nn.ReLU())
model.add_module('layer_2', nn.Conv2d(32, 64, (3, 3)))   # inp => (batch_size, 32, 26, 26), out => (batch_size, 64, 24, 24)

model

Sequential(
  (layer_1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (ReLU): ReLU()
  (layer_2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
)

In [6]:
input = torch.rand([16, 3, 28, 28], dtype = torch.float32)

out = model(input)
out.shape

torch.Size([16, 64, 24, 24])

In [7]:
model.state_dict()

OrderedDict([('layer_1.weight',
              tensor([[[[ 1.8173e-02, -5.9485e-02,  6.0730e-02],
                        [ 5.6538e-02, -6.9484e-02,  1.1951e-01],
                        [-7.4295e-02,  7.3668e-02,  8.0896e-02]],
              
                       [[ 4.7404e-02,  1.7555e-01,  1.3234e-01],
                        [-2.3737e-02, -1.9345e-02,  2.8475e-02],
                        [ 1.1874e-01,  1.4735e-01,  1.4341e-02]],
              
                       [[ 6.6260e-02, -6.3820e-02, -1.3531e-01],
                        [-1.2706e-01,  1.4200e-01,  2.5292e-02],
                        [ 1.7792e-01,  1.3969e-01,  6.0373e-02]]],
              
              
                      [[[-1.0028e-01,  9.8481e-02,  1.3137e-01],
                        [ 1.4673e-01, -1.1169e-01,  1.1264e-01],
                        [ 8.3995e-02,  7.7384e-02,  5.3340e-02]],
              
                       [[ 1.1883e-01, -2.7210e-02,  7.0598e-02],
                        [-1.4131e-01, -5.80

In [8]:
model.state_dict()['layer_1.weight'].shape

torch.Size([32, 3, 3, 3])

### Создание класса для модели нейронной сети

In [9]:
class MyModel(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv_1 = nn.Conv2d(in_channels, 32, (3,3))
        self.conv_2 = nn.Conv2d(32, out_channels, (3,3))
        self.act = nn.ReLU()

    def forward(self, x):
        x = self.conv_1(x)
        x = self.act(x)
        x = self.conv_2(x)
        return x

In [10]:
model = MyModel(3, 64)
model

MyModel(
  (conv_1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv_2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (act): ReLU()
)

In [11]:
# Проверка правильности построения модели

input = torch.rand([16, 3, 50, 50], dtype=torch.float32)

out = model(input)
out.shape

torch.Size([16, 64, 46, 46])

### Слой Flatten

**преобразует многомерный тензор в одномерный вектор**

#### Способ №1 - reshape

In [12]:
# создадим тензор для сверточного слоя

tensor = torch.rand([2, 1, 3, 3], dtype=torch.float32)
tensor

tensor([[[[0.8396, 0.4415, 0.2994],
          [0.8760, 0.4739, 0.1201],
          [0.9855, 0.3720, 0.6058]]],


        [[[0.7959, 0.9822, 0.2878],
          [0.9046, 0.7331, 0.3915],
          [0.2384, 0.7505, 0.4960]]]])

In [13]:
# прогоняем через сверточный слой (для проверки и примера)

conv = nn.Conv2d(1, 3, (3, 3))(tensor)
print(conv.shape)
print(conv)

torch.Size([2, 3, 1, 1])
tensor([[[[ 0.0850]],

         [[ 0.2163]],

         [[ 0.2426]]],


        [[[-0.2790]],

         [[-0.0302]],

         [[ 0.4723]]]], grad_fn=<ConvolutionBackward0>)


In [14]:
# изменим размер входного тензора так, чтобы он подходил для линейного слоя

tensor = tensor.reshape(2, -1)

print(tensor.shape)
print(tensor)

torch.Size([2, 9])
tensor([[0.8396, 0.4415, 0.2994, 0.8760, 0.4739, 0.1201, 0.9855, 0.3720, 0.6058],
        [0.7959, 0.9822, 0.2878, 0.9046, 0.7331, 0.3915, 0.2384, 0.7505, 0.4960]])


In [15]:
# подадим наш тензор на вход линейного слоя

linear = nn.Linear(9, 10)(tensor)
print(linear.shape)
print(linear)

torch.Size([2, 10])
tensor([[-0.5477, -0.0271,  0.1558, -0.2124,  0.4231, -0.3241,  0.1551,  0.0282,
          0.6528,  0.3074],
        [-0.8427,  0.1984,  0.2199, -0.2751,  0.2479, -0.5064,  0.2969, -0.0403,
          0.5109,  0.3194]], grad_fn=<AddmmBackward0>)


#### Способ №2 - Flatten

**Метод flatten**

In [16]:
# создадим тензор для сверточного слояс

tensor = torch.rand([2, 1, 3, 3], dtype=torch.float32)
tensor

tensor([[[[0.7853, 0.8066, 0.1082],
          [0.6117, 0.7036, 0.7035],
          [0.9139, 0.7783, 0.5276]]],


        [[[0.9863, 0.1896, 0.8614],
          [0.5486, 0.4533, 0.7685],
          [0.8207, 0.6029, 0.7924]]]])

In [17]:
# изменим размер тензора так, чтобы он подходил для линейного слоя

flat = tensor.flatten(start_dim=1, end_dim=-1) # начиная с оси start_dim и до оси end_dim  объединяем в одну (в нашем случае объединяем все, кроме батча)
# flat = nn.Flatten()(tensor)

print(flat.shape)
print(flat)

torch.Size([2, 9])
tensor([[0.7853, 0.8066, 0.1082, 0.6117, 0.7036, 0.7035, 0.9139, 0.7783, 0.5276],
        [0.9863, 0.1896, 0.8614, 0.5486, 0.4533, 0.7685, 0.8207, 0.6029, 0.7924]])


**Класс nn.Flatten()**

In [18]:
# создадим тензор для сверточного слояс

tensor = torch.rand([2, 1, 3, 3], dtype=torch.float32)
tensor

tensor([[[[0.2147, 0.4089, 0.7565],
          [0.5821, 0.2641, 0.3709],
          [0.5871, 0.3504, 0.5337]]],


        [[[0.6046, 0.9514, 0.8618],
          [0.8903, 0.7930, 0.9544],
          [0.1842, 0.9135, 0.7583]]]])

In [19]:
# изменим размер тензора так, чтобы он подходил для линейного слоя

flat = nn.Flatten()(tensor) # нулевая ось остается (батчи), а все остальные оси вытягиваются в одну

print(flat.shape)
print(flat)

torch.Size([2, 9])
tensor([[0.2147, 0.4089, 0.7565, 0.5821, 0.2641, 0.3709, 0.5871, 0.3504, 0.5337],
        [0.6046, 0.9514, 0.8618, 0.8903, 0.7930, 0.9544, 0.1842, 0.9135, 0.7583]])


## Создание сверточной нейронной сети

### Пример 1 - Базовая CNN архитектура для классификации

In [20]:
class CNN_Model(nn.Module):
    def __init__(self, out):
        super().__init__()
        self.conv_1 = nn.Conv2d(3, 32, (3,3))  # (batch_size, 3, 28, 28) => (batch_size, 32, 26, 26)
        self.conv_2 = nn.Conv2d(32, 64, (3,3)) # (batch_size, 32, 26, 26) => (batch_size, 64, 24, 24)

        self.flatten = nn.Flatten()            # (batch_size, 64, 24, 24) => (batch_size, 64*24*24)
        
        self.act = nn.ReLU()

        self.layer_1 = nn.Linear(64*24*24, 128)
        self.layer_2 = nn.Linear(128, out)

    def forward(self, x):
        x = self.conv_1(x)
        x = self.act(x)
        x = self.conv_2(x)

        x = self.flatten(x)

        x = self.layer_1(x)
        x = self.act(x)
        x = self.layer_2(x)

        return x   

In [21]:
cnn_model = CNN_Model(5)
cnn_model

CNN_Model(
  (conv_1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv_2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (act): ReLU()
  (layer_1): Linear(in_features=36864, out_features=128, bias=True)
  (layer_2): Linear(in_features=128, out_features=5, bias=True)
)

In [22]:
input_cnn = torch.rand([16, 3, 28, 28], dtype=torch.float32)

cnn_out = cnn_model(input_cnn)
cnn_out.shape

torch.Size([16, 5])

In [23]:
cnn_out

tensor([[-0.0499,  0.0293, -0.0332,  0.0761,  0.0259],
        [-0.0912,  0.0695, -0.0293,  0.0933,  0.0336],
        [-0.0935,  0.0581, -0.0858,  0.1201,  0.0116],
        [-0.0670,  0.0549, -0.0570,  0.1106,  0.0371],
        [-0.0499,  0.0483, -0.0237,  0.0993,  0.0300],
        [-0.0817,  0.0622, -0.0564,  0.1020,  0.0532],
        [-0.0824,  0.0534, -0.0607,  0.1178,  0.0456],
        [-0.0758,  0.0523, -0.0410,  0.0989,  0.0355],
        [-0.0717,  0.0446, -0.0464,  0.0863,  0.0316],
        [-0.0930,  0.0488, -0.0489,  0.0814,  0.0202],
        [-0.0816,  0.0470, -0.0382,  0.1082,  0.0396],
        [-0.0677,  0.0779, -0.0231,  0.0695,  0.0475],
        [-0.0689,  0.0659, -0.0519,  0.0811,  0.0428],
        [-0.0708,  0.0726, -0.0570,  0.1092,  0.0272],
        [-0.0834,  0.0451, -0.0517,  0.1140,  0.0498],
        [-0.0736,  0.0743, -0.0538,  0.0721,  0.0430]],
       grad_fn=<AddmmBackward0>)

### Пример 2 - Улучшенная CNN архитектура с BatchNorm и Dropout

In [24]:
class CNN_Model(nn.Module):
    def __init__(self, in_channels, out):
        super().__init__()
        
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 32, (3, 3), bias=False), # (batch_size, in_channels, 28, 28) => (batch_size, 32, 26, 26)
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, (3, 3), bias=False),          # (batch_size, 26, 26, 26) => (batch_size, 64, 24, 24)
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        self.flatten = nn.Flatten()                         # (batch_size, 64, 24, 24) => (batch_size, 64*24*24) 

        self.fc = nn.Sequential(
            nn.Linear(64*24*24, 128),                       # (batch_size, 128) 
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, out)                             # (batch_size, out)
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

In [25]:
# Проверка модели

ModelCheck = CNN_Model(3, 10)
ModelCheck

CNN_Model(
  (conv): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Sequential(
    (0): Linear(in_features=36864, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=128, out_features=10, bias=True)
  )
)

In [26]:
InpCheck = torch.rand([16, 3, 28, 28], dtype=torch.float32)

OutCheck = ModelCheck(InpCheck)
OutCheck.shape

torch.Size([16, 10])