### ResNet 34 모델 살펴보기

In [1]:
from torchvision import models

# List the model names registered in torchvision that match the 'resnet*' pattern.
models.list_models(include='resnet*')

['resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50']

In [6]:
from torchvision import models

# Build torchvision's ResNet-34 with weights=None (no pretrained weights requested)
# and print its layer structure for reference.
resnet34_model = models.resnet34(weights=None)
print(resnet34_model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
from torchinfo import summary 

# Summarize the torchvision ResNet-34 for one 3x224x224 input, listing each layer's
# input shape, output shape and parameter count, labelled by attribute name.
summary(model=resnet34_model, input_size=(1, 3, 224, 224),
        col_names=['input_size', 'output_size', 'num_params'], 
        row_settings=['var_names'])

Layer (type (var_name))                  Input Shape               Output Shape              Param #
ResNet (ResNet)                          [1, 3, 224, 224]          [1, 1000]                 --
├─Conv2d (conv1)                         [1, 3, 224, 224]          [1, 64, 112, 112]         9,408
├─BatchNorm2d (bn1)                      [1, 64, 112, 112]         [1, 64, 112, 112]         128
├─ReLU (relu)                            [1, 64, 112, 112]         [1, 64, 112, 112]         --
├─MaxPool2d (maxpool)                    [1, 64, 112, 112]         [1, 64, 56, 56]           --
├─Sequential (layer1)                    [1, 64, 56, 56]           [1, 64, 56, 56]           --
│    └─BasicBlock (0)                    [1, 64, 56, 56]           [1, 64, 56, 56]           --
│    │    └─Conv2d (conv1)               [1, 64, 56, 56]           [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d (bn1)            [1, 64, 56, 56]           [1, 64, 56, 56]           128
│    │    └─ReLU (relu)   

### Resnet-34 Residual(Identity) Block(BasicBlock) 생성
* 2개의 Conv block(3x3 Conv -> BN -> Relu -> 3x3 Conv -> BN)을 연속적으로 이어서 생성.
* BasicBlock에 입력된 값(identity)을 Conv block의 출력 값과 Add 한 후 ReLU 적용.
* Stage별로 Feature map의 크기를 줄일 경우 첫번째 Conv에서 stride 2를 적용하여 줄임.
* Feature map의 크기를 줄일 경우 downsample block을 이용하여 입력값의 크기 역시 절반으로 줄여서 add 할 수 있도록 함.
  
![Residual Block Base](https://github.com/chulminkw/CNN_PG_Torch/blob/main/image/residual_block.png?raw=true)

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 Conv + BatchNorm layers and a skip connection.

    The block computes ``relu(conv_block(x) + shortcut(x))`` where ``shortcut`` is the
    identity unless a ``downsample`` module is supplied.
    """

    def __init__(self, in_channels, last_channels, stride=1, downsample=None):
        '''
        Args:
            in_channels: number of channels of the block input.
            last_channels: number of channels produced by both 3x3 convolutions.
            stride: stride of the first 3x3 convolution. 1 by default; 2 when a stage
                halves the feature-map size.
            downsample: optional module applied to the block input so that it matches
                the shape of the conv output before the add (e.g. 1x1 conv with
                stride 2 followed by BatchNorm). None keeps the input unchanged.
        '''
        super().__init__()
        self.conv_block = nn.Sequential(
            # First 3x3 conv. When a stage shrinks the feature map, it is done here via stride=2.
            nn.Conv2d(in_channels, last_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(last_channels),
            nn.ReLU(),
            # Second 3x3 conv always keeps the spatial size (stride=1).
            nn.Conv2d(last_channels, last_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(last_channels)
        )
        self.downsample = downsample

    def forward(self, x):
        identity = x
        out = self.conv_block(x)
        # Compare with None explicitly: truthiness of a module falls back to __len__
        # (nn.Sequential defines it), so a zero-length container would be silently skipped.
        if self.downsample is not None:
            identity = self.downsample(x)

        # Residual add followed by the final activation.
        out += identity
        out = F.relu(out)

        return out

In [15]:
# Build one hand-written BasicBlock (64 -> 64 channels, stride 1, no downsample) and
# print it next to the first block of torchvision's layer1 for a side-by-side comparison.
basicblock = BasicBlock(in_channels=64, last_channels=64, stride=1, downsample=None)
print('## my basicblock ## \n', basicblock)
print('## torchvision basicblock ##\n', resnet34_model.layer1[0])

## my basicblock ## 
 BasicBlock(
  (conv_block): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)
## torchvision basicblock ##
 BasicBlock(
  (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [16]:
# Stage 1 written out by hand: three identical BasicBlocks (64 -> 64 channels, stride 1,
# no downsample) chained with nn.Sequential.
stage_01 = nn.Sequential(
    BasicBlock(in_channels=64, last_channels=64, stride=1, downsample=None),
    BasicBlock(in_channels=64, last_channels=64, stride=1, downsample=None),
    BasicBlock(in_channels=64, last_channels=64, stride=1, downsample=None)
)

# Print the hand-built stage next to torchvision's layer1 for comparison.
print('## my stage_01 ## \n', stage_01)
print('## torchvision stage_01 ##\n', resnet34_model.layer1)

## my stage_01 ## 
 Sequential(
  (0): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3),

#### 아래는 downsample을 적용하지 않을 경우 오류 발생. 
* stage의 첫번째 BasicBlock에 stride=2를 적용할 경우 downsample을 conv 1x1(in_channels, out_channels, stride=2) 으로 만들어 주지 않을 경우 오류 발생.
* stage_02의 첫번째 BasicBlock은 stride=2인 첫번째 Conv에서 feature map의 크기를 절반으로 줄이고 채널 수를 64에서 128로 늘리므로, BasicBlock에 입력된 값(identity)도 크기를 절반으로 줄이고 채널 수를 128로 맞춰야 add 할 수 있음. 이를 위해 downsample 필요.  

In [21]:
# Without a downsample, the first block's skip input (64 channels, 56x56) and its conv
# output (128 channels, 28x28) have different shapes, so the residual add fails.
stage_02 = nn.Sequential(
    BasicBlock(in_channels=64, last_channels=128, stride=2, downsample=None),
    BasicBlock(in_channels=128, last_channels=128, stride=1, downsample=None),
    BasicBlock(in_channels=128, last_channels=128, stride=1, downsample=None),
    BasicBlock(in_channels=128, last_channels=128, stride=1, downsample=None)
)

print('## my stage_02 ## \n', stage_02)
print('## torchvision stage_02 ##\n', resnet34_model.layer2)

# The failure is the point of this cell, so catch it and show the message instead of
# letting the exception abort a "Restart Kernel -> Run All".
try:
    summary(model=stage_02, input_size=(1, 64, 56, 56),
            col_names=['input_size', 'output_size', 'num_params'],
            row_settings=['var_names'])
except RuntimeError as err:
    print(f'{type(err).__name__}: {err}')

## my stage_02 ## 
 Sequential(
  (0): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(128, 128, kernel

RuntimeError: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: [Sequential: 2, Conv2d: 3, BatchNorm2d: 3, ReLU: 3, Conv2d: 3, BatchNorm2d: 3]

In [22]:
# Downsample block for the skip path of stage_02's first BasicBlock: a 1x1 conv with
# stride 2 (64 -> 128 channels) plus BatchNorm, so the block input matches the conv output.
downsample_02 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False),
            nn.BatchNorm2d(128)    
)
# Only the first block changes the shape (stride=2) and therefore receives the downsample.
stage_02 = nn.Sequential(
    BasicBlock(in_channels=64, last_channels=128, stride=2, downsample=downsample_02),
    BasicBlock(in_channels=128, last_channels=128, stride=1, downsample=None),
    BasicBlock(in_channels=128, last_channels=128, stride=1, downsample=None),
    BasicBlock(in_channels=128, last_channels=128, stride=1, downsample=None)
)

# Summarize the stage for a 64-channel 56x56 input (the output shape of stage 1).
summary(model=stage_02, input_size=(1, 64, 56, 56),
        col_names=['input_size', 'output_size', 'num_params'], 
        row_settings=['var_names'])

Layer (type (var_name))                  Input Shape               Output Shape              Param #
Sequential (Sequential)                  [1, 64, 56, 56]           [1, 128, 28, 28]          --
├─BasicBlock (0)                         [1, 64, 56, 56]           [1, 128, 28, 28]          --
│    └─Sequential (conv_block)           [1, 64, 56, 56]           [1, 128, 28, 28]          --
│    │    └─Conv2d (0)                   [1, 64, 56, 56]           [1, 128, 28, 28]          73,728
│    │    └─BatchNorm2d (1)              [1, 128, 28, 28]          [1, 128, 28, 28]          256
│    │    └─ReLU (2)                     [1, 128, 28, 28]          [1, 128, 28, 28]          --
│    │    └─Conv2d (3)                   [1, 128, 28, 28]          [1, 128, 28, 28]          147,456
│    │    └─BatchNorm2d (4)              [1, 128, 28, 28]          [1, 128, 28, 28]          256
│    └─Sequential (downsample)           [1, 64, 56, 56]           [1, 128, 28, 28]          --
│    │    └─Conv2d (0)  

#### stage별로 Sequential로 Basic Block을 연결하는 코드는 반복적인 부분이 많이 필요

In [None]:
# Stage 1: three BasicBlocks, 64 -> 64 channels, no downsampling.
stage_01 = nn.Sequential(
    BasicBlock(in_channels=64, last_channels=64, stride=1, downsample=None),
    BasicBlock(in_channels=64, last_channels=64, stride=1, downsample=None),
    BasicBlock(in_channels=64, last_channels=64, stride=1, downsample=None)
)

# Stage 2: four BasicBlocks, 64 -> 128 channels; the first block halves the feature map.
downsample_02 = nn.Sequential(
    nn.Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False),
    nn.BatchNorm2d(128)
)
stage_02 = nn.Sequential(
    BasicBlock(in_channels=64, last_channels=128, stride=2, downsample=downsample_02),
    BasicBlock(in_channels=128, last_channels=128, stride=1, downsample=None),
    BasicBlock(in_channels=128, last_channels=128, stride=1, downsample=None),
    BasicBlock(in_channels=128, last_channels=128, stride=1, downsample=None)
)

# Stage 3 needs six BasicBlocks (128 -> 256 channels), written out the same way.
# Kept as a comment sketch (not executed) to show how repetitive this becomes:
#
# downsample_03 = nn.Sequential(
#     nn.Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False),
#     nn.BatchNorm2d(256)
# )
# stage_03 = nn.Sequential(
#     BasicBlock(in_channels=128, last_channels=256, stride=2, downsample=downsample_03),
#     BasicBlock(in_channels=256, last_channels=256, stride=1, downsample=None),
#     ...
#     BasicBlock(in_channels=256, last_channels=256, stride=1, downsample=None)
# )
#
# Stage 4 needs three BasicBlocks (256 -> 512 channels), and so on.


### BasicBlock들을 서로 연결하여 Stage를 만들어주는 함수 생성

In [23]:
def make_basic_stage(in_channels, last_channels, stride, blocks):
    """Chain BasicBlocks into one ResNet stage and return it as an nn.Sequential.

    Args:
        in_channels: channels of the tensor entering the stage.
        last_channels: channels produced by every block of the stage.
        stride: stride of the first block (1 keeps the feature-map size, 2 halves it).
        blocks: number of BasicBlocks in the stage. The first block is always created,
            so values below 1 still yield a single block.

    Returns:
        nn.Sequential holding the stage's BasicBlocks in order.
    """
    downsample = None
    # The skip path needs a 1x1 conv + BN whenever the first block changes the tensor
    # shape: either spatially (stride != 1) or in channel count. Checking only the stride
    # would make a stride-1 stage with in_channels != last_channels fail at the add.
    if stride != 1 or in_channels != last_channels:
        downsample = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=last_channels,
                      kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(num_features=last_channels)
        )
    # First block of the stage: the only one that may change shape, so it gets the downsample.
    layers = [BasicBlock(in_channels=in_channels, last_channels=last_channels,
                         stride=stride, downsample=downsample)]
    for _ in range(1, blocks):
        # Remaining blocks keep the shape: stride=1 and no downsample.
        layers.append(BasicBlock(in_channels=last_channels, last_channels=last_channels,
                                 stride=1, downsample=None))
    # Connect all collected BasicBlocks sequentially.
    return nn.Sequential(*layers)

In [24]:
# Stage 1 via the helper: three blocks, 64 -> 64 channels, stride 1.
stage_01 = make_basic_stage(in_channels=64, last_channels=64, stride=1, blocks=3)
print(stage_01)

Sequential(
  (0): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padd

In [25]:
# Stage 2 via the helper: four blocks, 64 -> 128 channels; stride 2 makes the helper
# attach a downsample to the first block.
stage_02 = make_basic_stage(in_channels=64, last_channels=128, stride=2, blocks=4)
print(stage_02)

Sequential(
  (0): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (downsample): Sequential(
      (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): BasicBlock(
    (conv_block): Sequential(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
   

### Resnet 34 모델 만들기

In [26]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    '''
    Residual block: 3x3 Conv -> BN -> ReLU -> 3x3 Conv -> BN, added to a shortcut and
    passed through a final ReLU.
    '''

    def __init__(self, in_channels, last_channels, stride=1, downsample=None):
        '''
        in_channels: channels of the block input.
        last_channels: channels produced by both 3x3 convolutions.
        stride: 1 by default; 2 when the stage halves the feature-map size.
        downsample: optional module applied to the block input so it can be added to the
            conv output (e.g. 1x1 conv with stride 2 plus BatchNorm); None means identity.
        '''
        super().__init__()
        # First 3x3 conv: the only place where the spatial size may shrink (via stride).
        reduce_conv = nn.Conv2d(in_channels, last_channels, kernel_size=3,
                                stride=stride, padding=1, bias=False)
        reduce_bn = nn.BatchNorm2d(last_channels)
        # Second 3x3 conv: always stride 1, keeps the shape.
        keep_conv = nn.Conv2d(last_channels, last_channels, kernel_size=3,
                              stride=1, padding=1, bias=False)
        keep_bn = nn.BatchNorm2d(last_channels)

        self.conv_block = nn.Sequential(reduce_conv, reduce_bn, nn.ReLU(), keep_conv, keep_bn)
        self.stride = stride
        self.downsample = downsample

    def forward(self, x):
        residual = self.conv_block(x)
        # Use the input as-is unless a downsample module was provided.
        shortcut = x if self.downsample is None else self.downsample(x)
        residual += shortcut
        return F.relu(residual)

In [30]:
class ResNet34(nn.Module):
    """ResNet-34 classifier: a 7x7 conv stem, four BasicBlock stages, GAP and a linear head.

    Args:
        num_classes: size of the final linear layer's output (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Stem: 7x7 conv (stride 2) -> BN -> ReLU -> 3x3 max-pool (stride 2).
        self.conv_block_01 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        # Four stages made of 3, 4, 6 and 3 BasicBlocks respectively.
        self.stage_01 = self.make_basic_stage(in_channels=64, last_channels=64, stride=1, blocks=3)
        self.stage_02 = self.make_basic_stage(in_channels=64, last_channels=128, stride=2, blocks=4)
        self.stage_03 = self.make_basic_stage(in_channels=128, last_channels=256, stride=2, blocks=6)
        self.stage_04 = self.make_basic_stage(in_channels=256, last_channels=512, stride=2, blocks=3)

        self.adaptive_pool = nn.AdaptiveAvgPool2d(output_size=1)
        self.fc = nn.Linear(512, num_classes)

    def make_basic_stage(self, in_channels, last_channels, stride, blocks):
        """Chain BasicBlocks into one stage and return it as an nn.Sequential.

        Args:
            in_channels: channels of the tensor entering the stage.
            last_channels: channels produced by every block of the stage.
            stride: stride of the first block (1 keeps the feature-map size, 2 halves it).
            blocks: number of BasicBlocks in the stage. The first block is always
                created, so values below 1 still yield a single block.
        """
        downsample = None
        # The skip path needs a 1x1 conv + BN whenever the first block changes the tensor
        # shape: either spatially (stride != 1) or in channel count. Checking only the
        # stride would make a stride-1 stage with in_channels != last_channels fail at the add.
        if stride != 1 or in_channels != last_channels:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=last_channels,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(num_features=last_channels)
            )
        # First block of the stage: the only one that may change shape, so it gets the downsample.
        layers = [BasicBlock(in_channels=in_channels, last_channels=last_channels,
                             stride=stride, downsample=downsample)]
        for _ in range(1, blocks):
            # Remaining blocks keep the shape: stride=1 and no downsample.
            layers.append(BasicBlock(in_channels=last_channels, last_channels=last_channels,
                                     stride=1, downsample=None))

        # Connect all collected BasicBlocks sequentially.
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv_block_01(x)
        x = self.stage_01(x)
        x = self.stage_02(x)
        x = self.stage_03(x)
        x = self.stage_04(x)

        # Global average pooling, flatten to (batch, features), then the classifier layer.
        x = self.adaptive_pool(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)

        return x
    

In [32]:
# Instantiate the hand-written ResNet34 with a 1000-way classifier head.
my_resnet34_model = ResNet34(num_classes=1000)

# Summarize it for one 3x224x224 input; depth=4 expands the nesting down to the
# individual layers inside each BasicBlock's conv_block.
summary(model=my_resnet34_model, input_size=(1, 3, 224, 224),
        col_names=['input_size', 'output_size', 'num_params'],
        depth=4,
        row_settings=['var_names'])


Layer (type (var_name))                  Input Shape               Output Shape              Param #
ResNet34 (ResNet34)                      [1, 3, 224, 224]          [1, 1000]                 --
├─Sequential (conv_block_01)             [1, 3, 224, 224]          [1, 64, 56, 56]           --
│    └─Conv2d (0)                        [1, 3, 224, 224]          [1, 64, 112, 112]         9,408
│    └─BatchNorm2d (1)                   [1, 64, 112, 112]         [1, 64, 112, 112]         128
│    └─ReLU (2)                          [1, 64, 112, 112]         [1, 64, 112, 112]         --
│    └─MaxPool2d (3)                     [1, 64, 112, 112]         [1, 64, 56, 56]           --
├─Sequential (stage_01)                  [1, 64, 56, 56]           [1, 64, 56, 56]           --
│    └─BasicBlock (0)                    [1, 64, 56, 56]           [1, 64, 56, 56]           --
│    │    └─Sequential (conv_block)      [1, 64, 56, 56]           [1, 64, 56, 56]           --
│    │    │    └─Conv2d (0)    

In [33]:
# Reference model from torchvision, built with weights='DEFAULT'.
# NOTE(review): presumably this loads pretrained weights and may download them - confirm for offline runs.
torch_resnet34_model = models.resnet34(weights='DEFAULT')
# Same summary columns as used for the hand-written model, for a per-layer comparison.
summary(model=torch_resnet34_model, input_size=(1, 3, 224, 224),
        col_names=['input_size', 'output_size', 'num_params'], 
        row_settings=['var_names'])

Layer (type (var_name))                  Input Shape               Output Shape              Param #
ResNet (ResNet)                          [1, 3, 224, 224]          [1, 1000]                 --
├─Conv2d (conv1)                         [1, 3, 224, 224]          [1, 64, 112, 112]         9,408
├─BatchNorm2d (bn1)                      [1, 64, 112, 112]         [1, 64, 112, 112]         128
├─ReLU (relu)                            [1, 64, 112, 112]         [1, 64, 112, 112]         --
├─MaxPool2d (maxpool)                    [1, 64, 112, 112]         [1, 64, 56, 56]           --
├─Sequential (layer1)                    [1, 64, 56, 56]           [1, 64, 56, 56]           --
│    └─BasicBlock (0)                    [1, 64, 56, 56]           [1, 64, 56, 56]           --
│    │    └─Conv2d (conv1)               [1, 64, 56, 56]           [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d (bn1)            [1, 64, 56, 56]           [1, 64, 56, 56]           128
│    │    └─ReLU (relu)   