In [2]:
!python --version
!pip list > requirements.txt
!echo "---------------------------------------------"
!cat requirements.txt | grep "torch"

Python 3.10.11
"---------------------------------------------"


'cat'��(��) ���� �Ǵ� �ܺ� ����, ������ �� �ִ� ���α׷�, �Ǵ�
��ġ ������ �ƴմϴ�.


In [3]:
import torch
"cuda" if torch.cuda.is_available() else "cpu"

'cuda'

# Deep Residual Learning for Image Recognition

- 논문URL - https://arxiv.org/abs/1512.03385v1

- 핵심 내용
    - 신경망이 더 깊어질수록 성능 저하(degradation problem)가 발생하는 문제가 발생(정확도의 감소, 반대로 training error와 test error는 증가)
        - 이러한 성능 저하는 과적합에 의한 것이 아님
        - 또한, 기울기 소실이나 폭주(Gradient Vanishing/Exploding)의 문제가 아니라, 깊은 신경망에서 옵티마이저가 겪는 최적화 문제임

    - 심층 잔여 학습 프레임워크(Deep Residual Learning Framework) 도입을 제안
        - F(x) + x 공식으로 나타나는 shortcut connections 를 통해 입력을 해당 계층의 출력에 덧셈 연산을 통해 수행
        - 전체 네트워크는 SGD 역전파를 통해 End-To-End 방식으로 훈련할 수 있음
        - Solver(optimizer)를 수정하지 않고도 Caffe와 같은 일반적인 라이브러리에서 쉽게 구현할 수 있음
    
    - ImageNet 데이터를 학습한 결과, 기본 네트워크(Residual이 적용되지 않은) 18-layer와 50-layer를 비교하였을 때 50-layer에서 성능이 저하되어 18-layer보다 낮은 성능을 보였으나, Residual Learning이 적용된 네트워크는 18-layer보다 50-layer가 더 높은 성능을 보이며 수렴 속도도 더 빠름
        - 34 계층 Resnet은 매우 경쟁력 있는 정확도를 달성했고, 상위 5개의 검증 오차는 4.49%임
        - 152 계층 모델 2개를 결합한 앙상블을 구성하여 ILSVRC 2015에서 1위 입상

## Residual Learing

<img src="../images/2-Figure2-1.png" alt="Figure 2" />

### Basic Block & Bottleneck Block
컨볼루션 잔여 블록의 두 가지 변형
- 왼쪽: 3x3 컨볼루션 레이어 2개가 있는 기본 블록
- 오른쪽: 차원 감소(예: 1/4)를 위한 1x1 컨볼루션 레이어, 3x3 컨볼루션 레이어, 차원 복원을 위한 또 다른 1x1 컨볼루션 레이어가 있는 병목 블록

<img src="../images/6-Figure5-1.png" alt="Figure 5"/>

### Basic Block 간단 구현

Residual Block 중 하나인 Basic Block을 구현해본다.

블록을 통과한 출력과 입력을 합하기 위해서는 출력과 입력이 동일한 차원이어야 한다. 따라서 두 합성곱 계층(3 x 3, 64)을 통과하면서 피쳐맵 크기가 변하지 않아야 하므로 padding을 1로 설정한다. 논문 내용에 따라 합성곱 계층 뒤에는 배치 정규화를 위치시킨다.

`nn.Sequential`을 사용해 합성곱, 배치 정규화, 활성화 함수를 묶고 이 계층들을 통과한 출력에 입력을 합산하여 최종 출력으로 산출한다. `print`를 통해 입력과 출력 모두 동일한 차원을 갖는 것을 확인할 수 있다.

In [4]:
import torch
import torch.nn as nn

In [5]:
dim = 64
input = torch.rand(1, 64, 56, 56)

# Basic Block
layers = nn.Sequential(
    nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(dim),
    nn.ReLU(),
    nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(dim)
)

out = layers(input) + input
out = torch.relu(out)
print(f"input shape: {input.shape}")
print(f"output shape: {out.shape}")

input shape: torch.Size([1, 64, 56, 56])
output shape: torch.Size([1, 64, 56, 56])


#### 피처맵 차원 변화에 따른 다운 샘플링 적용

피처맵 크기를 줄임으로써 연산량은 감소하게 된다. Basic Block에서도 첫번째 합성곱 계층에서 stride를 2로 설정하여 피처맵 크기를 절반으로 감소시키는 경우가 있다.
이 경우 입력과 합성곱 블록을 통과한 출력의 차원은 동일하지 않으므로, 입력에 다운 샘플링을 적용하여 출력 차원과 동일하게 변환한 후 덧셈 연산을 수행한다. 다운 샘플링은 kernel size 1, stride 2인 합성곱 연산을 수행하여 피처맵 크기를 절반으로 감소시킨다.

In [6]:
dim = 64
input = torch.rand(1, 64, 56, 56)

# Basic Block
layers = nn.Sequential(
    nn.Conv2d(dim, dim, kernel_size=3, stride=2, padding=1), # stride 2
    nn.BatchNorm2d(dim),
    nn.ReLU(),
    nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(dim)
)

downsample = nn.Sequential(
    nn.Conv2d(dim, dim, kernel_size=1, stride=2),
    nn.BatchNorm2d(dim)
)

out = layers(input) + downsample(input)
out = torch.relu(out)
print(f"input shape: {input.shape}")
print(f"output shape: {out.shape}")

input shape: torch.Size([1, 64, 56, 56])
output shape: torch.Size([1, 64, 28, 28])


### Bottleneck Block 간단 구현

Basic Block과 마찬가지로 블록을 통과한 출력과 입력은 동일한 차원이어야 한다. stride가 1인 1 x 1 합성곱 계층은 차원이 변하지 않는다. 3 x 3 합성곱 계층은 padding 1을 적용시켜 출력 차원이 변하지 않도록 구성한다.

In [7]:
dim = 256
input = torch.rand(1, 256, 56, 56)

# Bottleneck Block
layers = nn.Sequential(
    nn.Conv2d(dim, dim//4, kernel_size=1, stride=1),
    nn.BatchNorm2d(dim//4),
    nn.ReLU(),
    nn.Conv2d(dim//4, dim//4, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(dim//4),
    nn.ReLU(),
    nn.Conv2d(dim//4, dim, kernel_size=1, stride=1),
    nn.BatchNorm2d(dim)
)

out = layers(input) + input
out = torch.relu(out)
print(f"input shape: {input.shape}")
print(f"output shape: {out.shape}")

input shape: torch.Size([1, 256, 56, 56])
output shape: torch.Size([1, 256, 56, 56])


#### 피처맵 차원 변화에 따른 다운 샘플링 적용

In [8]:
dim = 256
input = torch.rand(1, 256, 56, 56)

# Bottleneck Block
layers = nn.Sequential(
    nn.Conv2d(dim, dim//4, kernel_size=1, stride=2), # stride 2
    nn.BatchNorm2d(dim//4),
    nn.ReLU(),
    nn.Conv2d(dim//4, dim//4, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(dim//4),
    nn.ReLU(),
    nn.Conv2d(dim//4, dim, kernel_size=1, stride=1),
    nn.BatchNorm2d(dim)
)

downsample = nn.Sequential(
    nn.Conv2d(dim, dim, kernel_size=1, stride=2),
    nn.BatchNorm2d(dim)
)

out = layers(input) + downsample(input)
out = torch.relu(out)
print(f"input shape: {input.shape}")
print(f"output shape: {out.shape}")

input shape: torch.Size([1, 256, 56, 56])
output shape: torch.Size([1, 256, 28, 28])


## Resnet Architecture

- Stem(conv1), stage1(conv2_x), stage2(conv3_x), stage3(conv4_x), stage4(conv5_x), FC(Full Connected) 로 구성
- Convolution layer과 Acivation layer 사이에 Batch Normalization(BN) 적용(3.4. Implementation 참고)
    - Resnet 18-layer
        - Basic Block
        - layers [2, 2, 2, 2]
        - 1 + 2 x (2 + 2 + 2 + 2) + 1 = 18
    - Resnet 34-layer
        - Basic Block
        - layers [3, 4, 6, 3]
        - 1 + 2 x (3 + 4 + 6 + 3) + 1 = 34
    - Resnet 50-layer
        - Bottleneck Block
        - layers [3, 4, 6, 3]
        - 1 + 3 x (3 + 4 + 6 + 3) + 1 = 50
    - Resnet 101-layer
        - Bottleneck Block
        - layers [3, 4, 23, 3]
        - 1 + 3 x (3 + 4 + 23 + 3) + 1 = 101
    - Resnet 152-layer
        - Bottleneck Block
        - layers [3, 8, 36, 3]
        - 1 + 3 x (3 + 8 + 36 + 3) + 1 = 152

<img src="../images/5-Table1-1.png" alt="Table1" />

## Resnet 구현

### Stem Layer

- conv1:
    - input size: 224 x 224
    - in_channels: 3(RGB)
    - out_channels: 64
    - kernel_size: 7
    - stride: 2
    - padding: 3(calculated)
        - output size: 112 x 112

In [9]:
input = torch.rand(1, 3, 224, 224)

conv1 = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64)
)

out = conv1(input)
print(f"input shape: {input.shape}")
print(f"output shape: {out.shape}")

input shape: torch.Size([1, 3, 224, 224])
output shape: torch.Size([1, 64, 112, 112])


- max_pool:
    - input size: 112 x 112
    - kernel_size: 3
    - stride: 2
    - padding: 1(calculated)
        - output size: 56 x 56

In [10]:
input = torch.rand(1, 64, 112, 112)
max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

out = max_pool(input)
print(f"input shape: {input.shape}")
print(f"output shape: {out.shape}")

input shape: torch.Size([1, 64, 112, 112])
output shape: torch.Size([1, 64, 56, 56])


### Residual Block

- Basic Block(18, 34 layer)
    - input size: 56, 28, 14, 7
    - kernel_size: 3
    - stride: 1 or 2
    - padding: 1
        - output size: 56, 28, 14, 7

- 다운 샘플링 적용(ex. 18-layer)
    - conv2_1, conv3_1, conv4_1, conv5_1: 피처맵 크기 축소(stride=2)
    - 위 계층에 다운 샘플링 적용


|   conv   | in_dim | dim | out_dim | stride | downsample |
| :------: | :----: | :-: | :----:  |:-----: | :--------: |
| conv2_1  |   64   | 64  |   64    |  1     |     X      |
| conv2_2  |   64   | 64  |   64    |  1     |     X      |
| conv3_1  |   64   | 128 |   128   |  2     |     O      |
| conv3_2  |  128   | 128 |   128   |  1     |     X      |
| conv4_1  |  128   | 256 |   256   |  2     |     O      |
| conv4_2  |  256   | 256 |   256   |  1     |     X      |
| conv5_1  |  256   | 512 |   256   |  2     |     O      |
| conv5_2  |  512   | 512 |   256   |  1     |     X      |


In [11]:
# class BasicBlock(nn.Module):
#     def __init__(self, in_dim, dim, stride=1):
#         super().__init__()
#         self.conv1 = nn.Conv2d(in_dim, dim, kernel_size=3, stride=stride, padding=1)
#         self.bn = nn.BatchNorm2d(dim)
#         self.relu = nn.ReLU()
#         self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1)

#     def forward(self, x):
#         input = x
#         out = self.conv1(x)
#         out = self.bn(out)
#         out = self.relu(out)
#         out = self.conv2(out)
#         out = self.bn(out)

#         out = out + input
#         out = self.relu(out)
#         return out

In [12]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_dim, dim, stride=1):
        super().__init__()
        self.stride = stride
        self.conv1 = nn.Conv2d(in_dim, dim, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(dim)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(dim)
        self.downsample = self._build_downsample_layer(in_dim, dim, stride)

    def forward(self, x):
        input = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        if self.stride != 1:
            input = self.downsample(input)

        out = out + input
        out = self.relu(out)
        return out

    def _build_downsample_layer(self, in_dim, dim, stride):
        return nn.Sequential(
            nn.Conv2d(in_dim, dim * self.expansion, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(dim * self.expansion)
        )

In [13]:
input = torch.rand(1, 64, 56, 56)
basic_block = BasicBlock(64, 64, 1)
out = basic_block(input)
print(f"input shape: {input.shape}")
print(f"output shape: {out.shape}")

input shape: torch.Size([1, 64, 56, 56])
output shape: torch.Size([1, 64, 56, 56])


In [14]:
input = torch.rand(1, 64, 56, 56)
basic_block = BasicBlock(64, 128, 2)
out = basic_block(input)
print(f"input shape: {input.shape}")
print(f"output shape: {out.shape}")

input shape: torch.Size([1, 64, 56, 56])
output shape: torch.Size([1, 128, 28, 28])


- Bottleneck Block(18, 34 layer)
    - input size: 56, 28, 14, 7
    - kernel_size: 1 or 3
    - stride: 1 or 2
    - padding: 0 or 1
        - output size: 56, 28, 14, 7

- 다운 샘플링 적용(ex. 50-layer)
    - conv3_1, conv4_1, conv5_1: 피처맵 크기 축소(stride=2)
    - conv2_1: feature map channel 변화(64 -> 256)
    - 위 계층에 다운 샘플링 적용

|   conv   | in_dim | dim | out_dim | stride | downsample |
| :------: | :----: | :-: | :-----: | :----: | :--------: |
| conv2_1  |   64   | 64  |   256   |   1    |     O      |
| conv2_2  |  256   | 64  |   256   |   1    |     X      |
| conv2_3  |  256   | 64  |   256   |   1    |     X      |
| conv3_1  |  256   | 128 |   512   |   2    |     O      |
| conv3_2  |  512   | 128 |   512   |   1    |     X      |
| conv3_3  |  512   | 128 |   512   |   1    |     X      |
| conv3_4  |  512   | 128 |   512   |   1    |     X      |
| conv4_1  |  512   | 256 |   1024  |   2    |     O      |
| conv4_2  |  1024  | 256 |   1024  |   1    |     X      |
| conv4_3  |  1024  | 256 |   1024  |   1    |     X      |
| conv4_4  |  1024  | 256 |   1024  |   1    |     X      |
| conv4_5  |  1024  | 256 |   1024  |   1    |     X      |
| conv4_6  |  1024  | 256 |   1024  |   1    |     X      |
| conv5_1  |  1024  | 512 |   2048  |   2    |     O      |
| conv5_2  |  2048  | 512 |   2048  |   1    |     X      |
| conv5_3  |  2048  | 512 |   2048  |   1    |     X      |
| conv5_4  |  2048  | 512 |   2048  |   1    |     X      |



In [15]:
# class BottleneckBlock(nn.Module):
#     def __init__(self, in_dim, dim, stride=1):
#         super().__init__()
#         self.stride = stride
#         self.conv1 = nn.Conv2d(in_dim, dim, kernel_size=1, stride=stride, padding=0)
#         self.bn1 = nn.BatchNorm2d(dim)
#         self.relu = nn.ReLU()
#         self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1)
#         self.bn2 = nn.BatchNorm2d(dim)
#         self.conv3 = nn.Conv2d(dim, dim * 4, kernel_size=1, stride=1, padding=0)
#         self.bn3 = nn.BatchNorm2d(dim * 4)
#         self.downsample = self._build_downsample_layer(in_dim, dim, stride)

#     def forward(self, x):
#         input = x
#         out = self.conv1(x)
#         out = self.bn1(out)
#         out = self.relu(out)
#         out = self.conv2(out)
#         out = self.bn2(out)
#         out = self.relu(out)
#         out = self.conv3(out)
#         out = self.bn3(out)

#         if self.stride != 1:
#             input = self.downsample(input)

#         out = out + input
#         out = self.relu(out)
#         return out

#     def _build_downsample_layer(self, in_dim, dim, stride):
#         return nn.Sequential(
#             nn.Conv2d(in_dim, dim * 4, kernel_size=1, stride=stride),
#             nn.BatchNorm2d(dim * 4)
#         )

In [16]:
class BottleneckBlock(nn.Module):
    expansion = 4

    def __init__(self, in_dim, dim, stride=1):
        super().__init__()
        self.in_dim = in_dim
        self.dim = dim
        self.stride = stride
        self.conv1 = nn.Conv2d(in_dim, dim, kernel_size=1, stride=stride, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(dim)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(dim)
        self.conv3 = nn.Conv2d(dim, dim * self.expansion, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(dim * self.expansion)
        self.downsample = self._build_downsample_layer(in_dim, dim, stride)

    def forward(self, x):
        input = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)

        if self.stride != 1 or self.in_dim != self.dim * self.expansion:
            input = self.downsample(input)

        out = out + input
        out = self.relu(out)
        return out

    def _build_downsample_layer(self, in_dim, dim, stride):
        return nn.Sequential(
            nn.Conv2d(in_dim, dim * self.expansion, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(dim * self.expansion)
        )

In [17]:
input = torch.rand(1, 64, 56, 56)

bottleneck_block = BottleneckBlock(64, 64, 1)
out = bottleneck_block(input)
print(f"input shape: {input.shape}")
print(f"output shape: {out.shape}")

input shape: torch.Size([1, 64, 56, 56])
output shape: torch.Size([1, 256, 56, 56])


In [18]:
input = torch.rand(1, 256, 56, 56)

bottleneck_block = BottleneckBlock(256, 128, 2)
out = bottleneck_block(input)
print(f"input shape: {input.shape}")
print(f"output shape: {out.shape}")

input shape: torch.Size([1, 256, 56, 56])
output shape: torch.Size([1, 512, 28, 28])


### ResNet

In [19]:
IMAGE_SIZE = 224

class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes):
        super().__init__()
        self.in_dim = 64
        self.stem = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64)
        )
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stage1 = self._build_stage(block, 64, layers[0], stride=1)
        self.stage2 = self._build_stage(block, 128, layers[1], stride=2)
        self.stage3 = self._build_stage(block, 256, layers[2], stride=2)
        self.stage4 = self._build_stage(block, 512, layers[3], stride=2)

        self.average_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # for layers in [self.stem, self.stage1, self.stage2, self.stage3, self.stage4, self.fc]:
        #     if isinstance(layers, nn.Linear):
        #         nn.init.xavier_uniform_(layers.weight)
        #         continue

        #     for layer in layers:
        #         if isinstance(layer, nn.Conv2d):
        #             nn.init.(layer.weight)

    def _build_stage(self, block, dim, layers, stride):
        stage = []
        stage.append(block(self.in_dim, dim, stride))
        self.in_dim = dim * block.expansion

        for _ in range(layers - 1):
            stage.append(block(self.in_dim, dim, stride=1))

        return nn.Sequential(*stage)

    def forward(self, x):
        out = self.stem(x)
        out = self.max_pool(out)
        out = self.stage1(out)
        out = self.stage2(out)
        out = self.stage3(out)
        out = self.stage4(out)
        out = self.average_pool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out

In [20]:
resnet18 = ResNet(BasicBlock, [2, 2, 2, 2], 1000)
x = torch.rand(1, 3, 224, 224)
out = resnet18(x)
out.shape

torch.Size([1, 1000])

### 모델 요약

In [21]:
try:
    from torchinfo import summary
except Exception as ex:
    !pip install torchinfo
    from torchinfo import summary

In [22]:
summary(resnet18, [1, 3, 224, 224])

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Sequential: 1-1                        [1, 64, 112, 112]         --
│    └─Conv2d: 2-1                       [1, 64, 112, 112]         9,408
│    └─BatchNorm2d: 2-2                  [1, 64, 112, 112]         128
├─MaxPool2d: 1-2                         [1, 64, 56, 56]           --
├─Sequential: 1-3                        [1, 64, 56, 56]           --
│    └─BasicBlock: 2-3                   [1, 64, 56, 56]           4,224
│    │    └─Conv2d: 3-1                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-2             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-3                    [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-4                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-6                    [1, 64, 56, 56]           -

In [23]:
import torch

In [24]:
num_classes = 1000

resnet50 = ResNet(BottleneckBlock, [3, 4, 6, 3], num_classes)
x = torch.rand(1, 3, 224, 224)
out = resnet50(x)
assert out.shape == torch.Size([1, num_classes])

In [25]:
summary(resnet50(1000), [1, 3, 224, 224])

TypeError: conv2d() received an invalid combination of arguments - got (int, Parameter, NoneType, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias = None, tuple of ints stride = 1, tuple of ints padding = 0, tuple of ints dilation = 1, int groups = 1)
      didn't match because some of the arguments have invalid types: (!int!, !Parameter!, !NoneType!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, !int!)
 * (Tensor input, Tensor weight, Tensor bias = None, tuple of ints stride = 1, str padding = "valid", tuple of ints dilation = 1, int groups = 1)
      didn't match because some of the arguments have invalid types: (!int!, !Parameter!, !NoneType!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, !int!)


In [26]:
import torchvision

summary(torchvision.models.resnet34(), [1, 3, 224, 224])

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Conv2d: 1-1                            [1, 64, 112, 112]         9,408
├─BatchNorm2d: 1-2                       [1, 64, 112, 112]         128
├─ReLU: 1-3                              [1, 64, 112, 112]         --
├─MaxPool2d: 1-4                         [1, 64, 56, 56]           --
├─Sequential: 1-5                        [1, 64, 56, 56]           --
│    └─BasicBlock: 2-1                   [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-1                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-2             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-3                    [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-4                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-6                    [1, 64, 56, 56]           --
│

In [27]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_dim, dim, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_dim, dim, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(dim)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(dim)
        self.downsample = downsample

    def forward(self, x):
        input = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            input = self.downsample(input)

        out = out + input
        out = self.relu(out)
        return out

# def _build_downsample_layer(self, in_dim, dim, stride):
#     return nn.Sequential(
#         nn.Conv2d(in_dim, dim * self.expansion, kernel_size=1, stride=stride, bias=False),
#         nn.BatchNorm2d(dim * self.expansion)
#     )

class BottleneckBlock(nn.Module):
    expansion = 4

    def __init__(self, in_dim, dim, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_dim, dim, kernel_size=1, stride=stride, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(dim)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(dim)
        self.conv3 = nn.Conv2d(dim, dim * self.expansion, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(dim * self.expansion)
        self.downsample = downsample

    def forward(self, x):
        input = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            input = self.downsample(input)

        out = out + input
        out = self.relu(out)
        return out

In [28]:
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes):
        super().__init__()
        self.in_dim = 64
        self.downsample = None
        self.stem = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64)
        )
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stage1 = self._build_stage(block, 64, layers[0], stride=1)
        self.stage2 = self._build_stage(block, 128, layers[1], stride=2)
        self.stage3 = self._build_stage(block, 256, layers[2], stride=2)
        self.stage4 = self._build_stage(block, 512, layers[3], stride=2)

        self.average_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _build_stage(self, block, dim, layers, stride):
        stage = []

        if stride != 1 or self.in_dim != dim * block.expansion:
            self.downsample = nn.Sequential(
                nn.Conv2d(self.in_dim, dim * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(dim * block.expansion)
            )

        stage.append(block(self.in_dim, dim, stride, self.downsample))
        self.in_dim = dim * block.expansion

        for _ in range(layers - 1):
            stage.append(block(self.in_dim, dim, stride=1))

        return nn.Sequential(*stage)

    def forward(self, x):
        out = self.stem(x)
        out = self.max_pool(out)
        out = self.stage1(out)
        out = self.stage2(out)
        out = self.stage3(out)
        out = self.stage4(out)
        out = self.average_pool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out

In [29]:
def resnet18(num_classes):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)

def resnet34(num_classes):
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes)

def resnet50(num_classes):
    return ResNet(BottleneckBlock, [3, 4, 6, 3], num_classes)

def resnet101(num_classes):
    return ResNet(BottleneckBlock, [3, 4, 23, 3], num_classes)

def resnet152(num_classes):
    return ResNet(BottleneckBlock, [3, 8, 36, 3], num_classes)

In [30]:
summary(resnet34(1000), [1, 3, 224, 224])

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Sequential: 1-1                        [1, 64, 112, 112]         --
│    └─Conv2d: 2-1                       [1, 64, 112, 112]         9,408
│    └─BatchNorm2d: 2-2                  [1, 64, 112, 112]         128
├─MaxPool2d: 1-2                         [1, 64, 56, 56]           --
├─Sequential: 1-3                        [1, 64, 56, 56]           --
│    └─BasicBlock: 2-3                   [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-1                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-2             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-3                    [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-4                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-6                    [1, 64, 56, 56]           --
│

## Cifar-10 데이터 학습

In [31]:
# download cifar-10 dataset
!wget -O ./cifar-10-python.tar.gz https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
!tar -xf ./cifar-10-python.tar.gz

'wget'��(��) ���� �Ǵ� �ܺ� ����, ������ �� �ִ� ���α׷�, �Ǵ�
��ġ ������ �ƴմϴ�.
tar: Error opening archive: Failed to open './cifar-10-python.tar.gz'


In [41]:
from pathlib import Path

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import v2

In [42]:
class CifarDataset(Dataset):
    def __init__(self, cifar_path, transform=None, train=True):
        filename = "data_batch_*" if train else "test_batch"
        batch_files = list(Path(cifar_path).glob(filename))

        self.images = []
        self.labels = []
        self.transform = transform

        for file in batch_files:
            batch = self.unpickle(str(file))
            self.labels += batch[b'labels']
            data = batch[b'data']

            for image in data:
                image = [np.split(x, 32) for x in np.split(image, 3)]
                image = np.array(image)
                self.images.append(image)

        self.images = torch.from_numpy(np.array(self.images))

    def unpickle(self, file):
        import pickle
        with open(file, 'rb') as fo:
            dict = pickle.load(fo, encoding='bytes')
        return dict

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [43]:
transform = v2.Compose([
    v2.RandomChoice([v2.Resize(256), v2.Resize(480)]),
    v2.Resize((224, 224)),
    v2.RandomHorizontalFlip(p=0.5),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [46]:
train_dataset = CifarDataset("../data/train", transform=transform, train=True)
test_dataset = CifarDataset("../data/test", transform=transform, train=False)

In [47]:
assert len(train_dataset) == 50_000
assert len(test_dataset) == 10_000

In [48]:
from torch.utils.data import random_split

train_dataset, validation_dataset = random_split(train_dataset, [0.8, 0.2])

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=64, shuffle=False)

In [51]:
from pathlib import Path

model_path = Path("../models")
model_path.mkdir(exist_ok=True)
model_name = "RESNET50.pth"

In [52]:
import sys

device = "cuda" if torch.cuda.is_available() else "cpu"

epochs = 20
num_classes = 10
lr = 1e-5

best_loss = sys.maxsize

model = resnet50(num_classes).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr)

train_loss_hist, test_loss_hist = [], []
train_accuracy_hist, test_accuracy_hist = [], []

for epoch in range(epochs):
    train_loss, test_loss = 0, 0
    train_accuracy, test_accuracy = 0, 0

    model.train()
    for X_train, y_train in train_dataloader:
        X_train, y_train = X_train.to(device), y_train.to(device)

        pred_logits = model(X_train)
        preds = torch.argmax(pred_logits, dim=1)
        train_accuracy += (preds == y_train).sum().item()
        loss = loss_fn(pred_logits, y_train)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        for X_test, y_test in validation_dataloader:
            X_test, y_test = X_test.to(device), y_test.to(device)

            pred_logits = model(X_test)
            preds = torch.argmax(pred_logits, dim=1)
            test_accuracy += (preds == y_test).sum().item()
            loss = loss_fn(pred_logits, y_test).item()
            test_loss += loss

            if best_loss > loss:
                best_loss = loss
                torch.save(model_path / model_name)

    train_loss /= len(train_dataset)
    test_loss /= len(test_dataset)
    train_accuracy /= len(train_dataset)
    test_accuracy /= len(test_dataset)

    train_loss_hist.append(train_loss)
    test_loss_hist.append(test_loss)
    train_accuracy_hist.append(train_accuracy)
    test_accuracy_hist.append(test_accuracy)

    print(f"epoch: {epoch+1} | train loss: {train_loss:.4f} | train accuracy: {train_accuracy * 100:.4f} | test loss: {test_loss:.4f} test accuracy: {test_accuracy * 100:.4f}")

KeyboardInterrupt: 