# ResNet 50

- Paper: [2015.12.10] Deep Residual Learning for Image Recognition
- https://arxiv.org/abs/1512.03385

### [Package load]

In [None]:
import torch 
print('pytorch version: {}'.format(torch.__version__))

import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import glob
import os
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
%matplotlib inline

# Report the runtime environment: the PyTorch version and whether CUDA is available.
print('pytorch version: {}'.format(torch.__version__))
print('GPU 사용 가능 여부: {}'.format(torch.cuda.is_available()))
device = "cuda" if torch.cuda.is_available() else "cpu"   # store the device string depending on whether a GPU is available

### [Model: ResNet50]

https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py

In [None]:
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with a padding of 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        An ``nn.Conv2d`` with ``kernel_size=3``, ``padding=1`` and ``bias=False``.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)

def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free, unpadded 1x1 (pointwise) convolution.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        An ``nn.Conv2d`` with ``kernel_size=1`` and ``bias=False``.
    """
    conv_options = dict(kernel_size=1, stride=stride, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)

In [None]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus an additive shortcut.

    Args:
        inplanes: number of channels of the block input.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution (default 1).
        downsample: optional module applied to the input before it is added
            to the convolution branch; ``None`` means the input is added as is.
    """

    # Output channels of the block are ``planes * expansion``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # First 3x3 conv carries the requested stride; the second always uses stride 1.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        # Used to make the shortcut match the branch output (e.g. when stride is 2).
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut(x))``."""
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        # Example: with stride 2 and a 64x64 input map the branch output is
        # 32x32, so the untouched input (still 64x64) could not be added to
        # it. The downsample module shrinks the shortcut to the same size.
        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    Args:
        inplanes: number of channels of the block input.
        planes: channel width of the first two convolutions; the last
            convolution outputs ``planes * expansion`` channels.
        stride: stride of the 3x3 convolution (default 1).
        downsample: optional module applied to the input before it is added
            to the convolution branch; ``None`` means the input is added as is.
    """

    # The final 1x1 convolution widens the channels by this factor.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        widened = planes * self.expansion
        # e.g. for planes=64: conv1x1(64, 64) -> conv3x3(64, 64) -> conv1x1(64, 256)
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, widened)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut(x))``."""
        branch = x
        # 1x1 (stride 1) then 3x3 (block stride), each followed by BN + ReLU.
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            branch = self.relu(norm(conv(branch)))

        # Expanding 1x1 convolution (stride 1); ReLU is applied after the addition.
        branch = self.bn3(self.conv3(branch))

        # When stride or channel count changes, the shortcut must be
        # transformed too so that both tensors can be added.
        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)

In [None]:
class ResNet(nn.Module):
    """ResNet backbone with a linear classification head.

    Example (ResNet-50): ``ResNet(Bottleneck, [3, 4, 6, 3])``.

    Args:
        block: residual block class (e.g. ``Bottleneck``); it must expose an
            ``expansion`` class attribute and accept
            ``(inplanes, planes, stride, downsample)``.
        layers: sequence whose first four entries give the number of blocks
            in layer1..layer4.
        num_classes: output size of the final linear layer (default 1000).
        zero_init_residual: if True, the weight (gamma) of the last BatchNorm
            of every ``Bottleneck``/``BasicBlock`` is set to 0 so each block
            starts out as an identity mapping. The original notes cite
            https://arxiv.org/abs/1706.02677 for a 0.2~0.3% improvement.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super().__init__()

        # Channel count fed into the next residual stage; updated by _make_layer.
        self.inplanes = 64

        # Stem. For a 3x224x224 input: conv1 -> 64x112x112, maxpool -> 64x56x56.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages, registered as layer1..layer4 in this order.
        # Only the first stage keeps the resolution (stride 1).
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for stage_idx, (width, stage_stride) in enumerate(stage_specs):
            stage = self._make_layer(block, width, layers[stage_idx], stride=stage_stride)
            setattr(self, f"layer{stage_idx + 1}", stage)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Weight initialization: Kaiming-normal for convolutions,
        # gamma=1 / beta=0 for batch normalization.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)

        # Optionally zero the last BN gamma of each residual branch so the
        # branch initially outputs zeros and the block acts as an identity.
        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck):
                    nn.init.zeros_(module.bn3.weight)
                elif isinstance(module, BasicBlock):
                    nn.init.zeros_(module.bn2.weight)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` consecutive blocks.

        Only the first block receives ``stride`` and, when needed, a
        projection shortcut; the remaining ``blocks - 1`` blocks use the
        defaults. ``self.inplanes`` is updated to the stage's output width.

        Example for ResNet-50 (Bottleneck, expansion 4):
            layer1 = [Bottleneck(64, 64, 1, downsample),
                      Bottleneck(256, 64), Bottleneck(256, 64)]
            layer2 = [Bottleneck(256, 128, 2, downsample),
                      Bottleneck(512, 128), Bottleneck(512, 128), Bottleneck(512, 128)]
        """
        out_channels = planes * block.expansion

        # A projection (1x1 conv + BN) is needed on the shortcut whenever the
        # first block changes the resolution (stride != 1) or the channel
        # count. In layer1 of ResNet-50 it is used purely to match channels
        # (64 -> 256) since the stride there is 1.
        downsample = None
        if not (stride == 1 and self.inplanes == out_channels):
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_channels, stride),
                nn.BatchNorm2d(out_channels),
            )

        first_block = block(self.inplanes, planes, stride, downsample)
        self.inplanes = out_channels

        # range(1, blocks): the first block is already built, so e.g.
        # blocks=3 adds only two more.
        remaining_blocks = [block(self.inplanes, planes) for _ in range(1, blocks)]
        return nn.Sequential(first_block, *remaining_blocks)

    def forward(self, x):
        """Run stem, the four residual stages, global pooling and the classifier."""
        for stem_op in (self.conv1, self.bn1, self.relu, self.maxpool):
            x = stem_op(x)

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        # Global average pooling collapses every channel map to 1x1, then the
        # result is flattened to (batch, channels) for the linear layer.
        pooled = self.avgpool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

In [None]:
# ResNet-50 built from Bottleneck blocks with stage depths [3, 4, 6, 3]:
# 1 (conv1) + 9 (layer1) + 12 (layer2) + 18 (layer3) + 9 (layer4) + 1 (fc) = 50 weighted layers.
# num_classes is 4 because the OCT classification task has 4 classes;
# zero_init_residual=True zeroes the last BN gamma of every residual block.
resnet50 = ResNet(
    Bottleneck,
    [3, 4, 6, 3],
    num_classes=4,
    zero_init_residual=True,
).to(device)

In [None]:
resnet50

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# Print a per-layer table (layer type, output shape, parameter count) of the
# custom ResNet-50 for a single 3x224x224 input.
from torchsummary import summary
summary(resnet50, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

### [Model: ResNet50 Transfer learning]

https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py

In [None]:
class TransferResNet(nn.Module):
    """torchvision ResNet-50 with its classification head replaced for transfer learning.

    Args:
        num_classes: number of outputs of the new final linear layer
            (default 4, the value that used to be hard-coded; the stock
            torchvision head has 1000 outputs).
        pretrained: forwarded to ``torchvision.models.resnet50``; the default
            ``True`` keeps the previous behavior of loading pretrained weights.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super(TransferResNet, self).__init__()
        # NOTE(review): newer torchvision releases prefer the ``weights=``
        # argument over ``pretrained=``; kept as is for compatibility with the
        # version this notebook was written against - confirm before upgrading.
        self.ResNet50 = torchvision.models.resnet50(pretrained=pretrained)

        # Read the head's input width from the backbone instead of hard-coding
        # it (it is 2048 for ResNet-50, not 512 as the old comment claimed).
        head_in_features = self.ResNet50.fc.in_features
        self.ResNet50.fc = nn.Linear(head_in_features, num_classes)

    def forward(self, x):
        """Run ``x`` through the wrapped ResNet-50 and return its logits."""
        return self.ResNet50(x)

In [None]:
# Instantiate the transfer-learning model and move it to the selected device.
resnet50_transfer = TransferResNet().to(device) 



In [None]:
resnet50_transfer

TransferResNet(
  (ResNet50): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
  

In [None]:
# Print a per-layer table (layer type, output shape, parameter count) of the
# transfer-learning model for a single 3x224x224 input.
from torchsummary import summary
summary(resnet50_transfer, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

In [None]:
# List every parameter of the transfer model with a 1-based running index,
# its qualified name and whether it is currently trainable.
count = 0  # remains 0 if the model exposes no parameters
for count, (name, param) in enumerate(resnet50_transfer.named_parameters(), start=1):
    print(f"count:{count},", name, param.requires_grad)

count:1, ResNet50.conv1.weight True
count:2, ResNet50.bn1.weight True
count:3, ResNet50.bn1.bias True
count:4, ResNet50.layer1.0.conv1.weight True
count:5, ResNet50.layer1.0.bn1.weight True
count:6, ResNet50.layer1.0.bn1.bias True
count:7, ResNet50.layer1.0.conv2.weight True
count:8, ResNet50.layer1.0.bn2.weight True
count:9, ResNet50.layer1.0.bn2.bias True
count:10, ResNet50.layer1.0.conv3.weight True
count:11, ResNet50.layer1.0.bn3.weight True
count:12, ResNet50.layer1.0.bn3.bias True
count:13, ResNet50.layer1.0.downsample.0.weight True
count:14, ResNet50.layer1.0.downsample.1.weight True
count:15, ResNet50.layer1.0.downsample.1.bias True
count:16, ResNet50.layer1.1.conv1.weight True
count:17, ResNet50.layer1.1.bn1.weight True
count:18, ResNet50.layer1.1.bn1.bias True
count:19, ResNet50.layer1.1.conv2.weight True
count:20, ResNet50.layer1.1.bn2.weight True
count:21, ResNet50.layer1.1.bn2.bias True
count:22, ResNet50.layer1.1.conv3.weight True
count:23, ResNet50.layer1.1.bn3.weight Tr

- https://89douner.tistory.com/289

In [None]:
# Freeze the backbone and leave only layer4 and the fc head trainable.
# Parameters are selected by name instead of by position: in the torchvision
# ResNet-50 layer4 starts at the 130th parameter, so this matches the previous
# `count >= 130` rule exactly while not silently breaking if the parameter
# order or count ever changes.
trainable_prefixes = ('layer4.', 'fc.')
for name, param in resnet50_transfer.ResNet50.named_parameters():
    param.requires_grad = name.startswith(trainable_prefixes)

# Show the resulting trainable / frozen status of every parameter.
for name, param in resnet50_transfer.named_parameters():
    print(name, param.requires_grad)

ResNet50.conv1.weight False
ResNet50.bn1.weight False
ResNet50.bn1.bias False
ResNet50.layer1.0.conv1.weight False
ResNet50.layer1.0.bn1.weight False
ResNet50.layer1.0.bn1.bias False
ResNet50.layer1.0.conv2.weight False
ResNet50.layer1.0.bn2.weight False
ResNet50.layer1.0.bn2.bias False
ResNet50.layer1.0.conv3.weight False
ResNet50.layer1.0.bn3.weight False
ResNet50.layer1.0.bn3.bias False
ResNet50.layer1.0.downsample.0.weight False
ResNet50.layer1.0.downsample.1.weight False
ResNet50.layer1.0.downsample.1.bias False
ResNet50.layer1.1.conv1.weight False
ResNet50.layer1.1.bn1.weight False
ResNet50.layer1.1.bn1.bias False
ResNet50.layer1.1.conv2.weight False
ResNet50.layer1.1.bn2.weight False
ResNet50.layer1.1.bn2.bias False
ResNet50.layer1.1.conv3.weight False
ResNet50.layer1.1.bn3.weight False
ResNet50.layer1.1.bn3.bias False
ResNet50.layer1.2.conv1.weight False
ResNet50.layer1.2.bn1.weight False
ResNet50.layer1.2.bn1.bias False
ResNet50.layer1.2.conv2.weight False
ResNet50.layer1.2.bn

In [None]:
# Re-initialize the conv1/conv2/conv3 kernels of every layer4 bottleneck with
# Xavier-uniform. The bottlenecks are walked directly instead of scanning all
# model parameters with hand-maintained index counters, which removes the
# redundant outer loop and the shadowed `name` variable. As before, the
# downsample (shortcut) convolution of the first bottleneck is left untouched.
for bottleneck_index, bottleneck in enumerate(resnet50_transfer.ResNet50.layer4):
    for conv_index in (1, 2, 3):
        # Same qualified name that named_parameters() reports for this kernel.
        name = 'layer4.'+str(bottleneck_index)+'.conv'+str(conv_index)+'.weight'
        param = getattr(bottleneck, 'conv'+str(conv_index)).weight

        print('layer4의 '+str(bottleneck_index)+'번째 bottleneck의 conv'+str(conv_index)+'.weight')
        nn.init.xavier_uniform_(param)
        print(name+'의 conv filter initialization setting 완료')
        print()

layer4의 0번째 bottleneck의 conv1.weight
layer4.0.conv1.weight의 conv filter initialization setting 완료

layer4의 0번째 bottleneck의 conv2.weight
layer4.0.conv2.weight의 conv filter initialization setting 완료

layer4의 0번째 bottleneck의 conv3.weight
layer4.0.conv3.weight의 conv filter initialization setting 완료

layer4의 1번째 bottleneck의 conv1.weight
layer4.1.conv1.weight의 conv filter initialization setting 완료

layer4의 1번째 bottleneck의 conv2.weight
layer4.1.conv2.weight의 conv filter initialization setting 완료

layer4의 1번째 bottleneck의 conv3.weight


layer4.1.conv3.weight의 conv filter initialization setting 완료

layer4의 2번째 bottleneck의 conv1.weight
layer4.2.conv1.weight의 conv filter initialization setting 완료

layer4의 2번째 bottleneck의 conv2.weight
layer4.2.conv2.weight의 conv filter initialization setting 완료

layer4의 2번째 bottleneck의 conv3.weight
layer4.2.conv3.weight의 conv filter initialization setting 완료



In [None]:
# Reset the BatchNorm affine parameters (gamma = weight, beta = bias) of every
# layer4 bottleneck: gamma is 1 for bn1/bn2 and 0 for bn3 (the last BN of the
# residual branch), beta is always 0. The bottlenecks are walked directly
# instead of matching parameter names against hand-maintained counters.
# To start from a later bottleneck, slice layer4 and pass a matching `start=`
# to enumerate. Only weight and bias are touched here; the downsample BN and
# the BN running statistics are left as they are, as before.
for bottleneck_index, bottleneck in enumerate(resnet50_transfer.ResNet50.layer4):
    for bn_index in (1, 2, 3):
        bn = getattr(bottleneck, 'bn'+str(bn_index))
        # Same qualified prefix that named_parameters() reports for this BN.
        prefix = 'layer4.'+str(bottleneck_index)+'.'+'bn'+str(bn_index)

        # gamma
        print('layer4의 '+str(bottleneck_index)+'번째 bottleneck의 '+'bn'+str(bn_index)+'.weight')
        if bn_index == 3:
            nn.init.zeros_(bn.weight)
            print(prefix+'.weight'+'의 gamma zero setting 완료')
        else:
            nn.init.ones_(bn.weight)
            print(prefix+'.weight'+'의 gamma one setting 완료')
        print()

        # beta
        print(prefix+'.bias')
        nn.init.zeros_(bn.bias)
        print(prefix+'.bias'+'의 beta zero setting 완료')
        print()

layer4의 0번째 bottleneck의 bn1.weight
layer4.0.bn1.weight의 gamma one setting 완료

layer4.0.bn1.bias
layer4.0.bn1.bias의 beta zero setting 완료

layer4의 0번째 bottleneck의 bn2.weight
layer4.0.bn2.weight의 gamma one setting 완료

layer4.0.bn2.bias
layer4.0.bn2.bias의 beta zero setting 완료

layer4의 0번째 bottleneck의 bn3.weight
layer4.0.bn3.weight의 gamma zero setting 완료

layer4.0.bn3.bias
layer4.0.bn3.bias의 beta zero setting 완료

layer4의 1번째 bottleneck의 bn1.weight
layer4.1.bn1.weight의 gamma one setting 완료

layer4.1.bn1.bias
layer4.1.bn1.bias의 beta zero setting 완료

layer4의 1번째 bottleneck의 bn2.weight
layer4.1.bn2.weight의 gamma one setting 완료

layer4.1.bn2.bias
layer4.1.bn2.bias의 beta zero setting 완료

layer4의 1번째 bottleneck의 bn3.weight
layer4.1.bn3.weight의 gamma zero setting 완료

layer4.1.bn3.bias
layer4.1.bn3.bias의 beta zero setting 완료

layer4의 2번째 bottleneck의 bn1.weight
layer4.2.bn1.weight의 gamma one setting 완료

layer4.2.bn1.bias
layer4.2.bn1.bias의 beta zero setting 완료

layer4의 2번째 bottleneck의 bn2.weight
laye