# Weight Initialization & Zero Gamma

### [Package load]

In [1]:
import torch
print('pytorch version: {}'.format(torch.__version__))

import torchvision
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

print('GPU 사용 가능 여부: {}'.format(torch.cuda.is_available()))
device = "cuda" if torch.cuda.is_available() else "cpu"   # GPU 사용 가능 여부에 따라 device 정보 저장

# 이걸 해줘야 matplotlib 시행 시 에러가 안 남
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

pytorch version: 2.2.2
GPU 사용 가능 여부: False


### [Model: DenseNet121 Transfer Learning]

In [2]:
class TransferDenseNet(nn.Module):
    """DenseNet-121 with pretrained weights and a replaced classifier head.

    Args:
        num_classes: Output size of the new final ``nn.Linear`` layer.
    """

    def __init__ (self, num_classes):
        super(TransferDenseNet, self).__init__()
        # `weights=` is the current replacement for the deprecated
        # `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag loaded.
        self.densenet = torchvision.models.densenet121(
            weights=torchvision.models.DenseNet121_Weights.IMAGENET1K_V1
        )
        # Read the head's input width from the model we already built instead
        # of constructing (and loading) a second pretrained network for it.
        num_feature = self.densenet.classifier.in_features
        self.densenet.classifier = nn.Linear(num_feature, num_classes)

    def forward(self,x):
        """Run ``x`` through the wrapped DenseNet and return its output."""
        return self.densenet(x)

In [3]:
# Build the 5-class transfer model, then move it onto the selected device.
densenet121_transfer = TransferDenseNet(num_classes=5)
densenet121_transfer = densenet121_transfer.to(device)



In [4]:
# Display the module tree of the transfer model (bare expression -> cell output).
densenet121_transfer

TransferDenseNet(
  (densenet): DenseNet(
    (features): Sequential(
      (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(inplace=True)
      (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (denseblock1): _DenseBlock(
        (denselayer1): _DenseLayer(
          (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (denselayer2): _DenseLayer(
          (norm1): BatchNorm2d(96, eps=1e-05, mo

In [5]:
from torchinfo import summary

# Per-layer report for a batch of 64 RGB images of size 512x512, showing the
# nesting depth and variable name of every row.
summary(
    densenet121_transfer,
    input_size=(64, 3, 512, 512),
    col_width=20,
    depth=150,
    row_settings=["depth", "var_names"],
    col_names=["input_size", "kernel_size", "output_size", "params_percent"],
)

Layer (type (var_name):depth-idx)                       Input Shape          Kernel Shape         Output Shape         Param %
TransferDenseNet (TransferDenseNet)                     [64, 3, 512, 512]    --                   [64, 5]                   --
├─DenseNet (densenet): 1-1                              [64, 3, 512, 512]    --                   [64, 5]                   --
│    └─Sequential (features): 2-1                       [64, 3, 512, 512]    --                   [64, 1024, 16, 16]        --
│    │    └─Conv2d (conv0): 3-1                         [64, 3, 512, 512]    [7, 7]               [64, 64, 256, 256]     0.14%
│    │    └─BatchNorm2d (norm0): 3-2                    [64, 64, 256, 256]   --                   [64, 64, 256, 256]     0.00%
│    │    └─ReLU (relu0): 3-3                           [64, 64, 256, 256]   --                   [64, 64, 256, 256]        --
│    │    └─MaxPool2d (pool0): 3-4                      [64, 64, 256, 256]   3                    [64, 64, 128,

In [6]:
# Print each parameter with its 1-based position and its requires_grad flag.
count = 0  # stays 0 if the model has no parameters
for count, (name, param) in enumerate(densenet121_transfer.named_parameters(), start=1):
    print(f"count:{count},",name, param.requires_grad)

count:1, densenet.features.conv0.weight True
count:2, densenet.features.norm0.weight True
count:3, densenet.features.norm0.bias True
count:4, densenet.features.denseblock1.denselayer1.norm1.weight True
count:5, densenet.features.denseblock1.denselayer1.norm1.bias True
count:6, densenet.features.denseblock1.denselayer1.conv1.weight True
count:7, densenet.features.denseblock1.denselayer1.norm2.weight True
count:8, densenet.features.denseblock1.denselayer1.norm2.bias True
count:9, densenet.features.denseblock1.denselayer1.conv2.weight True
count:10, densenet.features.denseblock1.denselayer2.norm1.weight True
count:11, densenet.features.denseblock1.denselayer2.norm1.bias True
count:12, densenet.features.denseblock1.denselayer2.conv1.weight True
count:13, densenet.features.denseblock1.denselayer2.norm2.weight True
count:14, densenet.features.denseblock1.denselayer2.norm2.bias True
count:15, densenet.features.denseblock1.denselayer2.conv2.weight True
count:16, densenet.features.denseblock1.d

- https://89douner.tistory.com/289

In [7]:
# Freeze every parameter that comes before denseblock3 and make every
# parameter from denseblock3 onward trainable (earlier: False, later: True).
# The boundary is located by parameter name instead of by a hard-coded
# position (denseblock3 started at the 118th parameter), so it cannot
# silently drift if the layer layout changes.
FIRST_TRAINABLE_PREFIX = 'features.denseblock3.'

trainable = False
for name, param in densenet121_transfer.densenet.named_parameters():
    if name.startswith(FIRST_TRAINABLE_PREFIX):
        trainable = True   # from here on, everything is trainable
    param.requires_grad = trainable

if not trainable:
    # Without this check a missing prefix would leave the whole network frozen.
    raise RuntimeError(
        "No parameter name starts with '{}'".format(FIRST_TRAINABLE_PREFIX)
    )

# Show the resulting requires_grad flag of every parameter.
for name, param in densenet121_transfer.named_parameters():
    print(name, param.requires_grad)

densenet.features.conv0.weight False
densenet.features.norm0.weight False
densenet.features.norm0.bias False
densenet.features.denseblock1.denselayer1.norm1.weight False
densenet.features.denseblock1.denselayer1.norm1.bias False
densenet.features.denseblock1.denselayer1.conv1.weight False
densenet.features.denseblock1.denselayer1.norm2.weight False
densenet.features.denseblock1.denselayer1.norm2.bias False
densenet.features.denseblock1.denselayer1.conv2.weight False
densenet.features.denseblock1.denselayer2.norm1.weight False
densenet.features.denseblock1.denselayer2.norm1.bias False
densenet.features.denseblock1.denselayer2.conv1.weight False
densenet.features.denseblock1.denselayer2.norm2.weight False
densenet.features.denseblock1.denselayer2.norm2.bias False
densenet.features.denseblock1.denselayer2.conv2.weight False
densenet.features.denseblock1.denselayer3.norm1.weight False
densenet.features.denseblock1.denselayer3.norm1.bias False
densenet.features.denseblock1.denselayer3.conv1

In [8]:
# Re-initialize the conv layers inside denseblock3 and denseblock4.
def _reinit_dense_block_convs(features, block_names=('denseblock3', 'denseblock4')):
    """Xavier-uniform re-initialize conv weights inside the given dense blocks.

    Walks ``features.named_parameters()`` once, in registration order, and
    re-initializes every parameter named
    ``<block>.denselayer<k>.conv<j>.weight`` whose ``<block>`` is listed in
    ``block_names``. A short progress message is printed for each one.

    Args:
        features: Module whose parameters are named as described above
            (here: ``densenet121_transfer.densenet.features``).
        block_names: Names of the dense blocks whose conv weights are reset.
    """
    layer_prefix = 'denselayer'
    for param_name, param in features.named_parameters():
        parts = param_name.split('.')
        if len(parts) != 4:
            continue   # e.g. conv0.weight or transition*.conv.weight
        block, layer, conv, kind = parts
        if block not in block_names or kind != 'weight':
            continue
        if not (layer.startswith(layer_prefix) and conv.startswith('conv')):
            continue   # skips the norm1/norm2 parameters of the layer

        layer_no = layer[len(layer_prefix):]
        print(f'{block}의 {layer_no}번째 denselayer의 {conv}.weight')
        nn.init.xavier_uniform_(param)
        print(param_name+'의 conv filter initialization setting 완료')
        print()


_reinit_dense_block_convs(densenet121_transfer.densenet.features)

denseblock3의 1번째 denselayer의 conv1.weight
denseblock3.denselayer1.conv1.weight의 conv filter initialization setting 완료

denseblock3의 1번째 denselayer의 conv2.weight
denseblock3.denselayer1.conv2.weight의 conv filter initialization setting 완료

denseblock3의 2번째 denselayer의 conv1.weight
denseblock3.denselayer2.conv1.weight의 conv filter initialization setting 완료

denseblock3의 2번째 denselayer의 conv2.weight
denseblock3.denselayer2.conv2.weight의 conv filter initialization setting 완료

denseblock3의 3번째 denselayer의 conv1.weight
denseblock3.denselayer3.conv1.weight의 conv filter initialization setting 완료

denseblock3의 3번째 denselayer의 conv2.weight
denseblock3.denselayer3.conv2.weight의 conv filter initialization setting 완료

denseblock3의 4번째 denselayer의 conv1.weight
denseblock3.denselayer4.conv1.weight의 conv filter initialization setting 완료

denseblock3의 4번째 denselayer의 conv2.weight
denseblock3.denselayer4.conv2.weight의 conv filter initialization setting 완료

denseblock3의 5번째 denselayer의 conv1.weight
denseb

In [9]:
# Initialize the batch-norm gamma (weight) and beta (bias) in denseblock3 and
# denseblock4: in every dense layer whose index is a multiple of 4 the gamma
# of norm2 is set to 0; every other gamma is set to 1; every beta is set to 0.
# To start from a later block, change `block_names`.
def _init_dense_block_batchnorm(features,
                                block_names=('denseblock3', 'denseblock4'),
                                zero_gamma_every=4):
    """Reset batch-norm gamma/beta inside the given dense blocks.

    Walks ``features.named_parameters()`` once, in registration order, and
    handles every parameter named
    ``<block>.denselayer<k>.norm{1,2}.{weight,bias}`` whose ``<block>`` is
    listed in ``block_names``:

    * ``norm1.weight`` -> ones
    * ``norm2.weight`` -> zeros when ``k % zero_gamma_every == 0``, else ones
    * ``norm1.bias`` / ``norm2.bias`` -> zeros

    A short progress message is printed for each parameter.

    NOTE(review): in the printed model, norm2 is followed by relu2 and a
    bias-free conv2. With gamma and beta both 0 the norm2 output is exactly 0,
    so it looks like no gradient can pass the ReLU and these layers may never
    start learning -- please confirm this is the intended experiment.

    Args:
        features: Module whose parameters are named as described above
            (here: ``densenet121_transfer.densenet.features``).
        block_names: Names of the dense blocks to process.
        zero_gamma_every: Dense layers whose index is a multiple of this
            value get a zero norm2 gamma.
    """
    layer_prefix = 'denselayer'
    for param_name, param in features.named_parameters():
        parts = param_name.split('.')
        if len(parts) != 4:
            continue   # e.g. norm0.weight or transition*.norm.weight
        block, layer, norm, kind = parts
        if block not in block_names or not layer.startswith(layer_prefix):
            continue
        if norm not in ('norm1', 'norm2') or kind not in ('weight', 'bias'):
            continue   # skips the conv1/conv2 weights of the layer
        layer_suffix = layer[len(layer_prefix):]
        if not layer_suffix.isdigit():
            continue
        layer_no = int(layer_suffix)

        if kind == 'bias':
            # beta is always reset to 0
            print(param_name)
            nn.init.zeros_(param)
            print(param_name+'의 beta zero setting 완료')
        else:
            print(f'{block}의 {layer_no}번째 denselayer의 {norm}.weight')
            if norm == 'norm2' and layer_no % zero_gamma_every == 0:
                nn.init.zeros_(param)
                print(param_name+'의 gamma zero setting 완료')
            else:
                nn.init.ones_(param)
                print(param_name+'의 gamma one setting 완료')
        print()


_init_dense_block_batchnorm(densenet121_transfer.densenet.features)

denseblock3의 1번째 denselayer의 norm1.weight
denseblock3.denselayer1.norm1.weight의 gamma one setting 완료

denseblock3.denselayer1.norm1.bias
denseblock3.denselayer1.norm1.bias의 beta zero setting 완료

denseblock3의 1번째 denselayer의 norm2.weight
denseblock3.denselayer1.norm2.weight의 gamma one setting 완료

denseblock3.denselayer1.norm2.bias
denseblock3.denselayer1.norm2.bias의 beta zero setting 완료

denseblock3의 2번째 denselayer의 norm1.weight
denseblock3.denselayer2.norm1.weight의 gamma one setting 완료

denseblock3.denselayer2.norm1.bias
denseblock3.denselayer2.norm1.bias의 beta zero setting 완료

denseblock3의 2번째 denselayer의 norm2.weight
denseblock3.denselayer2.norm2.weight의 gamma one setting 완료

denseblock3.denselayer2.norm2.bias
denseblock3.denselayer2.norm2.bias의 beta zero setting 완료

denseblock3의 3번째 denselayer의 norm1.weight
denseblock3.denselayer3.norm1.weight의 gamma one setting 완료

denseblock3.denselayer3.norm1.bias
denseblock3.denselayer3.norm1.bias의 beta zero setting 완료

denseblock3의 3번째 denselay