In [20]:
import torch
import torch.nn as nn
from torchinfo import summary

class Net(nn.Module):
    """Three-layer fully connected network mapping 8 input features to 1 output.

    The layers are applied back to back with no activation in between:
    ``fc1`` (8 -> 4), ``fc2`` (4 -> 2) and ``fc3`` (2 -> 1).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(8, 4)
        self.fc2 = nn.Linear(4, 2)
        self.fc3 = nn.Linear(2, 1)

    def forward(self, x):
        """Pass ``x`` through ``fc1``, ``fc2`` and ``fc3`` in that order.

        Args:
            x: Tensor whose last dimension has size 8.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        # The layers are registered as fc1/fc2/fc3; the attributes must be
        # referenced by exactly those names or nn.Module raises AttributeError.
        return self.fc3(self.fc2(self.fc1(x)))

In [3]:
# Create a model instance
model=Net() 

# Pull out a specific layer of the model by attribute name
model.fc1

Linear(in_features=8, out_features=4, bias=True)

In [24]:
summary(model)
# fc1: 36 params -> 8*4 weights + 1*4 biases = 36
# fc2: 10 params -> 4*2 weights + 1*2 biases = 10
# fc3:  3 params -> 2*1 weights + 1*1 biases = 3

Layer (type:depth-idx)                   Param #
Net                                      --
├─Linear: 1-1                            36
├─Linear: 1-2                            10
├─Linear: 1-3                            3
Total params: 49
Trainable params: 49
Non-trainable params: 0

In [11]:
# Show the weight and bias parameters of fc1 as a (weight, bias) tuple
model.fc1.weight, model.fc1.bias

(Parameter containing:
 tensor([[ 1.7034e-01,  3.0003e-01, -1.2169e-01, -7.2638e-02, -2.1213e-01,
           1.5824e-01,  2.0362e-01, -3.0709e-01],
         [-2.8718e-01, -7.4751e-02, -2.9829e-01,  4.9680e-02,  9.6182e-02,
           1.2431e-01, -2.5645e-01,  5.7207e-02],
         [ 3.1309e-01, -2.1936e-01,  3.3504e-01,  2.0864e-01, -2.3069e-01,
           2.3661e-01, -1.1258e-03,  8.6547e-02],
         [ 1.7864e-01,  7.7386e-02,  3.3211e-01,  3.4109e-04, -3.4601e-01,
           1.7359e-01,  3.3117e-01,  2.5893e-02]], requires_grad=True),
 Parameter containing:
 tensor([-0.3107,  0.3365,  0.1707, -0.2703], requires_grad=True))

In [7]:
# Inspect the weight and bias tensors of every layer in the model
for tensor in model.parameters():
    print(tensor)

Parameter containing:
tensor([[ 1.7034e-01,  3.0003e-01, -1.2169e-01, -7.2638e-02, -2.1213e-01,
          1.5824e-01,  2.0362e-01, -3.0709e-01],
        [-2.8718e-01, -7.4751e-02, -2.9829e-01,  4.9680e-02,  9.6182e-02,
          1.2431e-01, -2.5645e-01,  5.7207e-02],
        [ 3.1309e-01, -2.1936e-01,  3.3504e-01,  2.0864e-01, -2.3069e-01,
          2.3661e-01, -1.1258e-03,  8.6547e-02],
        [ 1.7864e-01,  7.7386e-02,  3.3211e-01,  3.4109e-04, -3.4601e-01,
          1.7359e-01,  3.3117e-01,  2.5893e-02]], requires_grad=True)
Parameter containing:
tensor([-0.3107,  0.3365,  0.1707, -0.2703], requires_grad=True)
Parameter containing:
tensor([[ 0.3958, -0.2766, -0.4011, -0.1604],
        [ 0.3201,  0.3081,  0.0355, -0.1815]], requires_grad=True)
Parameter containing:
tensor([0.0754, 0.3342], requires_grad=True)
Parameter containing:
tensor([[0.6895, 0.0278]], requires_grad=True)
Parameter containing:
tensor([-0.1493], requires_grad=True)


In [13]:
nn.init.xavier_uniform_(model.fc1.weight) # the trailing underscore (_) means the tensor is modified in place

Parameter containing:
tensor([[-0.3934,  0.2836,  0.0132,  0.5061,  0.5092, -0.5913,  0.0858, -0.5887],
        [ 0.4175, -0.0837,  0.4061,  0.2979, -0.4731,  0.5122, -0.1198, -0.6666],
        [ 0.0125, -0.3398,  0.3145, -0.2282, -0.2816,  0.1048,  0.5307,  0.5420],
        [ 0.5262,  0.4043,  0.6057,  0.0382,  0.2354,  0.4453, -0.0537, -0.6702]],
       requires_grad=True)

In [15]:
nn.init.xavier_normal_(model.fc1.weight) # switched to xavier_normal_

Parameter containing:
tensor([[-2.0703e-01,  4.4152e-01,  4.3080e-01,  2.1949e-01,  8.0393e-01,
          1.1328e+00, -1.1215e+00,  1.8928e-02],
        [-7.3071e-02, -1.0732e-03,  4.0727e-01, -2.4839e-01, -9.3855e-01,
          5.0331e-01, -1.5112e-01,  2.6872e-01],
        [ 1.1452e-01,  5.0968e-02, -6.3083e-01, -2.7854e-01, -6.2466e-02,
          1.1130e-01,  6.3456e-01,  4.0566e-01],
        [ 1.9802e-01, -7.5579e-01, -8.5188e-01, -3.4921e-01,  3.6276e-01,
         -3.4187e-01,  3.2227e-01,  7.6576e-02]], requires_grad=True)

In [16]:
# Apply Xavier uniform initialization to every linear layer of the model.
# Only nn.Linear children are touched: xavier_uniform_ needs a weight tensor
# with at least 2 dimensions, so modules without a weight (e.g. nn.ReLU) or
# with a 1-D weight (e.g. nn.BatchNorm1d) must be skipped.
for child in model.children():
    if isinstance(child, nn.Linear):
        nn.init.xavier_uniform_(child.weight)

In [17]:
# Apply Xavier normal initialization to the fc2 and fc3 layers only
target_layers = ['fc2', 'fc3']  # names of the layers to re-initialize
for name, child in model.named_children():
    # Initialize each selected layer's weight exactly once; iterating over
    # child.parameters() here would just repeat the same call per parameter.
    if name in target_layers:
        nn.init.xavier_normal_(child.weight)

In [14]:
nn.init.kaiming_normal_(model.fc1.weight) # He initialization method

Parameter containing:
tensor([[-0.2042,  0.2394, -0.3461, -0.1439, -0.1294, -0.2660, -0.9918, -0.4710],
        [ 0.1657, -0.0273, -0.1661, -0.2744, -0.1141, -0.2045,  0.0426, -0.1044],
        [-0.2684, -0.7717,  0.1135, -0.7962, -0.1170, -0.1001, -0.5707, -0.7335],
        [-0.3350,  0.8516,  0.9025, -1.5960, -0.7016, -0.1621,  0.0574, -0.0395]],
       requires_grad=True)

In [None]:
class Model(nn.Module):
    """Small classifier-style network: Linear -> BatchNorm1d -> ReLU -> Linear.

    Maps 100 input features to 10 outputs through a 50-unit hidden layer.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(100, 50)
        self.bn = nn.BatchNorm1d(num_features=50)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: 2-D tensor of shape ``(batch, 100)``.

        Returns:
            Tensor of shape ``(batch, 10)`` produced by ``fc2``.
        """
        x = self.fc1(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.fc2(x)
        # The result must be returned explicitly; a bare ``return`` hands
        # ``None`` back to the caller and discards the computation.
        return x

In [18]:
# He uniform initialization for the fc1 layer
nn.init.kaiming_uniform_(model.fc1.weight)

Parameter containing:
tensor([[-0.6636,  0.5145, -0.1027,  0.4867, -0.2917, -0.4672, -0.8585, -0.5551],
        [-0.4039, -0.1899,  0.4919,  0.2276, -0.6380,  0.1269,  0.8079, -0.2134],
        [ 0.0909,  0.7737,  0.0782,  0.3262, -0.3590,  0.3879,  0.2389, -0.4697],
        [ 0.1732, -0.3760, -0.6370, -0.2793,  0.6832, -0.8028, -0.3591, -0.4533]],
       requires_grad=True)

In [19]:
# He normal initialization for the fc1 layer
nn.init.kaiming_normal_(model.fc1.weight)

Parameter containing:
tensor([[-0.3278,  0.8211, -0.6779, -0.8737, -0.3341,  0.2793, -0.1053, -0.2370],
        [-0.2456,  0.2297,  0.3770, -0.1289,  0.1960, -0.4868,  0.9842,  0.7023],
        [ 0.5891,  0.2423, -0.1513, -0.1781, -0.7464, -0.5787, -0.2469, -0.0358],
        [ 0.7223, -1.6068,  1.0708, -0.5295, -0.7663,  0.1249,  0.8430,  0.4883]],
       requires_grad=True)