In [4]:
import torch
import torch.nn as nn

In [5]:
# Print the built-in documentation of the torch.nn.init module.
help(torch.nn.init)

Help on module torch.nn.init in torch.nn:

NAME
    torch.nn.init

FUNCTIONS
    calculate_gain(nonlinearity, param=None)
        Return the recommended gain value for the given nonlinearity function.
        The values are as follows:
        
        nonlinearity      gain
        Linear / Identity :math:`1`
        Conv{1,2,3}D      :math:`1`
        Sigmoid           :math:`1`
        Tanh              :math:`\frac{5}{3}`
        ReLU              :math:`\sqrt{2}`
        Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
        SELU              :math:`\frac{3}{4}`
        
            In order to implement `Self-Normalizing Neural Networks`_ ,
            you should use ``nonlinearity='linear'`` instead of ``nonlinearity='selu'``.
            This gives the initial weights a variance of ``1 / N``,
            which is necessary to induce a stable fixed point in the forward pass.
            In contrast, the default gain for ``SELU`` sacrifices the normalisat

In [6]:
# List every name exposed by the torch.nn.init module.
dir(torch.nn.init)

['Tensor',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_calculate_correct_fan',
 '_calculate_fan_in_and_fan_out',
 '_make_deprecate',
 '_no_grad_fill_',
 '_no_grad_normal_',
 '_no_grad_trunc_normal_',
 '_no_grad_uniform_',
 '_no_grad_zero_',
 'calculate_gain',
 'constant',
 'constant_',
 'dirac',
 'dirac_',
 'eye',
 'eye_',
 'kaiming_normal',
 'kaiming_normal_',
 'kaiming_uniform',
 'kaiming_uniform_',
 'math',
 'normal',
 'normal_',
 'ones_',
 'orthogonal',
 'orthogonal_',
 'sparse',
 'sparse_',
 'torch',
 'trunc_normal_',
 'uniform',
 'uniform_',
 'xavier_normal',
 'xavier_normal_',
 'xavier_uniform',
 'xavier_uniform_',
 'zeros_']

#### 除了 calculate_gain 以及已弃用的无下划线别名（如 constant、normal、uniform 等）之外，这些初始化函数的名称都以下划线结尾，表示它们会直接原地修改输入张量的值。

In [8]:
# Two sample layers to experiment on: a 2-D convolution and a fully connected layer.
conv = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3)
linear = nn.Linear(in_features=10, out_features=1)

# When initialising a real model, isinstance() is the usual way to decide
# which torch.nn.init routine a given module should receive.
print(isinstance(conv, nn.Conv2d))    # is conv an nn.Conv2d?
print(isinstance(linear, nn.Conv2d))  # is linear an nn.Conv2d?

True
False


In [12]:
# Inspect the conv weights as left by the layer's default random initialisation
conv.weight.data

tensor([[[[ 0.3289, -0.2766, -0.1190],
          [ 0.0156,  0.0821, -0.1018],
          [-0.2432, -0.1440, -0.1805]]],


        [[[ 0.1478, -0.3053, -0.0254],
          [ 0.1240, -0.0659, -0.2035],
          [-0.3191,  0.2859,  0.2953]]],


        [[[ 0.1710,  0.3329, -0.0473],
          [ 0.1467, -0.1095,  0.0724],
          [-0.0642, -0.0382,  0.0065]]]])

In [13]:
# Inspect the weights of the linear layer
linear.weight.data

tensor([[-0.1689,  0.0414, -0.0216, -0.2563,  0.1744, -0.3080, -0.2602, -0.0347,
         -0.0663, -0.0339]])

In [19]:
# Xavier (Glorot) normal initialisation, applied in place to the conv kernel
nn.init.xavier_normal_(conv.weight.data)
conv.weight.data

tensor([[[[ 0.0702, -0.2754, -0.3377],
          [ 0.0186, -0.0928,  0.1448],
          [ 0.0885, -0.4235, -0.0674]]],


        [[[ 0.3098,  0.0483,  0.1748],
          [ 0.0786, -0.0998, -0.0588],
          [-0.2620,  0.1402, -0.2494]]],


        [[[-0.0374,  0.1345,  0.0779],
          [ 0.1921, -0.1648, -0.1613],
          [ 0.1003,  0.2392,  0.2052]]]])

In [20]:
# Constant initialisation: overwrite every weight of the linear layer with 0.3
nn.init.constant_(linear.weight.data, val=0.3)
linear.weight.data

tensor([[0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000,
         0.3000]])

In [21]:
# Kaiming (He) normal initialisation, applied in place to the conv kernel
nn.init.kaiming_normal_(conv.weight.data)
conv.weight.data

tensor([[[[-1.2191e-01, -1.1305e-01, -4.6943e-01],
          [-4.9135e-01, -2.2668e-01, -8.9071e-03],
          [-3.4801e-02,  5.0942e-01,  1.7515e-01]]],


        [[[ 1.8005e-01,  1.5025e-02,  4.0557e-01],
          [-4.9781e-02,  2.7651e-01, -1.5404e-01],
          [-3.4019e-01, -1.7847e-01,  1.5932e-01]]],


        [[[ 4.1476e-01, -1.0942e+00,  5.3046e-01],
          [-5.5255e-01,  4.5096e-04,  3.4765e-01],
          [-1.8132e-02,  1.1355e+00,  2.6482e-01]]]])

### 初始化函数的封装

In [22]:
# 人们常常将各种初始化方法定义为一个initialize_weights()的函数并在模型初始后进行使用。
def initialize_weights(model):
    """Re-initialise, in place, the parameters of the supported layers in ``model``.

    Walks ``model.modules()`` (the module itself plus every descendant) and
    dispatches on the layer type:

    * ``nn.Conv2d``      -- weight filled with zeros, bias (if present) set to 0.3
    * ``nn.Linear``      -- weight sampled from a normal distribution with
                            mean 0.1 (default std), bias (if present) zeroed
    * ``nn.BatchNorm2d`` -- weight set to 1 and bias set to 0 (skipped for
                            parameters that do not exist, e.g. ``affine=False``)

    Modules of any other type are left untouched.

    Args:
        model: the ``nn.Module`` whose layers should be re-initialised.

    Returns:
        None. All parameters are modified in place.
    """
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            # Zero weights are used here purely for demonstration purposes.
            torch.nn.init.zeros_(m.weight)
            # bias is None when the layer was built with bias=False
            if m.bias is not None:
                torch.nn.init.constant_(m.bias, 0.3)
        elif isinstance(m, nn.Linear):
            # NOTE(review): the second positional argument of normal_ is the
            # mean, not the std -- made explicit here. If std=0.1 was the
            # intent, change the keyword accordingly.
            torch.nn.init.normal_(m.weight, mean=0.1)
            if m.bias is not None:
                torch.nn.init.zeros_(m.bias)
        elif isinstance(m, nn.BatchNorm2d):
            # weight/bias are None when affine=False, so guard both.
            if m.weight is not None:
                torch.nn.init.ones_(m.weight)
            if m.bias is not None:
                # Tensor has zero_(), not zeros_(); use the nn.init helper.
                torch.nn.init.zeros_(m.bias)

### 这段代码的流程是：遍历当前模型的每一层，判断该层属于什么类型，再根据层的类型采用相应的权值初始化方法。我们可以通过下面的例程进行一个简短的演示：

In [23]:
# 模型的定义
class MLP(nn.Module):
    """Tiny demo network used to show weight initialisation.

    Despite its name, the hidden layer is a single-channel 3x3 convolution;
    it is followed by a ReLU and a ``Linear(10, 1)`` read-out.
    """

    def __init__(self, **kwargs):
        # Hand any extra keyword arguments over to nn.Module's constructor.
        super().__init__(**kwargs)
        # Layers that own parameters (conv + linear) and the activation between them.
        self.hidden = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)
        self.act = nn.ReLU()
        self.output = nn.Linear(in_features=10, out_features=1)

    def forward(self, x):
        """Forward pass: convolution -> ReLU -> linear read-out.

        ``self.output`` acts on the last dimension, which therefore has to be
        of size 10 after the convolution.
        """
        activated = self.act(self.hidden(x))
        return self.output(activated)

# Build the model and show the conv kernel before and after re-initialisation.
mlp = MLP()
print(mlp.hidden.weight.data)
print("-------初始化-------")

# initialize_weights() already walks model.modules() on its own, so it is
# called once on the root module. Passing it to mlp.apply() would invoke it
# once per submodule and repeat the whole traversal each time.
initialize_weights(mlp)
print(mlp.hidden.weight.data)

tensor([[[[-0.2791, -0.1291, -0.1524],
          [-0.2811,  0.3133,  0.1566],
          [ 0.1856, -0.3264,  0.1985]]]])
-------初始化-------
tensor([[[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]]])


## 注意： 我们在初始化时，最好不要将模型的权重全部初始化为0（或任何相同的常数）：这样同一层的各个神经元会得到完全相同的输出和梯度，对称性无法被打破；权重全为0时，反向传播到前面各层的梯度也为0，从而影响模型的训练效果。因此，我们在初始化权重时，应使用 Xavier、Kaiming 等初始化方法，或者使用较小的随机值（例如标准差为0.01或0.1的正态分布）；偏置初始化为0通常没有问题。