In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.utils.data as data
import torchvision as tv
import torchvision.transforms as tf
import torchvision.models as mod
from PIL import Image
%matplotlib inline

# torch.nn.init

### torch.nn.init.calculate_gain(nonlinearity, param=None) : 为给定的非线性函数返回推荐的增益值（gain）. 非线性函数可以是：
- Linear/Identity	1
- Conv{1,2,3}D      1
- Sigmoid           1
- Tanh             5/3
- ReLU             sqrt(2)


第一列是非线性函数名，第二列是对应的增益值.

In [3]:
# The nonlinearity name must be given in lowercase ('relu', not 'ReLU').
gain = nn.init.calculate_gain(nonlinearity='relu')
gain

1.4142135623730951

### torch.nn.init.uniform\_(tensor, a=0, b=1)：以均匀分布初始化输入张量.

In [4]:
# Allocate an uninitialised 2x3 tensor and fill it in place from U(0, 1);
# uniform_ hands back the tensor it was given, so X is that same tensor.
X = nn.init.uniform_(torch.empty(2, 3), a=0, b=1)
X

tensor([[0.5985, 0.2696, 0.0361],
        [0.7637, 0.7353, 0.3277]])

### torch.nn.init.normal\_(tensor, mean=0, std=1)：以正态分布初始化输入张量.

In [5]:
# Overwrite X in place with samples from N(0, 1); the returned tensor is the cell output.
nn.init.normal_(X, mean=0, std=1)

tensor([[-1.4623,  0.1677,  0.4717],
        [ 0.5376, -1.2502, -2.0507]])

### torch.nn.init.constant\_(tensor, val): 以常量初始化输入张量.

In [6]:
# Set every element of X to the constant 2 (in place).
# The call returns X itself, so it doubles as the cell's displayed value.
nn.init.constant_(X, val=2)

tensor([[2., 2., 2.],
        [2., 2., 2.]])

### torch.nn.init.eye\_(tensor):以恒等矩阵初始化输入的二维张量. 在线性层保持输入的恒等性，在该层使得尽量多的输入保持恒等.

In [7]:
# eye_ can be called even when the 2-D tensor is not square.
# It initialises X in place and returns it, which is what the cell displays.
nn.init.eye_(X)

tensor([[1., 0., 0.],
        [0., 1., 0.]])

In [8]:
# Reinterpret X as 3x2, then apply the identity initialisation to the
# reshaped tensor; X is rebound to that reshaped, initialised tensor.
X = nn.init.eye_(X.reshape(3, 2))
X

tensor([[1., 0.],
        [0., 1.],
        [0., 0.]])

### torch.nn.init.dirac\_(tensor):以 Dirac-delta 函数对3、4、5维张量进行初始化. 保持在卷积层的恒等性，在该层使得尽量多的输入保持恒等.

In [9]:
# Build a fresh 3-D tensor of shape (2, 3, 4) and apply the Dirac-delta
# initialisation to it in one step; dirac_ returns the tensor it filled.
X = nn.init.dirac_(torch.empty(2, 3, 4))
X

tensor([[[0., 0., 1., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 1., 0.],
         [0., 0., 0., 0.]]])

### torch.nn.init.xavier\_uniform\_(tensor, gain=1): 根据 “Understanding the difficulty of training deep feedforward neural networks”中描述的方法，使用均匀分布U(-a,a)为输入张量进行初始化，其中 `a = gain * sqrt(6 / (fan_in + fan_out))`. gain是对a的缩放系数，其取值可由上面介绍的calculate_gain函数得到.

In [11]:
# Fresh 2x3 tensor, Xavier-uniform initialised with the gain recommended for ReLU.
X = nn.init.xavier_uniform_(
    torch.empty(2, 3),
    gain=nn.init.calculate_gain('relu'),
)
X

tensor([[-0.5382,  0.1625,  0.3651],
        [-0.8799,  1.3694, -0.1203]])

### torch.nn.init.xavier\_normal\_(tensor, gain=1)：根据如上描述的方法，使用正态分布N(0,std)为输入张量进行初始化，其中 `std = gain * sqrt(2 / (fan_in + fan_out))`. gain值如上述，是std的缩放系数.

In [12]:
# Re-initialise X in place with Xavier-normal values, scaled by the gain
# recommended for sigmoid. The call returns X, which becomes the cell output.
nn.init.xavier_normal_(
    X,
    gain=nn.init.calculate_gain('sigmoid'),
)

tensor([[-0.8178, -0.1211,  0.9286],
        [ 0.1067,  0.9894, -1.3154]])

### torch.nn.init.kaiming\_uniform\_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'): 使用均匀分布U(-bound,bound)为输入张量进行初始化，参数a、mode和nonlinearity共同决定bound值.
见文档： https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.kaiming_uniform_

In [13]:
# Kaiming-uniform initialisation of X in place, with the default arguments spelled out.
nn.init.kaiming_uniform_(X, a=0, mode='fan_in', nonlinearity='leaky_relu')
X

tensor([[-0.2866,  0.6387,  0.1421],
        [-0.7327,  0.1454,  0.3882]])

### torch.nn.init.kaiming\_normal\_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu')：使用正态分布为输入张量做初始化. 

见文档：https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.kaiming_normal_

In [14]:
# Kaiming-normal initialisation of X in place, with the default arguments spelled out.
nn.init.kaiming_normal_(X, a=0, mode='fan_in', nonlinearity='leaky_relu')
X

tensor([[-0.5731, -0.0047, -0.1785],
        [ 1.5102,  0.8333,  1.3601]])

### torch.nn.init.orthogonal\_(tensor, gain=1): 使用（半）正交矩阵为输入张量进行初始化.

见:https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.orthogonal_

In [15]:
# Fill X in place with a (semi-)orthogonal matrix, using the default gain of 1.
nn.init.orthogonal_(X, gain=1)
X

tensor([[ 0.5613, -0.0461,  0.8263],
        [-0.0056,  0.9982,  0.0594]])

### torch.nn.init.sparse\_(tensor, sparsity, std=0.01): 把输入的二维张量初始化为稀疏矩阵，sparsity是每一列中被置为0的元素所占的比例，非0元素将从N(0,std)中采样.

见：https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.sparse_

In [16]:
# Sparse initialisation of the 2-D tensor X in place with sparsity 0.5;
# std is passed explicitly at its default value of 0.01.
nn.init.sparse_(X, sparsity=0.5, std=0.01)
X

tensor([[ 0.0000,  0.0178, -0.0031],
        [-0.0053,  0.0000,  0.0000]])