# **模型参数的初始化和共享**

In [1]:
import torch
from torch import nn
from torch.nn import init

In [2]:
# Small MLP used throughout: 4 input features -> 3 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(in_features=4, out_features=3),
    nn.ReLU(),
    nn.Linear(in_features=3, out_features=1),
)

In [3]:
# Show the layer layout, then push a random batch through the network.
print(net)
X = torch.randn((4, 4))  # 4 samples x 4 features, matching the first Linear(4, 3)
net(X)  # bare last expression so the notebook displays the output tensor

Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)


tensor([[0.6187],
        [0.7060],
        [0.6230],
        [0.5894]], grad_fn=<AddmmBackward>)

## **访问模型参数**
 - parameters()：返回所有参数的迭代器（generator），可用 list() 转为列表
 - named_parameters()：返回 (名称, 参数) 对的迭代器（generator），可用 dict() 转为字典

In [4]:
# named_parameters() returns a generator (not a list or dict): print its type,
# then iterate it to list each parameter's name and shape.
named_params = net.named_parameters()
print(type(named_params))
for name, param in named_params:
    print(f"{name} {param.shape}")

<class 'generator'>
0.weight torch.Size([3, 4])
0.bias torch.Size([3])
2.weight torch.Size([1, 3])
2.bias torch.Size([1])


name默认是“层索引.参数名”的形式（例如 `0.weight`、`2.bias`）。     
也可以直接使用net\[index\]访问具体层的参数，此时name不再带层索引前缀（例如 `weight`、`bias`）。

In [6]:
# Index into the Sequential to inspect a single layer's parameters,
# printing each one's name, shape and Python type.
first_layer = net[0]
for name, param in first_layer.named_parameters():
    print(f"{name} {param.shape} {type(param)}")

weight torch.Size([3, 4]) <class 'torch.nn.parameter.Parameter'>
bias torch.Size([3]) <class 'torch.nn.parameter.Parameter'>


在自定义模型中，类型为 `nn.Parameter` 的属性会被自动注册，并出现在 `named_parameters()` 的返回结果中。

## **参数初始化**

In [7]:
# Initialise every parameter by name: weights from N(0, 0.01^2), biases to 0.
# The two checks stay independent and in this order, exactly as before.
for name, param in net.named_parameters():
    if "weight" in name:
        # Normal (Gaussian) initialisation
        init.normal_(param, mean=0, std=0.01)
    if "bias" in name:
        # Constant initialisation
        init.constant_(param, val=0.0)

## **自定义初始化方法**

In [8]:
def normal_(tensor, mean=0, std=1):
    """Fill ``tensor`` in place with samples drawn from a normal distribution.

    The in-place write runs inside ``torch.no_grad()`` so that autograd does
    not record it.

    Args:
        tensor: Tensor (or Parameter) to overwrite in place.
        mean: Mean forwarded to ``Tensor.normal_``. Defaults to 0.
        std: Standard deviation forwarded to ``Tensor.normal_``. Defaults to 1.

    Returns:
        Whatever ``tensor.normal_(mean, std)`` returns.
    """
    autograd_off = torch.no_grad()
    with autograd_off:
        filled = tensor.normal_(mean, std)
    return filled

初始化时不应记录梯度，因此对参数的原地修改要放在 `torch.no_grad()` 中进行。

## **参数共享**
- Module使用相同的参数，见上一节
- Sequential中使用同一个实例

In [9]:
# Build one Linear layer and place that same instance at both positions of the
# Sequential: both slots refer to a single module object.
linear1 = nn.Linear(in_features=20, out_features=20)
net = nn.Sequential(
    linear1,
    linear1,
)