#### 参数管理
模型已经定义好之后，如何访问其中的参数？
首先关注具有单隐藏层的多层感知机。

In [1]:
import torch
from torch import nn

# Single-hidden-layer MLP: a linear layer (4 -> 8), a ReLU activation,
# and a final linear output layer (8 -> 1).
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
# Random input matrix: a batch of 2 samples with 4 features each.
X = torch.rand(size=(2,4))
# Forward pass; yields one output value per sample.
net(X)

tensor([[ 0.0002],
        [-0.0015]], grad_fn=<AddmmBackward>)

#### 参数访问
将每一层的权重拿出来

In [6]:
print(net[2].state_dict())  # net[2] is the last layer, so this shows only its parameters
# The parameters can be viewed as the layer's state (as in an automaton);
# a fully connected layer has two of them: weight and bias.

OrderedDict([('weight', tensor([[-0.0984,  0.3159, -0.2597,  0.3319, -0.0016, -0.3519,  0.1586, -0.0924]])), ('bias', tensor([0.1942]))])


#### 目标参数

In [7]:
print(type(net[2].bias))  # Parameter: a tensor that can be optimized
print(net[2].bias)        # the Parameter itself (value plus requires_grad flag)
print(net[2].bias.data)   # .data is the underlying value; the gradient is accessed via .grad

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.1942], requires_grad=True)
tensor([0.1942])


In [8]:
# No backward pass has been run on net so far, so the gradient is still unset.
# `is None` is the identity check for the None singleton; `== None` would go
# through the tensor's __eq__ once a gradient exists.
net[2].weight.grad is None

True

#### 一次性访问所有参数

In [9]:
# Name and shape of every parameter in the first layer only.
print(*((pname, tensor.shape) for pname, tensor in net[0].named_parameters()))
# Same for the whole network; names are prefixed with the layer index.
print(*((pname, tensor.shape) for pname, tensor in net.named_parameters()))
# The network has three layers; the ReLU in the middle has no weight or bias,
# so only layers 0 and 2 show up.

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [10]:
net.state_dict()['2.bias'].data   # once parameters have names, they can be fetched by name

tensor([0.1942])

#### 从嵌套块收集参数

In [11]:
def block1():
    """Build a small MLP block: Linear(4 -> 8), ReLU, Linear(8 -> 4), ReLU.

    Input and output both have 4 features, so blocks can be chained.
    """
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def block2():
    """Stack four separate ``block1()`` instances inside one ``nn.Sequential``.

    The sub-blocks are registered under the names ``block0`` .. ``block3``.
    """
    stacked = nn.Sequential()
    for idx in range(4):
        # add_module registers the child under a string name of our choosing.
        stacked.add_module(f'block{idx}', block1())
    return stacked

# Nest the four-block stack inside another Sequential, followed by a final
# linear layer mapping 4 features to 1 output.
rgnet = nn.Sequential(block2(),nn.Linear(4,1))
rgnet(X)  # forward pass on X

tensor([[0.1245],
        [0.1245]], grad_fn=<AddmmBackward>)

In [12]:
print(rgnet)  # show how the nested network is organized

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


#### 内置参数初始化

In [13]:
def init_normal(m):
    """Initialize a linear layer with small Gaussian weights and a zero bias.

    Meant to be passed to ``nn.Module.apply``, which calls it on every
    submodule. Modules that are not ``nn.Linear`` (or a subclass of it) are
    left untouched.

    Args:
        m: Any ``nn.Module``.
    """
    if isinstance(m, nn.Linear):
        # A trailing underscore marks an in-place operation.
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # Layers created with bias=False have m.bias set to None.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

net.apply(init_normal)             # apply() calls init_normal on every module in net
# Inspect the first weight row and the first bias entry of layer 0.
net[0].weight.data[0],net[0].bias.data[0]

(tensor([ 0.0139,  0.0160,  0.0018, -0.0105]), tensor(0.))

In [14]:
def init_constant(m):
    """Initialize a linear layer with all-ones weights and a zero bias.

    Meant to be passed to ``nn.Module.apply``. Modules that are not
    ``nn.Linear`` (or a subclass of it) are left untouched.

    Args:
        m: Any ``nn.Module``.
    """
    if isinstance(m, nn.Linear):
        nn.init.constant_(m.weight, 1)  # fill the weight with the constant 1
        # Layers created with bias=False have m.bias set to None.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

net.apply(init_constant)  # re-initialize every Linear layer in net
# Inspect the first weight row and the first bias entry of layer 0.
net[0].weight.data[0],net[0].bias.data[0]

(tensor([1., 1., 1., 1.]), tensor(0.))

#### 对某些块应用不同的初始化方法

In [15]:
def xavier(m):
    """Apply Xavier-uniform initialization to a linear layer's weight.

    The bias is not modified. Modules that are not ``nn.Linear`` (or a
    subclass of it) are left untouched.

    Args:
        m: Any ``nn.Module``.
    """
    # isinstance also matches subclasses of nn.Linear, which an exact
    # type comparison would silently skip.
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)

def init_42(m):
    """Fill a linear layer's weight with the constant 42.

    The bias is not modified. Modules that are not ``nn.Linear`` (or a
    subclass of it) are left untouched.

    Args:
        m: Any ``nn.Module``.
    """
    # isinstance also matches subclasses of nn.Linear, which an exact
    # type comparison would silently skip.
    if isinstance(m, nn.Linear):
        nn.init.constant_(m.weight, 42)

# Use a different initializer per layer: Xavier for the first layer,
# the constant 42 for the last one.
net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data[0])  # first weight row of layer 0
print(net[2].weight.data)     # full weight of layer 2

tensor([-0.6168,  0.5587,  0.1441,  0.5664])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


#### 自定义初始化

In [16]:
def my_init(m):
    """Custom initializer: uniform weights in (-10, 10) with small values zeroed.

    For an ``nn.Linear`` module (or subclass), prints the name and shape of
    its first parameter, draws the weight from U(-10, 10), then sets every
    entry whose absolute value is below 5 to zero. The bias is not modified.
    Other modules are left untouched.

    Args:
        m: Any ``nn.Module``; typically supplied by ``nn.Module.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    # Only the first parameter is reported, so take it straight from the
    # iterator instead of materializing the whole list.
    name, param = next(m.named_parameters())
    print("Init", name, param.shape)
    nn.init.uniform_(m.weight, -10, 10)  # uniform initialization
    # The boolean mask acts as a 0/1 multiplier: keep |w| >= 5, zero the rest.
    m.weight.data *= m.weight.data.abs() >= 5

net.apply(my_init)  # prints one "Init ..." line per Linear layer
net[0].weight[:2]   # first two rows of layer 0's weight

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


tensor([[-8.3215,  5.7624,  0.0000, -0.0000],
        [ 5.2742,  5.8131,  0.0000, -0.0000]], grad_fn=<SliceBackward>)

# Parameters can also be overwritten directly through .data.
net[0].weight.data[:] += 1    # add 1 to every entry of layer 0's weight
net[0].weight.data[0,0] =42   # then set the single entry at row 0, column 0
net[0].weight.data[0]         # show the first row

#### 参数绑定
在一些层之间共享参数

In [19]:
# Build one layer up front and place the same object at two positions in the network.
shared = nn.Linear(8,8)   # constructing the layer allocates its parameters (weight and bias)
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),shared,nn.ReLU(),shared,
                   nn.ReLU(),nn.Linear(8,1))
net(X)
# Positions 2 and 4 hold the shared layer, so their weights compare equal.
print(net[2].weight.data[0] == net[4].weight.data[0])
net[2].weight.data[0,0] = 100
# Still equal after modifying net[2]: the change is visible through net[4] too.
print(net[2].weight.data[0] == net[4].weight.data[0])
# Both positions refer to the same instance.

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])
