自定义网络

In [2]:
import torch 
from torch import nn
# Two-layer MLP: 4 inputs -> 8 hidden units with ReLU -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
# Random input batch: 2 samples with 4 features each.
X = torch.rand(2, 4)
net(X)

tensor([[-0.2791],
        [-0.2183]], grad_fn=<AddmmBackward>)

In [3]:
# Show the state dict (weight and bias tensors) of the layer at index 2 of net.
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.2217,  0.0232, -0.2136, -0.1422, -0.1437, -0.1512,  0.1880, -0.2557]])), ('bias', tensor([-0.2125]))])


In [4]:
# The bias of layer 2 is an nn.Parameter; print its type and then its value.
print(type(net[2].bias))
print(net[2].bias)
# No backward pass has been run in the cells above; the recorded output for
# the weight's .grad is None.
print(net[2].weight.grad)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.2125], requires_grad=True)
None


In [5]:
# Names and shapes of the parameters of the first layer only.
print([(n, p.shape) for n, p in net[0].named_parameters()])
# Names and shapes of every parameter in the network; here the names carry
# the index of the owning layer as a prefix (e.g. '0.weight').
print([(n, p.shape) for n, p in net.named_parameters()])

[('weight', torch.Size([8, 4])), ('bias', torch.Size([8]))]
[('0.weight', torch.Size([8, 4])), ('0.bias', torch.Size([8])), ('2.weight', torch.Size([1, 8])), ('2.bias', torch.Size([1]))]


In [6]:
# Look a parameter up by its state-dict key: the bias of layer 0.
net.state_dict()['0.bias'].data

tensor([ 0.3848, -0.0824, -0.4397,  0.0056, -0.2806, -0.0685, -0.2859,  0.3123])

嵌套使用各种层

In [7]:
def block1():
    """Build one MLP block: Linear(4, 8) -> ReLU -> Linear(8, 4) -> ReLU.

    Input and output widths are both 4, so these blocks can be chained.
    """
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
def block2():
    """Chain four `block1()` modules in one Sequential, registered as block0..block3."""
    stacked = nn.Sequential()
    sub_blocks = [block1() for _ in range(4)]
    for idx, sub in enumerate(sub_blocks):
        stacked.add_module(f'block{idx}', sub)
    return stacked
# Full model: the nested stack from block2() followed by a Linear(4, 1) output layer.
rgnet = nn.Sequential(
    block2(),
    nn.Linear(4, 1),
)
rgnet(X)

tensor([[-0.2273],
        [-0.2273]], grad_fn=<AddmmBackward>)

In [8]:
# Print the nested module hierarchy of rgnet.
print(rgnet)

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


内置初始化 8：00

In [9]:
def init_normal(m):
    """Initialize an `nn.Linear` in place: weights ~ N(0, 0.01^2), bias = 0.

    Modules of any other type are left untouched.
    """
    if type(m) is not nn.Linear:
        return
    # The trailing underscore marks an in-place initializer: it overwrites the
    # tensor it is given.
    nn.init.normal_(m.weight, mean=0, std=0.01)
    nn.init.zeros_(m.bias)
        
net.apply(init_normal)  # apply() visits every module inside net and calls init_normal on each one
# Show the first weight row and the first bias entry of layer 0.
net[0].weight.data[0],net[0].bias.data[0]

(tensor([ 0.0150, -0.0025,  0.0050, -0.0078]), tensor(0.))

In [10]:
def init_constant(m):
    """Set every weight of an `nn.Linear` to 1 and its bias to 0, in place.

    Demonstration only (author's note): this should not be done in practice.
    When all weights start out identical, their gradients are identical too,
    so the neurons of a layer collapse into the equivalent of a single neuron.
    Modules of any other type are left untouched.
    """
    is_linear = type(m) == nn.Linear
    if is_linear:
        nn.init.zeros_(m.bias)
        nn.init.constant_(m.weight, 1)
        
# Run init_constant over every module of net, then show the first weight row
# and the first bias entry of layer 0.
net.apply(init_constant)
net[0].weight.data[0],net[0].bias.data[0]

(tensor([1., 1., 1., 1.]), tensor(0.))

In [11]:
def xavier(m):
    """Apply Xavier uniform initialization to the weight of an `nn.Linear`.

    The bias is not modified, and modules of any other type are ignored.
    Motivation (author's note): for information to flow well through the
    network, the variance of each layer's output should stay roughly equal.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
        
# Apply xavier to layer 0 only (not to the whole network), then print its weights.
net[0].apply(xavier)
print(net[0].weight.data)

tensor([[ 0.4479, -0.1196,  0.3846, -0.4521],
        [ 0.5159, -0.6062,  0.1892,  0.5560],
        [ 0.0224, -0.1719, -0.6933,  0.6864],
        [-0.1170,  0.2874, -0.4454,  0.5751],
        [-0.0305, -0.5852,  0.6810, -0.0052],
        [-0.0364, -0.0635,  0.5628,  0.4694],
        [ 0.3634, -0.0776,  0.0238, -0.1764],
        [ 0.4087, -0.3320, -0.7060, -0.6286]])


自定义初始化

In [12]:
def my_init(m):
    """Custom initializer for `nn.Linear`: sparse weights with large magnitude.

    Weights are drawn from U(-10, 10); every entry whose absolute value is
    below 5 is then set to zero. The bias is left unchanged, and modules of
    any other type are ignored.
    """
    if type(m) == nn.Linear:
        # Log the name and shape of the first parameter the layer reports.
        name, param = next(m.named_parameters())
        print("INIT", (name, param.shape))
        # Fix: the earlier call passed the single float -10.10 as the lower
        # bound, leaving the upper bound at its default of 1.0. Sampling was
        # therefore from U(-10.1, 1), and no positive weight could survive
        # the |w| >= 5 threshold below.
        nn.init.uniform_(m.weight, -10, 10)
        # Multiplying by the boolean mask zeroes every entry with |w| < 5.
        m.weight.data *= m.weight.data.abs() >= 5
        
# Run the custom initializer over every module of net, then print the first
# two weight rows of layer 0.
net.apply(my_init)
print(net[0].weight[:2])

INIT ('weight', torch.Size([8, 4]))
INIT ('weight', torch.Size([1, 8]))
tensor([[-0.0000, -8.1895, -5.9046, -0.0000],
        [-0.0000, -0.0000, -8.9830, -0.0000]], grad_fn=<SliceBackward>)


In [13]:
# A single weight entry can also be overwritten directly through .data.
net[0].weight.data[0,0]=42
net[0].weight.data[0,0]

tensor(42.)

参数绑定

In [14]:
# Parameter tying: the same Linear(8, 8) instance `shared` is placed at two
# positions of the network, so both positions use one set of parameters.
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    shared,
    nn.ReLU(),
    shared,
    nn.ReLU(),
    nn.Linear(8, 1),
)
net(X)
# Compare the first weight row of the layers at index 2 and index 4.
print(net[2].weight.data[0] == net[4].weight.data[0])



tensor([True, True, True, True, True, True, True, True])
