## Part 1 - 模型构造，参数访问

In [1]:
import torch
from torch import nn

class MLP(nn.Module):
    # 声明带有模型参数的层，这里声明了两个全连接层
    def __init__(self, **kwargs):
        # 调用MLP父类Module的构造函数来进行必要的初始化。这样在构造实例时还可以指定其他函数
        # 参数，如“模型参数的访问、初始化和共享”一节将介绍的模型参数params
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Linear(784, 256) # 隐藏层
        self.act = nn.ReLU()
        self.output = nn.Linear(256, 10)  # 输出层


    # 定义模型的前向计算，即如何根据输入x计算返回所需要的模型输出
    def forward(self, x):
        a = self.act(self.hidden(x))
        return self.output(a)

In [3]:
X = torch.rand(2, 784)
net = MLP()
print(net)
net(X)

MLP(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


tensor([[-0.1479,  0.5430, -0.0126, -0.0852,  0.0899, -0.0119,  0.1434, -0.1638,
          0.0107, -0.0942],
        [-0.1959,  0.4421,  0.1383, -0.2489,  0.0574,  0.0772, -0.0179, -0.1129,
          0.1440,  0.0033]], grad_fn=<AddmmBackward>)

In [10]:
from collections import OrderedDict

net = nn.Sequential(
        nn.Linear(256, 128),
        nn.ReLU6(),
        nn.Linear(128, 16)
        )

print(net)

net = nn.Sequential(OrderedDict([
        ('linear1', nn.Linear(256,128)),
        ('relu1', nn.ReLU6()),
        ('linear2', nn.Linear(128,16))
        ])
        )

print(net)

Sequential(
  (0): Linear(in_features=256, out_features=128, bias=True)
  (1): ReLU6()
  (2): Linear(in_features=128, out_features=16, bias=True)
)
Sequential(
  (linear1): Linear(in_features=256, out_features=128, bias=True)
  (relu1): ReLU6()
  (linear2): Linear(in_features=128, out_features=16, bias=True)
)


In [14]:
net = nn.ModuleList([nn.Linear(784, 256), nn.ReLU()])
net.append(nn.Linear(256, 10)) # # 类似List的append操作
print(net[-1])  # 类似List的索引访问
print(net)
#net(torch.zeros(1, 784)) # 会报NotImplementedError


Linear(in_features=256, out_features=10, bias=True)
ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


In [13]:
net = nn.ModuleDict({
    'linear': nn.Linear(784, 256),
    'act': nn.ReLU(),
})
net['output'] = nn.Linear(256, 10) # 添加
print(net['linear']) # 访问
print(net.output)
print(net)
# net(torch.zeros(1, 784)) # 会报NotImplementedError


Linear(in_features=784, out_features=256, bias=True)
Linear(in_features=256, out_features=10, bias=True)
ModuleDict(
  (act): ReLU()
  (linear): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)


## Part 2 - 模型参数

In [32]:
import torch
from torch import nn
from torch.nn import init

net = nn.Sequential(nn.Linear(4, 3), nn.ReLU(), nn.Linear(3, 1))  # pytorch已进行默认初始化

print(net)
X = torch.rand(2, 4)
Y = net(X).sum()


print(type(net.named_parameters()))
for name, param in net.named_parameters():
    print(name, param.size(), type(param))

Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)
<class 'generator'>
0.weight torch.Size([3, 4]) <class 'torch.nn.parameter.Parameter'>
0.bias torch.Size([3]) <class 'torch.nn.parameter.Parameter'>
2.weight torch.Size([1, 3]) <class 'torch.nn.parameter.Parameter'>
2.bias torch.Size([1]) <class 'torch.nn.parameter.Parameter'>


In [33]:
for name, param in net[1].named_parameters():
    print(name, param.size(), type(param))

#层1是激活层，没有参数，什么也不会显示出来

In [35]:
weight_0 = list(net[0].parameters())[0]
print(weight_0)
print(weight_0.grad)
Y.backward(retain_graph=True)
print(weight_0.data)
print(weight_0.grad)

Parameter containing:
tensor([[-0.3766,  0.1715, -0.0939, -0.3749],
        [ 0.1494,  0.3100,  0.3377,  0.1284],
        [-0.2186, -0.2411,  0.3476, -0.2049]], requires_grad=True)
tensor([[0.0000, 0.0000, 0.0000, 0.0000],
        [0.5447, 0.3614, 0.3962, 0.0248],
        [0.0000, 0.0000, 0.0000, 0.0000]])
tensor([[-0.3766,  0.1715, -0.0939, -0.3749],
        [ 0.1494,  0.3100,  0.3377,  0.1284],
        [-0.2186, -0.2411,  0.3476, -0.2049]])
tensor([[0.0000, 0.0000, 0.0000, 0.0000],
        [1.0894, 0.7227, 0.7925, 0.0497],
        [0.0000, 0.0000, 0.0000, 0.0000]])


## Part 3 - 模型参数共享

In [36]:
linear = nn.Linear(1, 1, bias=False)
net = nn.Sequential(linear, linear) 
print(net)
for name, param in net.named_parameters():
    init.constant_(param, val=3)
    print(name, param.data)

print(id(net[0]) == id(net[1]))
print(id(net[0].weight) == id(net[1].weight))

Sequential(
  (0): Linear(in_features=1, out_features=1, bias=False)
  (1): Linear(in_features=1, out_features=1, bias=False)
)
0.weight tensor([[3.]])
True
True


In [37]:
x = torch.ones(1, 1)
y = net(x).sum()
print(y)
y.backward()
print(net[0].weight.grad) # 单次梯度是3，两次所以就是6

tensor(9., grad_fn=<SumBackward0>)
tensor([[6.]])
