In [1]:
import torch
from torch import nn
from torch.nn import functional as F

简单的多层感知机

In [2]:
# A multilayer perceptron assembled from stock layers:
# 20 input features -> 256 hidden units -> ReLU -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

# A mini-batch of 2 samples with 20 features each, drawn uniformly from [0, 1).
X = torch.rand((2, 20))
net(X)

tensor([[-0.0859,  0.0391, -0.1643,  0.0753,  0.1324, -0.1308, -0.0905,  0.0554,
         -0.0992, -0.0277],
        [-0.0037, -0.0202, -0.2049,  0.1059,  0.1655, -0.1099, -0.0697,  0.0958,
         -0.0721,  0.0128]], grad_fn=<AddmmBackward>)

In [3]:
class MLP(nn.Module):
    """Multilayer perceptron with one ReLU-activated hidden layer.

    The layer sizes default to the values used throughout this notebook
    (20 -> 256 -> 10), so ``MLP()`` behaves exactly as before; passing other
    sizes lets the same class be reused for differently shaped inputs.

    Args:
        in_features: Number of features in each input sample.
        hidden_features: Number of units in the hidden layer.
        out_features: Number of features in each output sample.
    """

    def __init__(self, in_features=20, hidden_features=256, out_features=10):
        super().__init__()
        # Layers are created in the order hidden -> out, so random
        # initialisation consumes the RNG in a fixed, reproducible order.
        self.hidden = nn.Linear(in_features, hidden_features)
        self.out = nn.Linear(hidden_features, out_features)

    def forward(self, X):
        """Apply hidden layer, ReLU, then output layer to ``X``.

        Args:
            X: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``X`` and a last
            dimension of ``out_features``.
        """
        return self.out(F.relu(self.hidden(X)))

In [4]:
# Instantiate the hand-written MLP (rebinding `net`) and run it on the
# batch X that was created in an earlier cell.
net = MLP()
net(X)

tensor([[-2.1665e-01,  2.0091e-01, -2.0975e-02,  2.8544e-01, -2.5341e-02,
         -2.4074e-01, -5.1555e-02,  7.7062e-02, -4.9893e-03, -1.4372e-01],
        [-1.9121e-01,  1.0735e-01, -6.4942e-02,  3.0791e-01,  1.1672e-01,
         -8.9714e-02, -1.4971e-01,  1.2111e-02, -9.7262e-02, -2.2612e-04]],
       grad_fn=<AddmmBackward>)

## 参数管理

In [5]:
# A tiny 2 -> 4 -> 1 network; the trailing comments give each layer's index
# inside the Sequential container.
net = nn.Sequential(
    nn.Linear(2, 4),  # net[0]
    nn.ReLU(),        # net[1]
    nn.Linear(4, 1),  # net[2]
)

# Two samples with two features each, uniform in [0, 1).
X = torch.rand((2, 2))
net(X)

tensor([[ 0.0019],
        [-0.0061]], grad_fn=<AddmmBackward>)

In [6]:
# Inspect the parameters (weight and bias) of the layer at index 2.
net[2].state_dict()

OrderedDict([('weight', tensor([[ 0.2348,  0.3318, -0.1737,  0.3665]])),
             ('bias', tensor([-0.1129]))])

In [7]:
# Show the bias of the layer at index 2 three ways, one per line:
# its Python type, the Parameter itself, and the raw tensor behind `.data`.
out_bias = net[2].bias
print(type(out_bias), out_bias, out_bias.data, sep="\n")

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.1129], requires_grad=True)
tensor([-0.1129])


一次性查看所有参数

In [8]:
# Print every (qualified name, shape) pair in one line.
# The ReLU layer has no parameters, so it contributes no entries.
print(*((name, tensor.shape) for name, tensor in net.named_parameters()))

('0.weight', torch.Size([4, 2])) ('0.bias', torch.Size([4])) ('2.weight', torch.Size([1, 4])) ('2.bias', torch.Size([1]))


嵌套block

In [9]:
def block1():
    """Return a fresh 2 -> 4 -> ReLU -> 2 ``nn.Sequential`` block."""
    layers = (nn.Linear(2, 4), nn.ReLU(), nn.Linear(4, 2))
    return nn.Sequential(*layers)

def block2():
    """Return a Sequential holding four independent ``block1()`` sub-blocks.

    The children are registered under the names 'block 0' .. 'block 3'.
    """
    stacked = nn.Sequential()
    for i in range(4):
        # Each call to block1() builds new layers, so no weights are shared.
        stacked.add_module(f'block {i}', block1())
    return stacked

# Nest the stack returned by block2() inside an outer Sequential, followed by
# a final Linear(2, 1) layer. The bare `net` expression displays the module tree.
net = nn.Sequential(block2(), nn.Linear(2, 1))
net

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=2, out_features=4, bias=True)
      (1): ReLU()
      (2): Linear(in_features=4, out_features=2, bias=True)
    )
    (block 1): Sequential(
      (0): Linear(in_features=2, out_features=4, bias=True)
      (1): ReLU()
      (2): Linear(in_features=4, out_features=2, bias=True)
    )
    (block 2): Sequential(
      (0): Linear(in_features=2, out_features=4, bias=True)
      (1): ReLU()
      (2): Linear(in_features=4, out_features=2, bias=True)
    )
    (block 3): Sequential(
      (0): Linear(in_features=2, out_features=4, bias=True)
      (1): ReLU()
      (2): Linear(in_features=4, out_features=2, bias=True)
    )
  )
  (1): Linear(in_features=2, out_features=1, bias=True)
)

参数共享

In [10]:
# Parameter sharing: the very same Linear instance is placed at two positions
# (indices 2 and 4) of the container, so both positions refer to one weight tensor.
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    shared,
    nn.ReLU(),
    shared,
    nn.ReLU(),
    nn.Linear(8, 1),
)

net(torch.rand(2, 4))
print(net[2].weight.data[0] == net[4].weight.data[0])
net[2].weight.data[0, 0] = 100  # modify one value through the layer at index 2
print(net[2].weight.data[0] == net[4].weight.data[0])  # the rows are still identical

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])
