In [3]:
import torch
from torch import nn
from torch.nn import functional as F

# A two-layer perceptron assembled with the built-in container:
# 20 input features -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

# A batch of two random samples with 20 features each, drawn from U[0, 1).
X = torch.rand(2, 20)
net(X)

tensor([[ 0.1223,  0.2380, -0.0549, -0.0380,  0.0805, -0.0726, -0.0677, -0.2498,
         -0.1170, -0.1891],
        [ 0.2506,  0.3541, -0.0951,  0.0890,  0.0074, -0.1248, -0.1108, -0.3038,
         -0.1492, -0.1891]], grad_fn=<AddmmBackward0>)

In [4]:
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer and a ReLU activation.

    Args:
        num_inputs: Number of input features per sample (default 20).
        num_hiddens: Width of the hidden layer (default 256).
        num_outputs: Number of output features per sample (default 10).

    With the default arguments, ``MLP()`` builds a 20 -> 256 -> 10 network.
    """

    def __init__(self, num_inputs=20, num_hiddens=256, num_outputs=10):
        # Call nn.Module's constructor first: it performs the initialization
        # that lets the layers assigned below be registered as sub-modules.
        super().__init__()
        self.hidden = nn.Linear(num_inputs, num_hiddens)    # hidden layer
        self.out = nn.Linear(num_hiddens, num_outputs)      # output layer

    def forward(self, X):
        """Forward propagation: map input ``X`` to the model output.

        ``X`` must have ``num_inputs`` features in its last dimension; the
        result has ``num_outputs`` features in its last dimension.
        """
        # Use the functional form of ReLU from nn.functional; it has no
        # state, so it does not need to be declared as a layer.
        return self.out(F.relu(self.hidden(X)))

# Instantiate the hand-written MLP and run a forward pass on the batch X.
net=MLP()
net(X)

tensor([[-0.1395, -0.0453,  0.0135,  0.0902,  0.0249,  0.1164, -0.0471, -0.1008,
          0.0886,  0.1684],
        [-0.0094,  0.1001,  0.0056,  0.1844, -0.0514,  0.0170,  0.0273, -0.0866,
          0.0196,  0.2387]], grad_fn=<AddmmBackward0>)

In [17]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Args:
        *args: ``nn.Module`` instances, applied in the order given. Each one
            is registered under its positional index as a string
            ("0", "1", ...).

    Raises:
        TypeError: If an argument is neither an ``nn.Module`` nor ``None``
            (raised by ``add_module``).
    """

    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            # Register through the public API instead of writing to the
            # private `_modules` dict. `add_module` stores the module in the
            # same place, but also validates that it really is a Module.
            self.add_module(str(idx), module)

    def forward(self, X):
        """Feed ``X`` through every registered module, in registration order."""
        # Iterate over `_modules` itself rather than `children()`:
        # `children()` skips repeated instances, so a layer that was passed
        # twice (parameter sharing) would wrongly be applied only once.
        for block in self._modules.values():
            X = block(X)
        return X

# Same 20 -> 256 -> 10 architecture, now assembled with the custom container.
net = MySequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
net

MySequential(
  (0): Linear(in_features=20, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)

In [18]:
# Forward pass on X. Assuming the cells ran in order, `net` is the
# MySequential instance built in the preceding cell.
net(X)

tensor([[-0.1443,  0.0274,  0.0231,  0.2383, -0.1433,  0.2706,  0.1281, -0.2102,
         -0.2895,  0.0148],
        [-0.0507,  0.0243,  0.1106,  0.2597, -0.0237,  0.1876,  0.0165, -0.1505,
         -0.1666, -0.2182]], grad_fn=<AddmmBackward0>)

In [7]:
class FixedHiddenMLP(nn.Module):
    """MLP that mixes a trainable layer with constant weights and Python control flow.

    The forward pass applies one shared ``nn.Linear(20, 20)`` twice, with a
    fixed (never trained) random 20x20 matrix in between, then halves the
    result until its L1 norm is at most 1 and returns the sum as a scalar.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights: not a Parameter, so no gradient is
        # computed for them and they stay unchanged during training.
        # Registering them as a buffer (instead of a plain attribute) makes
        # them follow the module through .to()/.cuda()/.double() and includes
        # them in state_dict(), so a saved model restores the same constants.
        self.register_buffer('rand_weight', torch.rand((20, 20)))
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor computed from ``X`` of shape (batch, 20)."""
        X = self.linear(X)
        # Use the constant weights together with the `relu` and `mm` functions.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the fully connected layer: this is equivalent to two fully
        # connected layers sharing their parameters.
        X = self.linear(X)
        # Arbitrary Python control flow inside forward(): halve until the
        # L1 norm is at most 1. Out-of-place division yields the same values
        # as `X /= 2` without mutating a tensor tracked by autograd.
        while X.abs().sum() > 1:
            X = X / 2
        return X.sum()

# Instantiate FixedHiddenMLP and evaluate it on X. Its forward() ends with
# X.sum(), so the result is a scalar tensor.
net=FixedHiddenMLP()
net(X)

tensor(0.0868, grad_fn=<SumBackward0>)

In [15]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` stack followed by a linear layer.

    Feature sizes: 20 -> 64 -> 32 (ReLU after each) inside ``self.net``,
    then 32 -> 16 through ``self.linear``.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in application order and wrapped in a Sequential.
        stack = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*stack)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested stack, then the final linear layer."""
        hidden = self.net(X)
        return self.linear(hidden)

# Mix custom blocks and built-in layers freely: 20 -> 16 (NestMLP),
# 16 -> 20 (Linear), then FixedHiddenMLP on the 20 features.
chimera = nn.Sequential(
    NestMLP(),
    nn.Linear(16, 20),
    FixedHiddenMLP(),
)
chimera

Sequential(
  (0): NestMLP(
    (net): Sequential(
      (0): Linear(in_features=20, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): ReLU()
    )
    (linear): Linear(in_features=32, out_features=16, bias=True)
  )
  (1): Linear(in_features=16, out_features=20, bias=True)
  (2): FixedHiddenMLP(
    (linear): Linear(in_features=20, out_features=20, bias=True)
  )
)

In [16]:
# Forward pass through the composed network. The last stage is
# FixedHiddenMLP, whose forward() returns X.sum(), so the result is a scalar.
chimera(X)

tensor(-0.0368, grad_fn=<SumBackward0>)

Exercises

In [13]:
class Parallel(nn.Module):
    """Apply two networks to the same input and concatenate their outputs.

    Args:
        net1: First sub-network; its output forms the leading columns.
        net2: Second sub-network; its output forms the trailing columns.
    """

    def __init__(self, net1, net2):
        super().__init__()
        # Attribute assignment registers both as sub-modules.
        self.net1, self.net2 = net1, net2

    def forward(self, X):
        """Return ``[net1(X), net2(X)]`` joined along dimension 1."""
        first = self.net1(X)
        second = self.net2(X)
        return torch.cat((first, second), dim=1)

# Two independent 20 -> 256 -> 10 branches evaluated side by side.
parallel = Parallel(
    MLP(),
    MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10)),
)
parallel

Parallel(
  (net1): MLP(
    (hidden): Linear(in_features=20, out_features=256, bias=True)
    (out): Linear(in_features=256, out_features=10, bias=True)
  )
  (net2): MySequential(
    (0): Linear(in_features=20, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=10, bias=True)
  )
)

In [14]:
# Each branch maps X to 10 output features; Parallel concatenates them along
# dim 1, giving 20 columns per sample.
parallel(X)

tensor([[-0.1301, -0.0492, -0.0552, -0.1116, -0.1252,  0.1407, -0.0330,  0.0326,
          0.1234,  0.0012, -0.0685,  0.0468,  0.2071, -0.1888, -0.1068, -0.2285,
         -0.1078,  0.0791,  0.3881,  0.3659],
        [-0.0873, -0.0937, -0.2163, -0.0526, -0.1333,  0.0070, -0.0241,  0.2368,
          0.1769, -0.1337, -0.1394,  0.1266,  0.1361, -0.2932, -0.2012, -0.2391,
         -0.1265,  0.0349,  0.2590,  0.3636]], grad_fn=<CatBackward0>)