# 层和块

首先，我们回顾一下多层感知机

In [None]:
import sys
# Add the parent directory to the module search path
# (presumably so the `d2l` package imported in the next cell resolves — confirm).
sys.path.append('..')

In [1]:
from d2l import mindspore as d2l
import mindspore
from mindspore import nn, ops, Tensor
import numpy as np

# Multilayer perceptron built from a ready-made sequential container:
# 20 inputs -> 256 hidden units -> ReLU -> 10 outputs.
net = nn.SequentialCell([
    nn.Dense(20, 256),
    nn.ReLU(),
    nn.Dense(256, 10),
])

# Input of shape (2, 20) created with d2l.rand; evaluating `net(X)` as the
# last expression displays the result of the forward pass.
X = d2l.rand((2, 20))
net(X)

Tensor(shape=[2, 10], dtype=Float32, value=
[[-6.79369224e-03,  7.97413464e-04,  3.23574618e-03 ...  7.16019771e-04, -1.44797785e-04,  1.57120079e-03],
 [-6.07040105e-03,  5.09693520e-03,  4.14576288e-03 ...  1.12101820e-03, -4.35058493e-04,  4.35355678e-03]])

`nn.SequentialCell`定义了一种特殊的`Cell`：它按顺序保存若干子层，并在前向传播时依次调用它们。

自定义块

In [2]:
class MLP(nn.Cell):
    """Multilayer perceptron with a single hidden layer.

    Declares two fully connected layers: 20 -> 256 (hidden) and
    256 -> 10 (output), with a ReLU applied in between.
    """

    def __init__(self):
        # Call the constructor of MLP's parent class (nn.Cell) to perform
        # the necessary initialization before any layers are assigned.
        super().__init__()
        self.hidden = nn.Dense(20, 256)  # hidden layer
        self.out = nn.Dense(256, 10)  # output layer

    def construct(self, X):
        """Forward pass: return the model output for the input ``X``."""
        hidden_activation = d2l.relu(self.hidden(X))
        return self.out(hidden_activation)

实例化多层感知机的层，然后在每次调用前向传播函数时调用这些层

In [3]:
# Instantiate the custom MLP and run a forward pass on the input X
# created in an earlier cell.
net = MLP()
net(X)

Tensor(shape=[2, 10], dtype=Float32, value=
[[ 1.00408215e-06, -7.51196174e-04, -1.47786422e-03 ...  9.07916750e-04,  3.31065315e-03,  3.96206928e-03],
 [-1.93189026e-03, -1.93725619e-03, -2.41200835e-03 ...  5.95276617e-03,  4.78746044e-03,  5.16231870e-03]])

顺序块

In [4]:
class MySequential(nn.Cell):
    """Hand-written sequential container.

    Every positional argument is stored as a child cell keyed by its
    position; the forward pass applies the children one after another.
    """

    def __init__(self, *args):
        super().__init__()
        for position, layer in enumerate(args):
            key = str(position)
            # Pass "<position>." to update_parameters_name (presumably a
            # prefix that keeps parameter names of different children
            # distinct — confirm against the MindSpore Cell documentation).
            layer.update_parameters_name(key + ".")
            # Store the child in the cell's `_cells` mapping under its position.
            self._cells[key] = layer

    def construct(self, X):
        """Feed ``X`` through each stored child, in `_cells` iteration order."""
        for layer in self._cells.values():
            X = layer(X)
        return X

# Build the same 20 -> 256 -> ReLU -> 10 network with the custom container
# and run a forward pass on X.
net = MySequential(nn.Dense(20, 256), nn.ReLU(), nn.Dense(256, 10))
net(X)

Tensor(shape=[2, 10], dtype=Float32, value=
[[ 1.56846642e-03, -4.25875094e-03,  5.70291071e-04 ... -1.61940954e-03,  2.20699748e-03, -5.04909083e-03],
 [-6.69776287e-04, -4.14503831e-03,  6.94219721e-04 ... -3.46967345e-03,  5.57024789e-04, -5.33907907e-03]])

在前向传播函数中执行代码

In [5]:
class FixedHiddenMLP(nn.Cell):
    """MLP that mixes a fixed random weight, a reused layer and control flow.

    The forward pass applies the same dense layer twice, multiplies by a
    random matrix created once in the constructor, and then halves the
    result until the sum of absolute values is at most 1.
    """

    def __init__(self):
        super().__init__()
        # Random weight created once at construction time. The original
        # author's note says no gradient is computed for it, so it stays
        # constant during training.
        self.rand_weight = d2l.rand((20, 20))
        self.linear = nn.Dense(20, 20)

    def construct(self, X):
        """Return a scalar: the sum of the transformed, rescaled input."""
        X = self.linear(X)
        # Use the constant weight together with matmul and ReLU.
        projected = d2l.matmul(X, self.rand_weight)
        X = d2l.relu(projected + 1)
        # Reuse the dense layer: equivalent to two layers sharing parameters.
        X = self.linear(X)
        # Control flow: keep halving until the L1 sum is no greater than 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

Tensor(shape=[], dtype=Float32, value= 0.542837)

In [None]:
# Instantiate the fixed-hidden-weight MLP and evaluate it on X;
# its construct() returns X.sum(), i.e. a single reduced value.
net = FixedHiddenMLP()
net(X)

混合搭配各种组合块的方法

In [6]:
class NestMLP(nn.Cell):
    """Block that nests a sequential sub-network followed by a dense layer.

    The sub-network maps 20 -> 64 -> 32 features with ReLU after each dense
    layer; the final dense layer projects 32 -> 16.
    """

    def __init__(self):
        super().__init__()
        feature_layers = [
            nn.Dense(20, 64), nn.ReLU(),
            nn.Dense(64, 32), nn.ReLU(),
        ]
        self.net = nn.SequentialCell(feature_layers)
        self.linear = nn.Dense(32, 16)

    def construct(self, X):
        """Run the nested sub-network, then the final dense projection."""
        features = self.net(X)
        return self.linear(features)

# Mix and match blocks: a nested MLP (20 -> 16), a plain dense layer
# (16 -> 20), and the fixed-hidden-weight MLP, chained in sequence.
chimera = nn.SequentialCell([
    NestMLP(),
    nn.Dense(16, 20),
    FixedHiddenMLP(),
])
chimera(X)

Tensor(shape=[], dtype=Float32, value= -0.0956512)