In [2]:
import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn

# Two-layer perceptron assembled with the stock Sequential container.
net = nn.Sequential()
with net.name_scope():
    # Both layers are created inside the container's name scope,
    # hidden layer first, then the 10-unit output layer.
    net.add(nn.Dense(256, activation='relu'), nn.Dense(10))

print(net)

Sequential(
  (0): Dense(None -> 256, Activation(relu))
  (1): Dense(None -> 10, linear)
)


In [3]:

# A Block is a general-purpose building unit; blocks can be nested inside one
# another without limit. It mainly provides: parameter storage, a description
# of how the forward pass is executed, and automatic differentiation.
# Sequential is really just a container of blocks: blocks are appended with
# add(), and its forward function is generated automatically by running the
# added blocks in order.
class MLP(nn.Block):
    """Two-layer perceptron written as an explicit Block subclass."""

    def __init__(self, *args, **kwargs):
        """Create the two Dense layers; all arguments go to ``nn.Block``."""
        super(MLP, self).__init__(*args, **kwargs)
        with self.name_scope():
            # Hidden layer with 256 units, then a 10-unit output layer.
            # Neither has a built-in activation; ReLU is applied in forward().
            self.dense_first = nn.Dense(256)
            self.dense_second = nn.Dense(10)

    def forward(self, x):
        """Return ``dense_second(relu(dense_first(x)))``."""
        hidden = self.dense_first(x)
        activated = nd.relu(hidden)
        return self.dense_second(activated)


# Instantiate the hand-written MLP and print its layer structure.
net2 = MLP()
print(net2)

MLP(
  (dense_first): Dense(None -> 256, linear)
  (dense_second): Dense(None -> 10, linear)
)


In [4]:
# Initialise the parameters, then push a random batch of shape (4, 20)
# through the network and print the result.
net2.initialize()
x = nd.random.uniform(shape=(4, 20))
y = net2(x)
print(y)


[[ 0.03126615  0.04562765  0.00039855 -0.08772384 -0.05355631  0.02904575
   0.08102557 -0.01433946 -0.04224152  0.06047883]
 [ 0.02871901  0.03652266  0.00630051 -0.0565097  -0.07189323  0.08615956
   0.05951558 -0.06045965 -0.0299026   0.05651002]
 [ 0.02147348  0.04818897  0.05321141 -0.12616858 -0.06850231  0.09096344
   0.04064302 -0.05064791 -0.0220024   0.04859561]
 [ 0.03780477  0.07512389  0.03290457 -0.1164111  -0.03254965  0.0586529
   0.02542157 -0.01697343 -0.00049651  0.05892841]]
<NDArray 4x10 @cpu(0)>


In [5]:
# Compare the layer name with the default prefix against an empty prefix.
print(net2.dense_first.name)

net3 = MLP(prefix='') # sets the name prefix used for the network's layers
print(net3.dense_first.name)

mlp0_dense0
dense0


In [8]:
# A simple implementation of Sequential.

class Sequential(nn.Block):
    """Minimal sequential container: runs its child blocks in insertion order.

    Constructor arguments are handled entirely by ``nn.Block``.
    """

    def add(self, block, block_name=None):
        """Register ``block`` as the next child of this container.

        Parameters
        ----------
        block : nn.Block
            The block to append.
        block_name : str, optional
            Key under which the child is registered. When omitted, the choice
            of name is left to ``Block.register_child``.
        """
        # Use the public registration hook rather than writing into the
        # private ``_children`` mapping directly.
        self.register_child(block, block_name)

    def forward(self, x):
        """Feed ``x`` through every registered child in order and return the result."""
        for block in self._children.values():
            x = block(x)
        return x

# Build a network with the custom Sequential defined above, giving every
# layer an explicit name.
myseq = Sequential()
with myseq.name_scope():
    myseq.add(nn.Dense(256, activation='relu'), block_name='first')
    myseq.add(nn.Dense(10), block_name='second')

# Initialise and run on the input batch `x` created in an earlier cell.
myseq.initialize()
y = myseq(x)
print(y)


[[-0.00411106  0.00781807  0.03506001 -0.01106467  0.09599376 -0.04190595
   0.01127483 -0.01493319  0.0716491   0.00700369]
 [ 0.01214233  0.02546025  0.03533493 -0.02328115  0.10768865 -0.01672857
  -0.02653831 -0.03458688  0.0640486  -0.00030123]
 [-0.00452384  0.00228632  0.02761049 -0.05750642  0.10328892 -0.01792853
  -0.04610603 -0.04085523  0.05824736  0.0003379 ]
 [-0.00518478 -0.02185423  0.02528594 -0.00436605  0.05142228 -0.02703231
   0.01939205 -0.03802725  0.0283272  -0.01720729]]
<NDArray 4x10 @cpu(0)>


In [9]:
class FancyMLP(nn.Block):
    """Block whose forward pass mixes a Dense layer with plain NDArray code.

    The same Dense layer is applied twice, with a multiplication by a fixed
    random matrix in between.
    """

    def __init__(self, *args, **kwargs):
        """Create the Dense layer and the random matrix; args go to ``nn.Block``."""
        super(FancyMLP, self).__init__(*args, **kwargs)
        with self.name_scope():
            self.dense = nn.Dense(256)
            # Stored as a plain NDArray attribute, not a gluon Parameter.
            # Drawn through the ``nd.random`` namespace, matching the other
            # random call in this notebook.
            self.weight = nd.random.uniform(shape=(256, 20))

    def forward(self, x):
        """Apply dense -> ReLU -> (dot with weight, + 1) -> ReLU -> dense -> ReLU."""
        x = nd.relu(self.dense(x))
        # The 256 Dense outputs are multiplied by the (256, 20) matrix,
        # leaving 20 columns.
        x = nd.relu(nd.dot(x, self.weight) + 1)
        # Second pass through the very same layer object, so its weights are
        # shared with the first call.
        # NOTE(review): presumably this requires the original input to have
        # 20 features as well, as the `x` used in this notebook does - confirm.
        x = nd.relu(self.dense(x))
        return x

# Instantiate FancyMLP and print its layer structure.
fancy = FancyMLP()
print(fancy)

FancyMLP(
  (dense): Dense(None -> 256, linear)
)


In [10]:
# Initialise, run on the input batch `x` from an earlier cell, and print only
# the output shape.
fancy.initialize()
y = fancy(x)
print(y.shape)

(4, 256)


In [11]:
# Blocks and Sequential containers can be mixed freely.
class RecMLP(nn.Block):
    """Block that wraps a Sequential sub-network followed by a Dense layer."""

    def __init__(self, *args, **kwargs):
        """Build the nested sub-network; all arguments go to ``nn.Block``."""
        super(RecMLP, self).__init__(*args, **kwargs)
        self.net = nn.Sequential()
        with self.name_scope():
            # Two ReLU hidden layers (256 then 128 units) inside the container.
            for width in (256, 128):
                self.net.add(nn.Dense(width, activation='relu'))
            # 64-unit layer applied after the container; ReLU comes in forward().
            self.dense = nn.Dense(64)

    def forward(self, x):
        """Return ``relu(dense(net(x)))``."""
        features = self.net(x)
        projected = self.dense(features)
        return nd.relu(projected)

# Nest the custom RecMLP block inside a stock Sequential, followed by a
# 10-unit Dense layer, and print the resulting structure.
rec = nn.Sequential()
rec.add(RecMLP())
rec.add(nn.Dense(10))

print(rec)

Sequential(
  (0): RecMLP(
    (net): Sequential(
      (0): Dense(None -> 256, Activation(relu))
      (1): Dense(None -> 128, Activation(relu))
    )
    (dense): Dense(None -> 64, linear)
  )
  (1): Dense(None -> 10, linear)
)
