In [28]:
from mxnet import np, npx
from mxnet.gluon import nn
npx.set_np()

# Two-layer perceptron assembled with Gluon's Sequential container:
# a 256-unit ReLU hidden layer followed by a 10-unit output layer.
net = nn.Sequential()
net.add(
    nn.Dense(256, activation="relu"),
    nn.Dense(10),
)
net.initialize()

# Random mini-batch: 2 examples with 20 features each.
X = np.random.uniform(size=(2, 20))
net(X)

array([[ 0.05154381,  0.00966544,  0.00602048,  0.0352538 , -0.05848393,
         0.01683447, -0.02691596, -0.002127  ,  0.00729257,  0.06459734],
       [ 0.03342589,  0.02208288,  0.07317045,  0.02163889, -0.07724746,
        -0.02824969, -0.04605664, -0.01612809,  0.02834059,  0.08598392]])

In [29]:
class MLP(nn.Block):
    """Multilayer perceptron with one hidden layer, written as a custom block.

    The hidden layer has 256 units with ReLU activation; the output layer
    has 10 units and no activation.
    """

    def __init__(self, **kwargs):
        # Keyword arguments are passed through to nn.Block unchanged.
        super().__init__(**kwargs)
        self.hidden = nn.Dense(256, activation="relu")
        self.out = nn.Dense(10)

    def forward(self, X):
        """Apply the hidden layer to X, then the output layer to the result."""
        hidden_out = self.hidden(X)
        return self.out(hidden_out)

In [30]:
# Instantiate the custom MLP block, initialize its parameters, and run the
# input batch X through it.
net2 = MLP()
net2.initialize()
net2(X)

array([[-0.02600484, -0.01108652,  0.05857804,  0.00879788, -0.07648174,
         0.05120177,  0.02139943,  0.01986018, -0.05838133, -0.05440345],
       [-0.04650507, -0.04686351,  0.10305366, -0.01742985, -0.10396324,
         0.07541025, -0.02440178,  0.01584255, -0.09944818, -0.07798939]])

In [31]:
class MySequential(nn.Block):
    """Minimal container that applies its child blocks one after another."""

    def add(self, block):
        """Store ``block`` as a child, keyed by its ``name`` attribute.

        Because the key is ``block.name``, adding a block whose name is
        already present replaces the stored entry rather than appending a
        second one (assuming ``_children`` behaves like a dict).
        """
        key = block.name
        self._children[key] = block

    def forward(self, X):
        """Feed X through every child, in ``_children`` iteration order."""
        out = X
        for layer in self._children.values():
            out = layer(out)
        return out

In [32]:
# Rebuild the 256 -> 10 network with the hand-written MySequential container.
# Its add() takes a single block, so each layer is added separately.
net3 = MySequential()
net3.add(nn.Dense(256, activation="relu"))
net3.add(nn.Dense(10))
net3.initialize()
net3(X)

array([[ 0.03642648,  0.0133517 , -0.02723628, -0.02076728,  0.02490582,
         0.00161982, -0.05942534,  0.04214321, -0.00234531,  0.00981733],
       [ 0.03085874,  0.02843617, -0.01599898,  0.01526786,  0.01950636,
         0.02584129, -0.07666852,  0.01154022,  0.01807711,  0.05126766]])

In [33]:
class FixedHiddenMLP(nn.Block):
    """Block combining a dense layer, a constant weight, and Python control flow.

    The forward pass applies one dense layer twice (so both applications use
    the same layer instance), multiplies by a 20x20 constant in between, and
    finally rescales the activations before reducing them to a single value.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # 20x20 weight registered through `get_constant` rather than as a
        # regular layer parameter.
        self.rand_weight = self.params.get_constant(
            "rand_weight", np.random.uniform(size=(20, 20)))
        self.dense = nn.Dense(20, activation="relu")

    def forward(self, X):
        """Return the sum of the rescaled activations as a scalar array."""
        X = self.dense(X)
        X = npx.relu(np.dot(X, self.rand_weight.data()) + 1)
        # Second application of the same dense layer instance.
        X = self.dense(X)

        # Halve the activations until their absolute sum is at most 1.
        # Rebind instead of using `X /= 2`: an in-place write into an array
        # produced by a recorded operation is rejected by MXNet autograd,
        # which would make this block unusable inside `autograd.record()`.
        while np.abs(X).sum() > 1:
            X = X / 2

        return X.sum()

In [34]:
# FixedHiddenMLP.forward ends with X.sum(), so the displayed result is a
# single scalar rather than a (batch, features) array.
net4 = FixedHiddenMLP()
net4.initialize()
net4(X)

array(0.8542471)

In [35]:
class NestMLP(nn.Block):
    """Custom block that wraps a Sequential sub-network plus one dense layer.

    Layer widths: 64 -> 32 inside the nested Sequential, then 16 in the
    trailing dense layer; every layer uses ReLU.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.net = nn.Sequential()
        self.net.add(nn.Dense(64, activation="relu"))
        self.net.add(nn.Dense(32, activation="relu"))
        self.dense = nn.Dense(16, activation="relu")

    def forward(self, X):
        """Run X through the nested sub-network, then the final dense layer."""
        features = self.net(X)
        return self.dense(features)
    
# Chain three differently built blocks: a nested custom block, a plain dense
# layer, and the control-flow block (whose forward returns a scalar).
chimera = nn.Sequential()
chimera.add(NestMLP())
chimera.add(nn.Dense(20))
chimera.add(FixedHiddenMLP())
chimera.initialize()
chimera(X)

array(0.55749935)

In [36]:
class ParallelMLP(nn.Block):
    """Two separate MLP branches evaluated on the same input.

    ``net1`` has layer widths 64 -> 32 and ``net2`` has 128 -> 64; all
    layers use ReLU.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # First branch.
        self.net1 = nn.Sequential()
        self.net1.add(nn.Dense(64, activation="relu"))
        self.net1.add(nn.Dense(32, activation="relu"))

        # Second branch.
        self.net2 = nn.Sequential()
        self.net2.add(nn.Dense(128, activation="relu"))
        self.net2.add(nn.Dense(64, activation="relu"))

    def forward(self, X):
        """Return the pair ``(net1(X), net2(X))``."""
        first = self.net1(X)
        second = self.net2(X)
        return first, second

In [37]:
pegasus = ParallelMLP()
pegasus.initialize()
# forward() returns a pair, one output per branch; unpack it and display
# both shapes.
o1, o2 = pegasus(X)
o1.shape, o2.shape

((2, 32), (2, 64))

In [38]:
import time

In [39]:
class FactoryMLP(nn.Block):
    """Network made of three sub-blocks produced by the same factory method."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.net = nn.Sequential()
        # Each call to the factory builds a fresh sub-block with its own layers.
        for _ in range(3):
            self.net.add(self.gen_block_A())

    def gen_block_A(self):
        """Build and return a new Sequential of Dense(64) -> Dense(32), both ReLU."""
        block = nn.Sequential()
        block.add(nn.Dense(64, activation="relu"))
        block.add(nn.Dense(32, activation="relu"))
        return block

    def forward(self, X):
        """Run X through the three stacked sub-blocks."""
        return self.net(X)

factory = FactoryMLP()
factory.initialize()

# MXNet dispatches operators asynchronously, so stopping the clock right after
# the call would only measure how long it took to enqueue the work. Block on
# the result so the reported duration covers the actual forward computation.
# NOTE(review): this is the first call on `factory`, so the figure presumably
# also includes one-time setup cost; confirm before using it as a benchmark.
start_time = time.time()
factory(X).wait_to_read()
print("End time:", time.time() - start_time)

End time: 0.008774042129516602


In [40]:
# Shape of the input batch created with size=(2, 20).
X.shape

(2, 20)

In [41]:
# Compare the parameters of the first Dense layer inside the first factory
# sub-block with those of the first layer of `net`.
factory.net[0][0].params, net[0].params

(dense50_ (
   Parameter dense50_weight (shape=(64, 20), dtype=float32)
   Parameter dense50_bias (shape=(64,), dtype=float32)
 ),
 dense34_ (
   Parameter dense34_weight (shape=(256, 20), dtype=float32)
   Parameter dense34_bias (shape=(256,), dtype=float32)
 ))

In [43]:
# Three views of the second layer's bias: the type of the wrapper object,
# the object itself, and the array returned by its data() method.
print(type(net[1].bias))
print(net[1].bias)
print(net[1].bias.data())

<class 'mxnet.gluon.parameter.Parameter'>
Parameter dense35_bias (shape=(10,), dtype=float32)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [44]:
# Gradient buffer of the second layer's weight.
# NOTE(review): no backward pass is visible in this part of the notebook,
# which would explain an all-zero result; confirm against earlier cells.
net[1].weight.grad()

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])