In [1]:
from mxnet import np, npx
from mxnet.gluon import nn

In [2]:
npx.set_np()

In [3]:
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
X = np.random.uniform(size=(2, 20))
net(X)

array([[ 0.06240272, -0.03268593,  0.02582653,  0.02254182, -0.03728798,
        -0.04253786,  0.00540613, -0.01364186, -0.09915452, -0.02272738],
       [ 0.02816677, -0.03341204,  0.03565666,  0.02506382, -0.04136416,
        -0.04941845,  0.01738528,  0.01081961, -0.09932579, -0.01176298]])

In [4]:
class MLP(nn.Block):
    # Declare a layer with model parameters. Here, we declare two
    # fully-connected layers
    def __init__(self, **kwargs):
        # Call the constructor of the `MLP` parent class `Block` to perform
        # the necessary initialization. In this way, other function arguments
        # can also be specified during class instantiation, such as the model
        # parameters, `params` (to be described later)
        super().__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu') # Hidden layer
        self.out = nn.Dense(10) # Output layer
        # Define the forward propagation of the model, that is, how to return the
        # required model output based on the input `X`
    def forward(self, X):
        return self.out(self.hidden(X))


In [5]:
net = MLP()
net.initialize()
net(X)

array([[-0.03989594, -0.1041471 ,  0.06799038,  0.05245074,  0.02526059,
        -0.00640342,  0.04182098, -0.01665319, -0.02067346, -0.07863817],
       [-0.03612847, -0.07210436,  0.09159479,  0.07890771,  0.02494172,
        -0.01028665,  0.01732428, -0.02843242,  0.03772651, -0.06671704]])

The Sequential Block

In [6]:
class MySequential(nn.Block):
    def add(self, block):
        # Here, `block` is an instance of a `Block` subclass, and we assume
        # that it has a unique name. We save it in the member variable
        # `_children` of the `Block` class, and its type is OrderedDict. When
        # the `MySequential` instance calls the `initialize` function, the
        # system automatically initializes all members of `_children`
        self._children[block.name] = block
    def forward(self, X):
        # OrderedDict guarantees that members will be traversed in the order
        # they were added
        for block in self._children.values():
            X = block(X)
        return X

In [7]:
net = MySequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
net(X)

array([[-0.07645682, -0.01130233,  0.04952145, -0.04651389, -0.04131573,
        -0.05884133, -0.0621381 ,  0.01311472, -0.01379425, -0.02514282],
       [-0.05124625,  0.00711231, -0.00155935, -0.07555379, -0.06675334,
        -0.01762914,  0.00589084,  0.01447191, -0.04330775,  0.03317726]])

Executing Code in the Forward Propagation Function

In [8]:
class FixedHiddenMLP(nn.Block):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Random weight parameters created with the `get_constant` function
        # are not updated during training (i.e., constant parameters)
        self.rand_weight = self.params.get_constant('rand_weight', np.random.uniform(size=(20, 20)))
        self.dense = nn.Dense(20, activation='relu')
    def forward(self, X):
        X = self.dense(X)
        # Use the created constant parameters, as well as the `relu` and `dot`
        # functions
        X = npx.relu(np.dot(X, self.rand_weight.data()) + 1)
        # Reuse the fully-connected layer. This is equivalent to sharing
        # parameters with two fully-connected layers
        X = self.dense(X)
        # Control flow
        while np.abs(X).sum() > 1:
            X /= 2
        return X.sum()


In [9]:
net = FixedHiddenMLP()
net.initialize()
net(X)


array(0.52637565)

In [10]:
class NestMLP(nn.Block):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.net = nn.Sequential()
        self.net.add(nn.Dense(64, activation='relu'),nn.Dense(32, activation='relu'))
        self.dense = nn.Dense(16, activation='relu')
    def forward(self, X):
        return self.dense(self.net(X))
    
chimera = nn.Sequential()
chimera.add(NestMLP(), nn.Dense(20), FixedHiddenMLP())
chimera.initialize()
chimera(X)


array(0.97720534)

Parameter Management

In [11]:
from mxnet import init, np, npx
from mxnet.gluon import nn
npx.set_np()
net = nn.Sequential()
net.add(nn.Dense(8, activation='relu'))
net.add(nn.Dense(1))
net.initialize() # Use the default initialization method
X = np.random.uniform(size=(2, 4))
net(X) 

array([[-0.00061528],
       [-0.00041872]])

In [12]:
print(net[1].params)

dense13_ (
  Parameter dense13_weight (shape=(1, 8), dtype=float32)
  Parameter dense13_bias (shape=(1,), dtype=float32)
)


In [13]:
print(type(net[1].bias))
print(net[1].bias)
print(net[1].bias.data())

<class 'mxnet.gluon.parameter.Parameter'>
Parameter dense13_bias (shape=(1,), dtype=float32)
[0.]


In [14]:
net[1].weight.grad()

array([[0., 0., 0., 0., 0., 0., 0., 0.]])

In [15]:
print(net[0].collect_params())
print(net.collect_params())


dense12_ (
  Parameter dense12_weight (shape=(8, 4), dtype=float32)
  Parameter dense12_bias (shape=(8,), dtype=float32)
)
sequential3_ (
  Parameter dense12_weight (shape=(8, 4), dtype=float32)
  Parameter dense12_bias (shape=(8,), dtype=float32)
  Parameter dense13_weight (shape=(1, 8), dtype=float32)
  Parameter dense13_bias (shape=(1,), dtype=float32)
)


Collecting Parameters from Nested Blocks

In [17]:
def block1():
    net = nn.Sequential()
    net.add(nn.Dense(32, activation='relu'))
    net.add(nn.Dense(16, activation='relu'))
    return net
def block2():
    net = nn.Sequential()
    for _ in range(4):
        # Nested here
        net.add(block1())
    return net

rgnet = nn.Sequential()
rgnet.add(block2())
rgnet.add(nn.Dense(10))
rgnet.initialize()
rgnet(X)

array([[-8.0886596e-11, -1.3496230e-09, -5.5527755e-10, -1.2805073e-09,
        -1.0304946e-09,  1.7497598e-09,  1.0153671e-09,  1.7267601e-10,
        -9.1138991e-10,  3.9919201e-10],
       [ 1.1531965e-09, -7.4176387e-10, -6.1084227e-10, -2.1753395e-10,
         2.7838110e-11, -2.9127717e-10, -1.1417124e-09, -6.7512790e-10,
        -1.0054486e-09, -6.7114064e-10]])

In [18]:
print(rgnet.collect_params)
print(rgnet.collect_params())


<bound method Block.collect_params of Sequential(
  (0): Sequential(
    (0): Sequential(
      (0): Dense(4 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
    (1): Sequential(
      (0): Dense(16 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
    (2): Sequential(
      (0): Dense(16 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
    (3): Sequential(
      (0): Dense(16 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
  )
  (1): Dense(16 -> 10, linear)
)>
sequential4_ (
  Parameter dense14_weight (shape=(32, 4), dtype=float32)
  Parameter dense14_bias (shape=(32,), dtype=float32)
  Parameter dense15_weight (shape=(16, 32), dtype=float32)
  Parameter dense15_bias (shape=(16,), dtype=float32)
  Parameter dense16_weight (shape=(32, 16), dtype=float32)
  Parameter dense16_bias (shape=(32,), dtype=float32)
  Parameter dense17_weight (shape=(16, 32), dtype=float32)
  Parameter de

In [19]:
rgnet[0][1][0].bias.data()

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

Parameter Initialization

In [20]:
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
net[0].weight.data()[0]

array([-0.00194253,  0.00707577,  0.00459522,  0.00411877])

In [21]:
net.initialize(init=init.Constant(1), force_reinit=True)
net[0].weight.data()[0]


array([1., 1., 1., 1.])

In [22]:
net[0].weight.initialize(init=init.Xavier(), force_reinit=True)
net[1].initialize(init=init.Constant(42), force_reinit=True)
print(net[0].weight.data()[0])
print(net[1].weight.data())

[-0.13226163 -0.42949793  0.03997087 -0.6268525 ]
[[42. 42. 42. 42. 42. 42. 42. 42.]]


Custom Initialization

In [23]:
class MyInit(init.Initializer):
    def _init_weight(self, name, data):
        print('Init', name, data.shape)
        data[:] = np.random.uniform(-10, 10, data.shape)
        data *= np.abs(data) >= 5
        net.initialize(MyInit(), force_reinit=True)
        net[0].weight.data()[:2]


In [25]:
net[0].weight.data()[:] += 1
net[0].weight.data()[0, 0] = 42
net[0].weight.data()[0]

array([42.        ,  0.57050204,  1.0399709 ,  0.3731475 ])

Tied Parameters

In [26]:
net = nn.Sequential()
# We need to give the shared layer a name so that we can refer to its
# parameters
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'), shared,
nn.Dense(8, activation='relu', params=shared.params), nn.Dense(10))
net.initialize()

X = np.random.uniform(size=(2, 20))
net(X)
# Check whether the parameters are the same
print(net[1].weight.data()[0] == net[2].weight.data()[0])
net[1].weight.data()[0, 0] = 100
# Make sure that they are actually the same object rather than just having the
# same value
print(net[1].weight.data()[0] == net[2].weight.data()[0])

[ True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True]


Instantiating a Network

In [27]:
from mxnet import np, npx
from mxnet.gluon import nn
npx.set_np()

def get_net():
    net = nn.Sequential()
    net.add(nn.Dense(256, activation='relu'))
    net.add(nn.Dense(10))
    return net

net = get_net()

In [28]:
print(net.collect_params)
print(net.collect_params())

<bound method Block.collect_params of Sequential(
  (0): Dense(-1 -> 256, Activation(relu))
  (1): Dense(-1 -> 10, linear)
)>
sequential11_ (
  Parameter dense27_weight (shape=(256, -1), dtype=float32)
  Parameter dense27_bias (shape=(256,), dtype=float32)
  Parameter dense28_weight (shape=(10, -1), dtype=float32)
  Parameter dense28_bias (shape=(10,), dtype=float32)
)


In [29]:
net.initialize()
net.collect_params()

sequential11_ (
  Parameter dense27_weight (shape=(256, -1), dtype=float32)
  Parameter dense27_bias (shape=(256,), dtype=float32)
  Parameter dense28_weight (shape=(10, -1), dtype=float32)
  Parameter dense28_bias (shape=(10,), dtype=float32)
)

In [30]:
X = np.random.uniform(size=(2, 20))
net(X)
net.collect_params()

sequential11_ (
  Parameter dense27_weight (shape=(256, 20), dtype=float32)
  Parameter dense27_bias (shape=(256,), dtype=float32)
  Parameter dense28_weight (shape=(10, 256), dtype=float32)
  Parameter dense28_bias (shape=(10,), dtype=float32)
)

Layers without Parameters

In [31]:
from mxnet import np, npx
from mxnet.gluon import nn
npx.set_np()
class CenteredLayer(nn.Block):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        
    def forward(self, X):
        return X - X.mean()

In [32]:
layer = CenteredLayer()
layer(np.array([1, 2, 3, 4, 5]))


array([-2., -1.,  0.,  1.,  2.])

In [33]:
net = nn.Sequential()
net.add(nn.Dense(128), CenteredLayer())
net.initialize()


In [34]:
Y = net(np.random.uniform(size=(4, 8)))
Y.mean()


array(8.6583896e-10)

Layers with Parameters

In [35]:
class MyDense(nn.Block):
    def __init__(self, units, in_units, **kwargs):
        super().__init__(**kwargs)
        self.weight = self.params.get('weight', shape=(in_units, units))
        self.bias = self.params.get('bias', shape=(units,))
    def forward(self, x):
        linear = np.dot(x, self.weight.data(ctx=x.ctx)) + self.bias.data(ctx=x.ctx)
        return npx.relu(linear)

In [36]:
dense = MyDense(units=3, in_units=5)
dense.params

mydense0_ (
  Parameter mydense0_weight (shape=(5, 3), dtype=<class 'numpy.float32'>)
  Parameter mydense0_bias (shape=(3,), dtype=<class 'numpy.float32'>)
)

In [37]:
dense.initialize()
dense(np.random.uniform(size=(2, 5)))


array([[0.        , 0.01381929, 0.09430309],
       [0.00505158, 0.        , 0.12506852]])

In [38]:
net = nn.Sequential()
net.add(MyDense(8, in_units=64), MyDense(1, in_units=8))
net.initialize()
net(np.random.uniform(size=(2, 64)))


array([[0.],
       [0.]])

Loading and Saving Tensors

In [40]:
from mxnet.gluon import nn

npx.set_np()

x = np.arange(4)
npx.save('x-file', x)

In [41]:
x2 = npx.load('x-file')
x2

[array([0., 1., 2., 3.])]

In [42]:
y = np.zeros(4)
npx.save('x-files', [x, y])
x2, y2 = npx.load('x-files')
(x2, y2)

(array([0., 1., 2., 3.]), array([0., 0., 0., 0.]))

In [43]:
mydict = {'x': x, 'y': y}
npx.save('mydict', mydict)
mydict2 = npx.load('mydict')
mydict2

{'x': array([0., 1., 2., 3.]), 'y': array([0., 0., 0., 0.])}

Loading and Saving Model Parameters

In [44]:
class MLP(nn.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')
        self.output = nn.Dense(10)
    def forward(self, x):
        return self.output(self.hidden(x))
    
net = MLP()
net.initialize()
X = np.random.uniform(size=(2, 20))
Y = net(X)


In [45]:
net.save_parameters('mlp.params')

In [46]:
clone = MLP()
clone.load_parameters('mlp.params')


In [47]:
Y_clone = clone(X)
Y_clone == Y

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True]])