In [1]:
from mxnet import nd
from mxnet.gluon import nn

In [2]:
# Create a dense layer with 2 output units. No input size is given, so the
# repr below reports it as `None`.
layer = nn.Dense(2)
layer

Dense(None -> 2, linear)

In [3]:
# Initialize the layer's parameters; no arguments are passed, so the
# library defaults are used.
layer.initialize()

In [4]:
# Display the layer again: the input size is still reported as `None`
# after `initialize()`.
layer

Dense(None -> 2, linear)

In [5]:
# Random 3x4 input, sampled uniformly with bounds -1 and 1.
x = nd.random.uniform(-1,1,(3,4))

In [6]:
# Show the sampled input values.
x


[[ 0.09762704  0.18568921  0.43037868  0.6885315 ]
 [ 0.20552671  0.71589124  0.08976638  0.6945034 ]
 [-0.15269041  0.24712741  0.29178822 -0.23123658]]
<NDArray 3x4 @cpu(0)>

In [7]:
# Forward pass: the 3x4 input is mapped to a 3x2 output (2 output units).
layer(x)


[[-0.02524132 -0.00874885]
 [-0.06026538 -0.01308061]
 [ 0.02468396 -0.02181557]]
<NDArray 3x2 @cpu(0)>

In [8]:
# After the first forward pass the repr reports an input size of 4,
# matching the second dimension of `x`.
layer

Dense(4 -> 2, linear)

In [10]:
# Access the layer's weight values; the displayed array is 2x4
# (output units x input size).
layer.weight.data()


[[-0.00873779 -0.02834515  0.05484822 -0.06206018]
 [ 0.06491279 -0.03182812 -0.01631819 -0.00312688]]
<NDArray 2x4 @cpu(0)>

In [11]:
net = nn.Sequential()

# Layers are appended one at a time, in the order they will be applied.

# First convolution. As with Dense, the number of input channels
# (`in_channels`) does not have to be given; it is inferred automatically
# during the first forward pass. A relu activation is applied to the
# output. A tuple such as `kernel_size=(2,4)` can be used to request a
# non-square kernel.
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'))
# Pool and stride sizes may also be given as tuples when they are not
# symmetric.
net.add(nn.MaxPool2D(pool_size=2, strides=2))
net.add(nn.Conv2D(channels=16, kernel_size=3, activation='relu'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))
# The dense layer automatically reshapes the 4-D output of the last max
# pooling layer into the 2-D shape (x.shape[0], x.size/x.shape[0]).
net.add(nn.Dense(120, activation="relu"))
net.add(nn.Dense(84, activation="relu"))
net.add(nn.Dense(10))
net

Sequential(
  (0): Conv2D(None -> 6, kernel_size=(5, 5), stride=(1, 1), Activation(relu))
  (1): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
  (2): Conv2D(None -> 16, kernel_size=(3, 3), stride=(1, 1), Activation(relu))
  (3): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
  (4): Dense(None -> 120, Activation(relu))
  (5): Dense(None -> 84, Activation(relu))
  (6): Dense(None -> 10, linear)
)

In [12]:
# Initialize the network's parameters, then run one forward pass on a
# random batch.
net.initialize()
# Input shape is (batch_size, color_channels, height, width)
x = nd.random.uniform(shape=(4,1,28,28))
y = net(x)
# One row of 10 outputs per input sample: (4, 10).
y.shape

(4, 10)

In [13]:
# Show the raw network outputs for the batch.
y


[[-1.8944463e-03 -1.1281811e-03  1.4912135e-03  3.4278480e-03
  -1.2005987e-03  4.5558185e-04 -1.9482849e-03  1.1272548e-03
   2.7200207e-03 -6.1708633e-05]
 [-1.4283753e-03 -1.3048853e-03  1.6090265e-03  3.0578680e-03
  -8.8548334e-04 -2.9706189e-05 -1.6792757e-03  5.9371087e-04
   3.2274700e-03  1.3673720e-04]
 [-1.4781976e-03 -1.8161776e-03  1.9952499e-03  3.6113353e-03
  -9.1113005e-04  1.8122171e-05 -1.5912550e-03  5.1596569e-04
   2.6741112e-03 -2.7984811e-04]
 [-1.3537750e-03 -1.3361122e-03  1.8610689e-03  3.6126999e-03
  -7.8630331e-04  5.7324505e-04 -2.2774055e-03  4.1549964e-04
   3.5023727e-03  4.2847291e-04]]
<NDArray 4x10 @cpu(0)>

In [14]:
# Layers are reachable by index: shape of the first convolution's weight
# and of the bias of the layer at index 5 (the 84-unit dense layer).
(net[0].weight.data().shape, net[5].bias.data().shape)

((6, 1, 5, 5), (84,))

In [15]:
# `weight` itself is a Parameter object (name, shape, dtype), not the raw
# values.
net[0].weight

Parameter conv0_weight (shape=(6, 1, 5, 5), dtype=<class 'numpy.float32'>)

In [16]:
# `data()` returns the values stored in the first layer's weight parameter.
net[0].weight.data()


[[[[ 2.76683643e-02 -5.61007522e-02 -6.15684353e-02  6.57732710e-02
     2.33473405e-02]
   [ 2.14396045e-02  2.38892958e-02 -4.60726582e-02 -4.05464396e-02
    -1.98586956e-02]
   [-5.19503206e-02  3.50960642e-02 -2.58400328e-02  1.50962919e-02
    -1.90804936e-02]
   [-2.44933888e-02  9.82754678e-03 -6.46204427e-02 -8.59578699e-03
     1.87983662e-02]
   [ 6.83723316e-02  6.42528906e-02 -5.57137281e-02  2.13906467e-02
    -4.07572538e-02]]]


 [[[ 1.89082399e-02 -4.74166684e-02  6.93419352e-02  2.14351639e-02
     1.14590526e-02]
   [-3.45391743e-02 -1.19883977e-02 -4.71649319e-03 -3.54234874e-03
    -3.57804187e-02]
   [ 1.72914192e-02 -4.77442592e-02 -2.26789303e-02 -5.45474812e-02
     2.44653225e-02]
   [ 2.18861401e-02 -2.55917571e-02 -5.06543890e-02  3.89683619e-02
    -4.24784720e-02]
   [ 6.29399493e-02 -1.83784738e-02  2.27537602e-02  4.49390560e-02
    -6.80999681e-02]]]


 [[[ 6.41045347e-02 -1.23496987e-02  4.64314222e-03 -4.23428752e-02
     2.68628001e-02]
   [-2.94518

In [17]:

class MixMLP(nn.Block):
    """Custom block combining a nested `nn.Sequential` with a final dense layer.

    Children:
        blk:   Sequential of two dense layers (3 and 4 units), each with a
               relu activation.
        dense: output dense layer with 5 units and no activation.
    """

    def __init__(self, **kwargs):
        """Set up the child layers; `kwargs` are forwarded to `nn.Block`."""
        # The parent initializer must run before children are assigned.
        super().__init__(**kwargs)
        self.blk = nn.Sequential()
        self.blk.add(nn.Dense(3, activation='relu'))
        self.blk.add(nn.Dense(4, activation='relu'))
        self.dense = nn.Dense(5)

    def forward(self, x):
        """Run `x` through `blk`, apply relu, print it, and return `dense` of it.

        The intermediate activation is printed on every call so it can be
        inspected alongside the returned output.
        """
        # relu is applied again on top of `blk`, whose last layer already
        # uses a relu activation.
        hidden = nd.relu(self.blk(x))
        print(hidden)
        return self.dense(hidden)

# Instantiate the custom block and display its structure; the repr lists
# the nested `blk` Sequential and the `dense` layer, with input sizes still
# reported as `None`.
net = MixMLP()
net

MixMLP(
  (blk): Sequential(
    (0): Dense(None -> 3, Activation(relu))
    (1): Dense(None -> 4, Activation(relu))
  )
  (dense): Dense(None -> 5, linear)
)

In [18]:
# Initialize and run a forward pass on a random 2x2 input. Two arrays
# appear below: the 2x4 intermediate activation printed inside `forward`,
# followed by the 2x5 returned output.
net.initialize()
x = nd.random.uniform(shape=(2,2))
net(x)


[[0.0000000e+00 0.0000000e+00 6.2900386e-04 7.6445533e-05]
 [0.0000000e+00 0.0000000e+00 1.1989386e-03 1.2375204e-03]]
<NDArray 2x4 @cpu(0)>



[[-3.8061840e-05  1.5568350e-05  4.3668215e-06  4.2853058e-05
   1.8710394e-05]
 [-1.8345519e-05  2.6403079e-05  2.4685731e-05  7.7019373e-05
   9.7785989e-05]]
<NDArray 2x5 @cpu(0)>

In [19]:
# Reach into the nested Sequential: weight of its second dense layer,
# displayed as a 4x3 array (4 units, 3 inputs).
net.blk[1].weight.data()


[[-0.0343901  -0.05805862 -0.06187592]
 [-0.06210143 -0.00918167 -0.00170272]
 [-0.02634858  0.05334064  0.02748809]
 [ 0.06669661 -0.01711474  0.01647211]]
<NDArray 4x3 @cpu(0)>