In [1]:
import numpy as np
from mxnet import nd
from mxnet.gluon import nn

  from ._conv import register_converters as _register_converters


## 4.1.1 Inherited from Block
page 114

In [2]:
class MLP(nn.Block):
    """Multilayer perceptron made of two fully connected layers.

    A 256-unit ReLU hidden layer feeds a 10-unit output layer.
    """

    def __init__(self, **kwargs):
        """Initialise the parent Block, then declare the two layers."""
        # The parent initialiser has to run before any layer is assigned
        # (issue 4.1.5.1): without it self._children does not exist and we get
        # AttributeError: 'MLP' object has no attribute '_children'.
        super().__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')  # hidden layer
        self.output = nn.Dense(10)  # output layer

    def forward(self, x):
        """Forward computation: hidden layer first, then the output layer."""
        hidden_out = self.hidden(x)
        return self.output(hidden_out)

In [3]:
# Build a random input of shape (2, 20), initialise the MLP and run one forward pass.
x = nd.random.uniform(shape=(2, 20))
net = MLP()
net.initialize()
net(x)


[[ 0.09543003  0.04614332 -0.00286653 -0.07790346 -0.05130243  0.02942039
   0.08696645 -0.0190793  -0.04122177  0.05088576]
 [ 0.0769287   0.03099705  0.00856576 -0.04467198 -0.0692684   0.09132432
   0.06786594 -0.06187843 -0.03436674  0.04234695]]
<NDArray 2x10 @cpu(0)>

## 4.1.2 Sequential inherited from Block
page 115

In [4]:
class MySequential(nn.Block):
    """Minimal sequential container built directly on nn.Block."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def add(self, block: nn.Block):
        """Register `block` as a child of this container.

        `block` is an nn.Block instance that is assumed to have a unique name.
        It is stored under that name in the Block's `_children` instance
        variable, an OrderedDict. When initialize() is called on the container,
        the blocks held in `_children` are initialised automatically.
        """
        self._children[block.name] = block

    def forward(self, x):
        """Pass `x` through every registered child.

        `_children` is an OrderedDict, so the children run in the order in
        which they were added.
        """
        out = x
        for child in self._children.values():
            out = child(out)
        return out

In [5]:
# Rebuild the same two-layer network with MySequential and run it on x.
net = MySequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
net(x)


[[ 0.0036223   0.00633331  0.03201144 -0.01369375  0.10336448 -0.03508019
  -0.00032164 -0.01676024  0.06978628  0.01303308]
 [ 0.03871716  0.02608212  0.03544959 -0.02521311  0.11005434 -0.01430662
  -0.03052465 -0.03852826  0.06321152  0.0038594 ]]
<NDArray 2x10 @cpu(0)>

## 4.1.3 Complex model without learning parameters
page 116

In [6]:
# Fresh random input of shape (2, 20) used by the FancyMLP / NestMLP cells below.
x = nd.random.uniform(shape=(2, 20))

In [7]:
class FancyMLP(nn.Block):
    """MLP showing a constant parameter, a reused layer and Python control flow."""

    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        # get_constant() creates random weights that do not change during training.
        self.rand_weight = self.params.get_constant(
            'rand_weight', nd.random.uniform(shape=(20, 20)))
        self.dense = nn.Dense(20, activation='relu')

    def forward(self, x):
        """Run the forward computation and return the sum of the final activations.

        Steps: dense layer -> relu(x . rand_weight + 1) -> the same dense layer
        again -> rescale by the L2 norm -> sum.
        """
        x = self.dense(x)
        # Use the constant parameter with NDArray's dot and relu; rand_weight is
        # not learned. The constant is fetched on the input's context so the
        # block also works when parameters live on a non-default or on several
        # devices.
        x = nd.relu(nd.dot(x, self.rand_weight.data(x.context)) + 1)
        # Reuse the fully connected layer: both calls share its parameters.
        x = self.dense(x)
        # Control flow on the L2 norm (http://mathworld.wolfram.com/L2-Norm.html).
        # asscalar() turns the one-element norm into a Python scalar for the
        # comparison.
        # Recorded experiment notes: [26.93217] was noted for this asscalar()
        # form and [33.071712] for a variant comparing x.norm() directly --
        # presumably from separate random runs; TODO confirm.
        while x.norm().asscalar() > 1:
            x /= 2
        if x.norm().asscalar() < 0.8:
            x *= 10
        return x.sum()

In [8]:
# Initialise FancyMLP and run it on the (2, 20) input x.
net = FancyMLP()
net.initialize()
# 1. The number of input columns must stay at 20; using 30 raises
#    "MXNetError: Shape inconsistent, Provided = [20,30], inferred shape=(20,20)"
# x = nd.random.uniform(shape=(3, 30))
# 2. The number of rows may be changed.
net(x)


[3.536038]
<NDArray 1 @cpu(0)>

In [9]:
class NestMLP(nn.Block):
    """Block that nests an nn.Sequential sub-network in front of a dense layer."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # self.net has to be callable because forward() invokes self.net(x).
        # Storing the layers in a plain Python list instead, e.g.
        #     self.net = [nn.Dense(64, activation='relu'),
        #                 nn.Dense(32, activation='relu')]
        # fails with: TypeError: 'list' object is not callable
        self.net = nn.Sequential()
        self.net.add(
            nn.Dense(64, activation='relu'),
            nn.Dense(32, activation='relu'),
        )
        self.dense = nn.Dense(16, activation='relu')

    def forward(self, x):
        """Apply the nested sub-network, then the final 16-unit dense layer."""
        nested_out = self.net(x)
        return self.dense(nested_out)

In [10]:
# Chain the custom blocks: NestMLP -> Dense(20) -> FancyMLP, then run on x.
net = nn.Sequential()
net.add(NestMLP(), nn.Dense(20), FancyMLP())
net.initialize()
net(x)


[4.310071]
<NDArray 1 @cpu(0)>

## 4.2 Model parameters
page 118

In [11]:
from mxnet import init, nd
from mxnet.gluon import nn

# Two-layer network used throughout section 4.2.
net = nn.Sequential()
net.add(
    nn.Dense(256, activation='relu'),
    nn.Dense(10),
)
net.initialize()

# Run one forward pass on a random (2, 20) input.
x = nd.random.uniform(shape=(2, 20))
y = net(x)

### 4.2.1 Access model parameters
page 118

In [12]:
# Parameters of the first layer added to the Sequential container.
net[0].params

dense10_ (
  Parameter dense10_weight (shape=(256, 20), dtype=float32)
  Parameter dense10_bias (shape=(256,), dtype=float32)
)

In [13]:
# Look up the weight by its default name, and through the layer's `weight` attribute;
# the output shows both refer to the parameter dense10_weight.
net[0].params['dense10_weight'], net[0].weight

(Parameter dense10_weight (shape=(256, 20), dtype=float32),
 Parameter dense10_weight (shape=(256, 20), dtype=float32))

In [14]:
# Current values of the first layer's weight parameter.
net[0].weight.data()


[[-0.03555115  0.01875034  0.02322027 ...  0.06564643  0.04601197
  -0.01915742]
 [ 0.05949537 -0.01434531 -0.06355897 ... -0.05305162  0.06888158
  -0.0361836 ]
 [-0.03508119 -0.03804309 -0.05517314 ... -0.0514059  -0.01693203
  -0.01760576]
 ...
 [ 0.03173313  0.01789995  0.02519771 ... -0.06176154 -0.03986754
  -0.04898471]
 [ 0.00564718  0.04665586 -0.00028374 ...  0.05332779  0.02100175
  -0.06427249]
 [ 0.0438781   0.05357236  0.02753124 ...  0.04084889 -0.01963295
   0.05668835]]
<NDArray 256x20 @cpu(0)>

In [15]:
# No backpropagation has been run yet, so the gradient is all zeros.
net[0].weight.grad()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 256x20 @cpu(0)>

In [16]:
# Values of the output layer's bias parameter.
net[1].bias.data()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>

In [17]:
# Collect every parameter of the network, across all layers.
net.collect_params()

sequential2_ (
  Parameter dense10_weight (shape=(256, 20), dtype=float32)
  Parameter dense10_bias (shape=(256,), dtype=float32)
  Parameter dense11_weight (shape=(10, 256), dtype=float32)
  Parameter dense11_bias (shape=(10,), dtype=float32)
)

In [18]:
# Collect only the parameters whose name matches the regular expression '.*weight'.
net.collect_params('.*weight')

sequential2_ (
  Parameter dense10_weight (shape=(256, 20), dtype=float32)
  Parameter dense11_weight (shape=(10, 256), dtype=float32)
)

### 4.2.2 Initialize model parameters
page 120

In [19]:
# Re-initialising a model that has already been initialised requires force_reinit=True.
# Here the parameters are initialised from a normal distribution with sigma=0.01.
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True) 
net[0].weight.data()[0]


[-0.00045153 -0.0007181  -0.00303942  0.00052552 -0.00951675 -0.00264655
  0.00024275  0.01360035 -0.01203111  0.00198488 -0.01178844 -0.00134574
 -0.0096758  -0.02177683  0.01439619  0.01977801  0.00150742  0.00320251
 -0.01048041 -0.01397015]
<NDArray 20 @cpu(0)>

In [20]:
# Re-initialise every parameter to the constant 1 and show the first weight row.
net.initialize(init=init.Constant(1), force_reinit=True)
net[0].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 20 @cpu(0)>

In [21]:
# Re-initialise with the Xavier initializer and show the first weight row.
net.initialize(init=init.Xavier(), force_reinit=True)
net[0].weight.data()[0]


[ 0.02390976 -0.03466652 -0.07621067 -0.11896712 -0.09759908  0.14299902
  0.10603461  0.11694542 -0.13018094 -0.06749497 -0.00866343 -0.00790583
 -0.11328436 -0.00597025 -0.01266268  0.00247419  0.14153317  0.13778725
 -0.02249777 -0.1179658 ]
<NDArray 20 @cpu(0)>

### 4.2.3 Customized initialization method
page 121

In [22]:
class MyInit(init.Initializer):
    """Custom initializer: uniform random weights with small magnitudes zeroed."""

    def _init_weight(self, name, data):
        """Fill `data` in place and report the parameter being initialised.

        Values are drawn uniformly between -10 and 10; every entry whose
        absolute value is below 5 is then set to zero.
        """
        print('Init', name, data.shape)
        data[:] = nd.random.uniform(low=-10, high=10, shape=data.shape)
        # 1 where |value| >= 5 and 0 elsewhere; multiplying zeroes the rest.
        keep_mask = data.abs() >= 5
        data *= keep_mask

In [23]:
# Re-initialise with the custom initializer; it prints one "Init" line per weight.
net.initialize(MyInit(), force_reinit=True)
net[0].weight.data()[0]

Init dense10_weight (256, 20)
Init dense11_weight (10, 256)



[-6.008077   0.        -0.        -0.         0.        -0.
 -0.        -8.591784   5.629592  -0.         5.0204325 -0.
  8.544237  -6.7921295 -9.420949  -0.         7.913826  -8.087603
 -0.         7.4220505]
<NDArray 20 @cpu(0)>

In [24]:
# Use set_data() to overwrite a model parameter directly: add 1 to every weight entry.
net[0].weight.set_data(net[0].weight.data() + 1)
net[0].weight.data()[0]


[-5.008077   1.         1.         1.         1.         1.
  1.        -7.5917835  6.629592   1.         6.0204325  1.
  9.544237  -5.7921295 -8.420949   1.         8.913826  -7.0876026
  1.         8.42205  ]
<NDArray 20 @cpu(0)>

### 4.2.4 Shared model parameters
page 122

In [25]:
# Network whose second and third layers share one set of parameters.
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
net.add(
    nn.Dense(8, activation='relu'),
    shared,
    # The third layer is built from shared.params, so layers 2 and 3 share parameters.
    nn.Dense(8, activation='relu', params=shared.params),
    nn.Dense(10)
)
net.initialize()

# Forward pass on a random (2, 20) input.
x = nd.random.uniform(shape=(2, 20))
net(x)


[[-2.8930701e-05 -9.8856308e-06 -2.9208031e-05  2.2878423e-05
  -2.3972660e-05  3.5956542e-05 -2.5916641e-05  7.8715402e-08
  -2.5550988e-05 -1.0085280e-05]
 [ 3.8551381e-05  2.4488436e-06 -3.2839023e-05  5.3968040e-05
  -2.9661947e-05 -2.2196458e-05 -5.3009750e-05  6.9562004e-05
  -5.2102761e-05 -5.6052133e-05]]
<NDArray 2x10 @cpu(0)>

## 4.3 Model Lazy Initialization

### 4.3.1 Lazy initialization

In [26]:
from mxnet import init, nd
from mxnet.gluon import nn

class MyInit(init.Initializer):
    """Initializer that only reports when a weight is initialised."""

    def _init_weight(self, name, data):
        """Print the parameter name and shape; `data` is left untouched."""
        print("Init", name, data.shape)
        # The actual initialization logic is intentionally omitted here.
        
# Build a two-layer network without giving input sizes, then request initialization
# with the printing initializer.
net = nn.Sequential()
net.add(
    nn.Dense(256, activation='relu'),
    nn.Dense(10)
)
net.initialize(init=MyInit())

In [27]:
# The initialize() call above printed no "Init" line from _init_weight.
# The first forward pass infers the parameter shapes and then initialises the weights.
x = nd.random.uniform(shape=(2, 20))
y = net(x)
# Side effect: the model parameters cannot be accessed before the first forward pass.

Init dense16_weight (256, 20)
Init dense17_weight (10, 256)


### 4.3.2 Avoid lazy initialization

In [28]:
# If the system already knows every parameter shape when initialize() is called,
# deferred initialization does not happen. Two such cases are shown here.
# Case 1: the model has already been initialised and is being re-initialised. The
# parameter sizes do not change, so re-initialization can happen immediately.
net.initialize(init=MyInit(), force_reinit=True)

Init dense16_weight (256, 20)
Init dense17_weight (10, 256)


In [29]:
# Case 2: the input size of every layer is given (in_units) when the layer is created,
# so the system needs no extra information to infer the parameter shapes.
net = nn.Sequential()
net.add(nn.Dense(256, in_units=20, activation='relu'))
net.add(nn.Dense(10, in_units=256))
net.initialize(init=MyInit())

Init dense18_weight (256, 20)
Init dense19_weight (10, 256)


## 4.4 Customized Layer

### 4.4.1 Customized layer without model parameters

In [30]:
from mxnet import nd, gluon
from mxnet.gluon import nn

In [31]:
class CenteredLayer(nn.Block):
    """Parameter-free layer that subtracts the input's mean from the input."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def forward(self, x):
        """Return `x` minus the mean of `x`."""
        mean_value = x.mean()
        return x - mean_value

In [32]:
# Centre the values 1..5: their mean (3) is subtracted from each element.
layer = CenteredLayer()
layer(nd.arange(1, 6))


[-2. -1.  0.  1.  2.]
<NDArray 5 @cpu(0)>

In [33]:
# Use the custom layer inside a larger model: Dense(128) followed by CenteredLayer.
net = nn.Sequential()
net.add(
    nn.Dense(128),
    CenteredLayer()
)
net.initialize()
y = net(nd.random.uniform(shape=(4, 8)))
# The mean of the centred output; the recorded value is on the order of 1e-10.
y.mean().asscalar()

-1.9645086e-10

### 4.4.2 Customized layer with model parameters

In [34]:
# Create a ParameterDict and use get() to obtain a parameter named 'param2' of shape (2, 3).
params = gluon.ParameterDict()
params.get('param2', shape=(2, 3))
params

(
  Parameter param2 (shape=(2, 3), dtype=<class 'numpy.float32'>)
)

In [35]:
class MyDense(nn.Block):
    """Fully connected layer with ReLU activation, built from explicit parameters.

    units: number of outputs of the layer
    in_units: number of inputs of the layer
    """

    def __init__(self, units, in_units, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        # Both parameters are created through the block's own ParameterDict with
        # fully specified shapes.
        self.weight = self.params.get('weight', shape=(in_units, units))
        self.bias = self.params.get('bias', shape=(units, ))

    def forward(self, x):
        """Return relu(x . weight + bias)."""
        # Fetch the parameter copies that live on the input's context, so the
        # layer also works when parameters were initialised on a non-default
        # device or on several devices.
        ctx = x.context
        linear = nd.dot(x, self.weight.data(ctx)) + self.bias.data(ctx)
        return nd.relu(linear)

In [36]:
# Instantiate the custom layer (10 inputs, 5 outputs) and inspect its parameters.
dense = MyDense(units=5, in_units=10)
dense.params

mydense0_ (
  Parameter mydense0_weight (shape=(10, 5), dtype=<class 'numpy.float32'>)
  Parameter mydense0_bias (shape=(5,), dtype=<class 'numpy.float32'>)
)

In [37]:
# Initialise the custom layer and run a forward pass on a random (2, 10) input.
dense.initialize()
dense(nd.random.uniform(shape=(2, 10)))


[[0.07590036 0.19890553 0.06891428 0.         0.        ]
 [0.09435701 0.13325097 0.03792883 0.         0.        ]]
<NDArray 2x5 @cpu(0)>

In [38]:
# Stack two custom layers (64 -> 32 -> 2) and run them on a random (2, 64) input.
net = nn.Sequential()
net.add(
    MyDense(32, in_units=64),
    MyDense(2, in_units=32)
)
net.initialize()
net(nd.random.uniform(shape=(2, 64)))


[[0.00054134 0.        ]
 [0.         0.        ]]
<NDArray 2x2 @cpu(0)>

## 4.5 Read and Store

### 4.5.1 Read Write NDArrays

In [39]:
from mxnet import nd
from mxnet.gluon import nn 
# Save a single NDArray to the file 'x'.
x = nd.ones(3)
nd.save('x', x)

In [40]:
# Load it back; per the recorded output the result is a list containing the NDArray.
x2 = nd.load('x')
x2

[
 [1. 1. 1.]
 <NDArray 3 @cpu(0)>]

In [41]:
# Save a list of NDArrays to 'xy' and load both back.
y = nd.zeros(4)
nd.save('xy', [x, y])
x2, y2 = nd.load('xy')
(x2, y2)

(
 [1. 1. 1.]
 <NDArray 3 @cpu(0)>, 
 [0. 0. 0. 0.]
 <NDArray 4 @cpu(0)>)

In [42]:
# Save a dict mapping strings to NDArrays to 'mydict' and load it back.
mydict = {'x':x, 'y':y}
nd.save('mydict', mydict)
mydict2 = nd.load('mydict')
mydict2

{'x': 
 [1. 1. 1.]
 <NDArray 3 @cpu(0)>, 'y': 
 [0. 0. 0. 0.]
 <NDArray 4 @cpu(0)>}

### 4.5.2 Read Write Parameters of Gluon Model

In [43]:
class MLP(nn.Block):
    """Two-layer perceptron used to demonstrate saving and loading parameters."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')  # hidden layer
        self.output = nn.Dense(10)  # output layer

    def forward(self, x):
        """Apply the hidden layer, then the output layer."""
        hidden_out = self.hidden(x)
        return self.output(hidden_out)

In [44]:
# Create the model whose parameters will be saved, and request initialization.
net = MLP()
net.initialize()

In [45]:
# Because of deferred initialization, one forward computation has to be run
# before the model parameters are actually initialised.
x = nd.random.uniform(shape=(2, 20))
y = net(x)

In [46]:
# Write the model parameters to the file 'mlp.params'.
filename = 'mlp.params'
net.save_parameters(filename)

In [47]:
# Create a second MLP and read the saved parameters into it
# (no initialize() call is made on net2).
net2 = MLP()
net2.load_parameters(filename)

In [48]:
# Run the reloaded model on the same input and compare with the original output.
# NOTE(review): per the recorded output, np.all on an MXNet NDArray yields a
# one-element NDArray rather than a Python bool -- consider
# (y2 == y).asnumpy().all() for an unambiguous check; confirm.
y2 = net2(x)
np.all(y2 == y)


[1.]
<NDArray 1 @cpu(0)>

## 4.6 GPU device
### 4.6.1 NDArray on GPU

In [49]:
import mxnet as mx 
from mxnet import nd
from mxnet.gluon import nn 

# Context objects for the CPU, the default GPU (index 0) and GPU 1.
mx.cpu(), mx.gpu(), mx.gpu(1)

(cpu(0), gpu(0), gpu(1))

In [50]:
# An NDArray created without ctx; the recorded output shows it lives on cpu(0).
x = nd.array([1, 2, 3])
x.context

cpu(0)

In [51]:
# Create the array directly on the GPU by passing ctx.
a = nd.array([1, 2, 3], ctx=mx.gpu())
a


[1. 2. 3.]
<NDArray 3 @gpu(0)>

In [52]:
# Create a random (2, 3) array on the second GPU, gpu(1).
b = nd.random.uniform(shape=(2, 3), ctx=mx.gpu(1))
b


[[0.59119    0.313164   0.76352036]
 [0.9731786  0.35454726 0.11677533]]
<NDArray 2x3 @gpu(1)>

In [53]:
# Copy x from the CPU to gpu(0) with copyto().
y = x.copyto(mx.gpu())
y


[1. 2. 3.]
<NDArray 3 @gpu(0)>

In [54]:
# Obtain x on gpu(0) with as_in_context().
z = x.as_in_context(mx.gpu())
z


[1. 2. 3.]
<NDArray 3 @gpu(0)>

In [55]:
# y is already on gpu(0): as_in_context() returns the very same object (output: True).
y.as_in_context(mx.gpu()) is y

True

In [56]:
# copyto() returns a different object even when the target context is unchanged (output: False).
y.copyto(mx.gpu()) is y

False

In [57]:
# GPU calculation
# Note: MXNet requires all input data of a computation to be on the same CPU/GPU.
# The reason for this design is that exchanging data between different CPUs/GPUs
# is usually time-consuming, so MXNet expects the user to state explicitly that
# the inputs of a computation are on the same CPU/GPU. For example, combining
# x (on the CPU) with y (on the GPU) in one operation produces an error message.
(z + 2).exp() * y


[ 20.085537 109.1963   445.2395  ]
<NDArray 3 @gpu(0)>

### 4.6.2 Gluon GPU

In [58]:
# Build a one-layer network and initialise its parameters on the GPU.
net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize(ctx=mx.gpu())

In [59]:
# When the input is an NDArray on a GPU, Gluon computes the result on the same GPU.
net(y)


[[-0.03091596]
 [-0.06183191]
 [-0.09274787]]
<NDArray 3x1 @gpu(0)>

In [60]:
# The layer's weight; the recorded output shows it is stored on gpu(0).
net[0].weight.data()


[[-0.03091596]]
<NDArray 1x1 @gpu(0)>