In [1]:
from mxnet import nd
from mxnet.gluon import nn

class MLP(nn.Block):
    """Two-layer perceptron: a 256-unit ReLU hidden layer followed by a 10-unit output layer."""

    def __init__(self, **kwargs):
        # Keyword arguments are forwarded untouched to nn.Block.
        super().__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')
        self.output = nn.Dense(10)

    def forward(self, x):
        """Feed ``x`` through the hidden layer and then the output layer."""
        hidden_out = self.hidden(x)
        return self.output(hidden_out)


In [2]:
# Draw a random (2, 20) input batch, build the MLP, initialize its parameters
# and run one forward pass; the value of the last expression is displayed.
X = nd.random.uniform(shape=(2, 20))
net = MLP()
net.initialize()
net(X)


[[ 0.09543004  0.04614332 -0.00286655 -0.07790346 -0.05130241  0.02942038
   0.08696645 -0.0190793  -0.04122177  0.05088576]
 [ 0.0769287   0.03099706  0.00856576 -0.044672   -0.06926838  0.09132431
   0.06786592 -0.06187843 -0.03436674  0.04234696]]
<NDArray 2x10 @cpu(0)>

In [3]:
# Sequential inherits from the Block class
class Mysquential(nn.Block):
    """Minimal sequential container: applies its child blocks in insertion order."""

    def __init__(self, **kwargs):
        super(Mysquential, self).__init__(**kwargs)

    def add(self, block):
        """Append ``block`` to the end of the chain.

        The block is registered through ``register_child`` instead of being
        written into ``self._children`` under ``block.name``. Keying on the
        block's name meant that adding the same layer instance twice silently
        replaced the first entry, so the layer was applied only once.
        """
        self.register_child(block)

    def forward(self, x):
        """Pass ``x`` through every registered child, each output feeding the next."""
        for block in self._children.values():
            x = block(x)
        return x


In [4]:
# Assemble a Dense(256, relu) -> Dense(10) stack with the hand-written
# container and run it on X (defined in an earlier cell).
net = Mysquential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
net(X)


[[ 0.00362229  0.00633331  0.03201145 -0.01369375  0.10336448 -0.0350802
  -0.00032165 -0.01676024  0.06978628  0.01303309]
 [ 0.03871717  0.02608212  0.03544958 -0.02521311  0.11005436 -0.01430663
  -0.03052467 -0.03852826  0.06321152  0.0038594 ]]
<NDArray 2x10 @cpu(0)>

In [5]:
# A more complex model
class FancyMLP(nn.Block):
    """Block whose forward pass mixes a constant parameter, a reused layer and Python control flow."""

    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        # Constant (20, 20) parameter created with get_constant. The registered
        # name was misspelled 'rand_wight'; it now matches the attribute name.
        self.rand_weight = self.params.get_constant(
            'rand_weight', nd.random.uniform(shape=(20, 20)))
        self.dense = nn.Dense(20, activation='relu')

    def forward(self, x):
        """Return the sum of the rescaled activations as a one-element NDArray."""
        x = self.dense(x)
        # Multiply by the constant matrix, add 1 and apply ReLU.
        x = nd.relu(nd.dot(x, self.rand_weight.data()) + 1)
        # The same Dense layer is applied a second time, so both calls use
        # the same parameters.
        x = self.dense(x)
        # Halve in place until the norm is at most 1.
        while x.norm().asscalar() > 1:
            x /= 2
        # Scale up by 10 when the norm ended up below 0.8.
        if x.norm().asscalar() < 0.8:
            x *= 10
        return x.sum()

In [6]:
# Instantiate FancyMLP, initialize it and evaluate it on X.
net = FancyMLP()
net.initialize()
net(X)


[18.571953]
<NDArray 1 @cpu(0)>

In [7]:
class NestMLP(nn.Block):
    """A Sequential of two ReLU Dense layers (64, 32) followed by a 16-unit ReLU Dense layer."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Build the inner two-layer stack first, then attach it as ``self.net``.
        inner = nn.Sequential()
        inner.add(nn.Dense(64, activation='relu'),
                  nn.Dense(32, activation='relu'))
        self.net = inner
        self.dense = nn.Dense(16, activation='relu')

    def forward(self, x):
        """Run ``x`` through the nested Sequential, then through the final Dense layer."""
        nested_out = self.net(x)
        return self.dense(nested_out)


# Chain NestMLP, a Dense(20) layer and FancyMLP inside one Sequential
# container, then evaluate the whole stack on X.
net = nn.Sequential()
net.add(NestMLP(), nn.Dense(20), FancyMLP())
net.initialize()
net(X)


[24.86621]
<NDArray 1 @cpu(0)>

In [8]:
# Accessing, initializing, and sharing model parameters
from mxnet import init

In [9]:
# Fresh Dense(256, relu) -> Dense(10) network whose parameters are inspected
# in the cells below.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()

# One forward pass on a random (2, 20) batch; Y is displayed.
X = nd.random.uniform(shape=(2,20))
Y = net(X)
Y


[[ 0.06281953  0.02262797 -0.04495928 -0.01636342  0.01223228 -0.02480634
  -0.03506397 -0.05295734 -0.04518763 -0.09792533]
 [ 0.01386758 -0.02968636  0.01167846  0.01244943 -0.0305337  -0.02907136
  -0.04247145 -0.02945885 -0.0278103  -0.05699407]]
<NDArray 2x10 @cpu(0)>

In [10]:
# Show the first layer's parameter container together with its type.
net[0].params, type(net[0].params)

(dense10_ (
   Parameter dense10_weight (shape=(256, 20), dtype=float32)
   Parameter dense10_bias (shape=(256,), dtype=float32)
 ),
 mxnet.gluon.parameter.ParameterDict)

In [11]:
# Look up the weight by its full name in the dict and via the layer's
# `weight` attribute; the recorded output shows the same Parameter for both.
net[0].params['dense10_weight'], net[0].weight

(Parameter dense10_weight (shape=(256, 20), dtype=float32),
 Parameter dense10_weight (shape=(256, 20), dtype=float32))

In [12]:
# Current values of the first layer's weight.
net[0].weight.data()


[[-0.06046963  0.00624272 -0.03472826 ... -0.01759475  0.0686483
  -0.06360765]
 [-0.01273243 -0.02659053 -0.04718638 ...  0.02570021  0.02275064
  -0.0166979 ]
 [-0.03555115  0.01875034  0.02322027 ...  0.06564643  0.04601197
  -0.01915742]
 ...
 [ 0.03173313  0.01789995  0.02519771 ... -0.06176154 -0.03986754
  -0.04898471]
 [ 0.00564718  0.04665586 -0.00028374 ...  0.05332779  0.02100175
  -0.06427249]
 [ 0.0438781   0.05357236  0.02753124 ...  0.04084889 -0.01963295
   0.05668835]]
<NDArray 256x20 @cpu(0)>

In [13]:
# Gradient array attached to the first layer's weight.
net[0].weight.grad()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 256x20 @cpu(0)>

In [14]:
# Values of the second (output) layer's bias.
net[1].bias.data()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>

In [15]:
# Collect only the parameters whose names match the pattern '.*weight'.
net.collect_params('.*weight')

sequential2_ (
  Parameter dense10_weight (shape=(256, 20), dtype=float32)
  Parameter dense11_weight (shape=(10, 256), dtype=float32)
)

In [16]:
# Re-initialize the whole network from Normal(sigma=0.01). force_reinit=True
# is passed because the network was already initialized earlier.
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
# Display the first row of the first layer's weight.
net[0].weight.data()[0]


[ 0.00456489 -0.00413096 -0.00670578 -0.01211046 -0.01173558 -0.00717132
 -0.00955144  0.00219873 -0.001119    0.00037409 -0.0045823   0.00580286
  0.00251145 -0.01799514  0.00045524 -0.00941019 -0.00045153 -0.0007181
 -0.00303942  0.00052552]
<NDArray 20 @cpu(0)>

In [17]:
# Re-initialize a single parameter (the first layer's weight) rather than
# the whole network, then display its first row.
net[0].weight.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
net[0].weight.data()[0]


[-0.00566084 -0.00395055 -0.00454888  0.00064433  0.00537331  0.00544384
 -0.0075568  -0.009151    0.0013665   0.00658129 -0.00482921 -0.00525734
 -0.00567577  0.00581563  0.00287522 -0.00044662  0.01271125 -0.00222826
 -0.00131304 -0.02350498]
<NDArray 20 @cpu(0)>

In [18]:
class MyInit(init.Initializer):
    """Initializer that samples uniformly from [-10, 10] and zeroes entries with magnitude below 5."""

    def _init_weight(self, name, data):
        """Log the parameter being initialized, then fill ``data`` in place."""
        print("Init", name, data.shape)
        samples = nd.random.uniform(low=-10, high=10, shape=data.shape)
        data[:] = samples
        # The comparison yields a 0/1 mask; multiplying by it zeroes every
        # entry whose absolute value is under 5.
        keep_mask = data.abs() >= 5
        data *= keep_mask

# Re-initialize the network with the custom initializer and display the
# first layer's weight.
net.initialize(MyInit(), force_reinit=True)
net[0].weight.data()

Init dense10_weight (256, 20)
Init dense11_weight (10, 256)



[[-6.9326224 -0.        -6.008077  ... -0.         7.913826  -8.087603 ]
 [-0.         7.4220505  7.5674496 ...  0.        -0.        -7.361935 ]
 [-0.        -5.98968    0.        ...  0.        -6.4284005  0.       ]
 ...
 [ 5.1953526  0.         9.932615  ...  6.967886   6.283451  -0.       ]
 [-9.786386   0.         0.        ... -0.         0.        -0.       ]
 [-5.459861  -0.        -7.4569807 ...  8.597137  -0.         5.4490337]]
<NDArray 256x20 @cpu(0)>

In [19]:
# Overwrite the weight with its current value plus 1 through set_data.
# NOTE: not idempotent - every re-run of this cell adds another 1.
net[0].weight.set_data(net[0].weight.data()+1)
net[0].weight.data()


[[-5.9326224  1.        -5.008077  ...  1.         8.913826  -7.0876026]
 [ 1.         8.42205    8.56745   ...  1.         1.        -6.361935 ]
 [ 1.        -4.98968    1.        ...  1.        -5.4284005  1.       ]
 ...
 [ 6.1953526  1.        10.932615  ...  7.967886   7.283451   1.       ]
 [-8.786386   1.         1.        ...  1.         1.         1.       ]
 [-4.459861   1.        -6.4569807 ...  9.597137   1.         6.4490337]]
<NDArray 256x20 @cpu(0)>

In [20]:
# Parameter sharing: the third Dense layer is constructed with
# params=shared.params, i.e. with the parameter dict of the second layer.
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'),
       shared,
       nn.Dense(8, activation='relu', params=shared.params),
       nn.Dense(10))
net.initialize()

# A forward pass is run before the weights are read.
X = nd.random.uniform(shape=(2, 20))
net(X)

# Element-wise comparison of the first weight row of layers 1 and 2.
net[1].weight.data()[0] == net[2].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>

In [21]:
class MyInit(init.Initializer):
    """Initializer that only reports which parameter it is asked to initialize."""

    def _init_weight(self, name, data):
        """Print the parameter name and shape; ``data`` itself is left untouched."""
        shape = data.shape
        print("Init", name, shape)

# Neither Dense layer is given in_units here. In the recorded run this cell
# prints nothing; the "Init" lines only appear after the first forward pass.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
       nn.Dense(10))
net.initialize(MyInit(), force_reinit=True)

In [22]:
# First forward pass on a (2, 20) batch; the recorded output shows the
# initializer being invoked at this point.
X = nd.random.uniform(shape=(2, 20))
Y = net(X)

Init dense16_weight (256, 20)
Init dense17_weight (10, 256)


In [23]:
# Re-initialize after a forward pass has been run; in the recorded output
# the "Init" lines are printed immediately this time.
net.initialize(MyInit(),force_reinit=True)

Init dense16_weight (256, 20)
Init dense17_weight (10, 256)


In [24]:
# Same architecture, but with in_units given for both layers; the recorded
# output shows the initializer running directly at initialize().
net = nn.Sequential()
net.add(nn.Dense(256, in_units=10, activation='relu'),
       nn.Dense(10, in_units=256))
net.initialize(MyInit(), force_reinit=True)

Init dense18_weight (256, 10)
Init dense19_weight (10, 256)


In [25]:
# Custom layers
class CenteredLayer(nn.Block):
    """Parameter-free layer that subtracts the mean of its input."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def forward(self, x):
        """Return ``x`` shifted by the mean taken over all of its elements."""
        mean = x.mean()
        return x - mean

In [26]:
# Apply the layer to [1, 2, 3, 4, 5]; the mean (3) is subtracted.
layer = CenteredLayer()
layer(nd.array([1,2,3,4,5]))


[-2. -1.  0.  1.  2.]
<NDArray 5 @cpu(0)>

In [28]:
# Use the custom layer inside a Sequential, after a Dense(128) layer.
net = nn.Sequential()
net.add(nn.Dense(128),
       CenteredLayer())

In [29]:
# Forward a random (4, 8) batch and display the mean of the output as a
# Python scalar; the recorded value is about -2e-10, i.e. close to zero.
net.initialize()
y = net(nd.random.uniform(shape=(4,8)))
y.mean().asscalar()

-2.2555469e-10

In [31]:
from mxnet import gluon
# Create an empty ParameterDict, request a (2, 3) parameter named 'param2'
# through get(), then display the dict.
params = gluon.ParameterDict()
params.get('param2', shape=(2,3))
params

(
  Parameter param2 (shape=(2, 3), dtype=<class 'numpy.float32'>)
)

In [33]:
class MyDense(nn.Block):
    """Hand-written fully connected layer with ReLU activation.

    Parameters
    ----------
    units : int
        Number of outputs.
    in_units : int
        Number of inputs.
    """

    def __init__(self, units, in_units, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        self.weight = self.params.get('weight', shape=(in_units, units))
        # `(units)` is just a parenthesised int; the trailing comma makes the
        # bias shape an actual one-element tuple.
        self.bias = self.params.get('bias', shape=(units,))

    def forward(self, x):
        """Return ``relu(dot(x, weight) + bias)``."""
        linear = nd.dot(x, self.weight.data()) + self.bias.data()
        return nd.relu(linear)

# Build a 5-input, 3-output layer and display its parameter dict.
dense = MyDense(units=3, in_units=5)
dense.params

mydense1_ (
  Parameter mydense1_weight (shape=(5, 3), dtype=<class 'numpy.float32'>)
  Parameter mydense1_bias (shape=(3,), dtype=<class 'numpy.float32'>)
)

In [34]:
# Initialize the custom layer and run it on a random (2, 5) batch.
dense.initialize()
dense(nd.random.uniform(shape=(2, 5)))


[[0.09226421 0.         0.0142631 ]
 [0.11808416 0.         0.02365542]]
<NDArray 2x3 @cpu(0)>

In [39]:
# Stack two custom layers (64 -> 8 -> 1) and evaluate them on a random
# (2, 64) batch.
net = nn.Sequential()
net.add(MyDense(8, in_units=64),
       MyDense(1, in_units=8))
net.initialize()
net(nd.random.uniform(shape=(2,64)))


[[0.]
 [0.]]
<NDArray 2x1 @cpu(0)>

In [41]:
# Reading and saving

# Write a length-3 vector of ones to the file 'x' in the working directory.
x = nd.ones(3)
nd.save('x', x)

In [44]:
# Read the file back; the recorded output shows a one-element list.
x2 = nd.load('x')
x2

[
 [1. 1. 1.]
 <NDArray 3 @cpu(0)>]

In [45]:
# Save a list of two arrays to one file and unpack them again on load.
y = nd.zeros(4)
nd.save('xy', [x,y])
x2, y2 = nd.load('xy')
(x2, y2)

(
 [1. 1. 1.]
 <NDArray 3 @cpu(0)>,
 
 [0. 0. 0. 0.]
 <NDArray 4 @cpu(0)>)

In [46]:
# Save a dict of string -> NDArray and load it back as a dict.
mydict = {'x':x, 'y':y}
nd.save('mydict', mydict)
mydict2 = nd.load('mydict')
mydict2

{'x': 
 [1. 1. 1.]
 <NDArray 3 @cpu(0)>,
 'y': 
 [0. 0. 0. 0.]
 <NDArray 4 @cpu(0)>}

In [48]:
class MLP(nn.Block):
    """Perceptron with one 256-unit ReLU hidden layer and a 10-unit output layer."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Layers are created in the order they are applied in forward().
        self.hidden = nn.Dense(256, activation='relu')
        self.output = nn.Dense(10)

    def forward(self, x):
        """Compute ``output(hidden(x))``."""
        activations = self.hidden(x)
        result = self.output(activations)
        return result
# Build and initialize the MLP, then keep the output Y for a random (2, 20)
# batch X so it can be compared after the parameters are reloaded.
net = MLP()
net.initialize()
X = nd.random.uniform(shape=(2, 20))
Y = net(X)

In [50]:
# Write the network's parameters to 'mlp.params' in the working directory.
filename = 'mlp.params'
net.save_parameters(filename)

In [51]:
# Build a second MLP and load the saved parameters into it; initialize()
# is not called on net2.
net2 = MLP()
net2.load_parameters(filename)

In [52]:
# Evaluate the reloaded model on the same X and compare element-wise with Y;
# the recorded output is all ones, i.e. every entry matches.
Y2 = net2(X)
Y2 == Y


[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
<NDArray 2x10 @cpu(0)>