In [1]:
from mxnet import nd
from mxnet.gluon import nn

class MLP(nn.Block):
    """Two-layer perceptron built by subclassing ``nn.Block``.

    A 256-unit ReLU hidden layer feeds a 10-unit output layer.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Layers are stored as attributes in the order they are applied.
        self.hidden = nn.Dense(256, activation='relu')
        self.output = nn.Dense(10)

    def forward(self, x):
        """Return the output layer applied to the hidden layer's activations."""
        hidden_out = self.hidden(x)
        return self.output(hidden_out)


In [2]:
# Random input batch of shape (2, 20); reused by several later cells.
X = nd.random.uniform(shape=(2, 20))
net = MLP()
net.initialize()
# Run the network on X; the cell displays the resulting 2x10 NDArray.
net(X)


[[ 0.09543004  0.04614332 -0.00286655 -0.07790346 -0.05130241  0.02942038
   0.08696645 -0.0190793  -0.04122177  0.05088576]
 [ 0.0769287   0.03099706  0.00856576 -0.044672   -0.06926838  0.09132431
   0.06786592 -0.06187843 -0.03436674  0.04234696]]
<NDArray 2x10 @cpu(0)>

In [3]:
# Sequential inherits from the Block class
class Mysquential(nn.Block):
    """Minimal sequential container implemented on top of ``nn.Block``.

    Child blocks are stored in ``self._children`` and applied one after
    another in ``forward``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def add(self, block):
        """Store ``block`` in ``_children`` under its own ``name``."""
        child_key = block.name
        self._children[child_key] = block

    def forward(self, x):
        """Feed ``x`` through every child, in the iteration order of ``_children``."""
        out = x
        for layer in self._children.values():
            out = layer(out)
        return out


In [4]:
# Build the same 256 -> 10 architecture with the hand-written container.
net = Mysquential()
# Mysquential.add takes exactly one block per call.
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
# Forward pass on the X defined in an earlier cell; displays a 2x10 NDArray.
net(X)


[[ 0.00362229  0.00633331  0.03201145 -0.01369375  0.10336448 -0.0350802
  -0.00032165 -0.01676024  0.06978628  0.01303309]
 [ 0.03871717  0.02608212  0.03544958 -0.02521311  0.11005436 -0.01430663
  -0.03052467 -0.03852826  0.06321152  0.0038594 ]]
<NDArray 2x10 @cpu(0)>

In [5]:
# A more complex model
class FancyMLP(nn.Block):
    """Block mixing a reused dense layer, a constant matrix and Python control flow.

    ``forward`` applies ``self.dense`` twice (both applications use the same
    layer object), multiplies by a fixed random 20x20 matrix in between,
    rescales the result depending on its norm and returns the sum of all
    elements.
    """

    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        # Random 20x20 matrix registered as a constant through `get_constant`.
        # The registered name is spelled 'rand_weight' so that it matches the
        # attribute name (it was misspelled 'rand_wight').
        self.rand_weight = self.params.get_constant(
            'rand_weight', nd.random.uniform(shape=(20, 20)))
        self.dense = nn.Dense(20, activation='relu')

    def forward(self, x):
        """Return the summed, norm-rescaled activations for input ``x``.

        NOTE(review): ``self.dense`` is first applied to ``x`` and then to a
        20-feature tensor, so ``x`` presumably must have 20 features as well
        -- confirm against callers.
        """
        x = self.dense(x)
        # Mix with the constant matrix, add 1, then apply ReLU.
        x = nd.relu(nd.dot(x, self.rand_weight.data()) + 1)
        # Second application of the very same dense layer.
        x = self.dense(x)
        # Data-dependent control flow: halve until the norm is at most 1 ...
        while x.norm().asscalar() > 1:
            x /= 2
        # ... then scale up by 10 if the norm ended below 0.8.
        if x.norm().asscalar() < 0.8:
            x *= 10
        return x.sum()

In [6]:
# Instantiate and run FancyMLP on the X defined in an earlier cell.
net = FancyMLP()
net.initialize()
# The displayed result is a single-element NDArray (the sum returned by forward).
net(X)


[18.571953]
<NDArray 1 @cpu(0)>

In [7]:
class NestMLP(nn.Block):
    """Block that nests an ``nn.Sequential`` sub-network inside a custom block.

    The inner network stacks 64- and 32-unit ReLU layers; a 16-unit ReLU layer
    is applied to its output.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.net = nn.Sequential()
        self.net.add(
            nn.Dense(64, activation='relu'),
            nn.Dense(32, activation='relu'),
        )
        self.dense = nn.Dense(16, activation='relu')

    def forward(self, x):
        """Apply the nested sequential network, then the final dense layer."""
        features = self.net(x)
        return self.dense(features)


# Chain a custom nested block, a plain Dense layer and FancyMLP in one Sequential.
net = nn.Sequential()
net.add(NestMLP(), nn.Dense(20), FancyMLP())
net.initialize()
# FancyMLP is last, so the displayed result is again a single-element NDArray.
net(X)


[24.86621]
<NDArray 1 @cpu(0)>

In [8]:
# Accessing, initializing, and sharing model parameters
from mxnet import init

In [9]:
# Two-layer perceptron used by the parameter-inspection cells that follow.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
        nn.Dense(10))
net.initialize()

# Fresh random batch of shape (2, 20); one forward pass, then display the output.
X = nd.random.uniform(shape=(2, 20))
Y = net(X)
Y


[[ 0.06281953  0.02262797 -0.04495928 -0.01636342  0.01223228 -0.02480634
  -0.03506397 -0.05295734 -0.04518763 -0.09792533]
 [ 0.01386758 -0.02968636  0.01167846  0.01244943 -0.0305337  -0.02907136
  -0.04247145 -0.02945885 -0.0278103  -0.05699407]]
<NDArray 2x10 @cpu(0)>

In [10]:
# Parameters of the first layer and the type of their container
# (displayed as mxnet.gluon.parameter.ParameterDict).
net[0].params, type(net[0].params)

(dense10_ (
   Parameter dense10_weight (shape=(256, 20), dtype=float32)
   Parameter dense10_bias (shape=(256,), dtype=float32)
 ),
 mxnet.gluon.parameter.ParameterDict)

In [12]:
# Two ways to reach the same Parameter: by its full name in the ParameterDict
# (layer prefix + 'weight') or through the layer's `weight` attribute.
# The key is built from the layer prefix instead of a hard-coded
# 'dense10_weight', whose number changes whenever earlier cells are re-run.
net[0].params[net[0].prefix + 'weight'], net[0].weight

(Parameter dense10_weight (shape=(256, 20), dtype=float32),
 Parameter dense10_weight (shape=(256, 20), dtype=float32))

In [13]:
# Current values of the first layer's weight (displayed as a 256x20 NDArray).
net[0].weight.data()


[[-0.06046963  0.00624272 -0.03472826 ... -0.01759475  0.0686483
  -0.06360765]
 [-0.01273243 -0.02659053 -0.04718638 ...  0.02570021  0.02275064
  -0.0166979 ]
 [-0.03555115  0.01875034  0.02322027 ...  0.06564643  0.04601197
  -0.01915742]
 ...
 [ 0.03173313  0.01789995  0.02519771 ... -0.06176154 -0.03986754
  -0.04898471]
 [ 0.00564718  0.04665586 -0.00028374 ...  0.05332779  0.02100175
  -0.06427249]
 [ 0.0438781   0.05357236  0.02753124 ...  0.04084889 -0.01963295
   0.05668835]]
<NDArray 256x20 @cpu(0)>

In [14]:
# Gradient array of the same weight; the displayed values are all zero here.
net[0].weight.grad()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 256x20 @cpu(0)>

In [15]:
# Bias of the second (output) layer; displayed as ten zeros.
net[1].bias.data()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>

In [16]:
# Select parameters by regular expression on their names; the output lists
# only the two weight parameters.
net.collect_params('.*weight')

sequential2_ (
  Parameter dense10_weight (shape=(256, 20), dtype=float32)
  Parameter dense11_weight (shape=(10, 256), dtype=float32)
)

In [18]:
# Re-initialize the whole net from Normal(sigma=0.01); force_reinit=True is
# passed because the net was already initialized in an earlier cell.
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
# Show the first row of the first layer's weight.
net[0].weight.data()[0]


[-0.00566084 -0.00395055 -0.00454888  0.00064433  0.00537331  0.00544384
 -0.0075568  -0.009151    0.0013665   0.00658129 -0.00482921 -0.00525734
 -0.00567577  0.00581563  0.00287522 -0.00044662  0.01271125 -0.00222826
 -0.00131304 -0.02350498]
<NDArray 20 @cpu(0)>

In [20]:
# Same re-initialization, but applied to a single Parameter rather than the net.
net[0].weight.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
net[0].weight.data()[0]


[-0.00572768 -0.00633937 -0.00251258 -0.00467908 -0.00969493  0.00605472
 -0.01847969  0.01177194  0.00769348  0.01019374 -0.01192946  0.00357285
  0.00929739 -0.00075304 -0.00158822 -0.03007082  0.00256757  0.00213922
 -0.00118184  0.00639406]
<NDArray 20 @cpu(0)>

In [23]:
class MyInit(init.Initializer):
    """Custom initializer: uniform samples in [-10, 10) with small values zeroed.

    Entries whose absolute value is below 5 are set to zero; the rest keep
    their sampled value.
    """

    def _init_weight(self, name, data):
        # Report which parameter is being initialized and its shape.
        print("Init", name, data.shape)
        samples = nd.random.uniform(low=-10, high=10, shape=data.shape)
        data[:] = samples
        # The comparison yields a 0/1 mask; multiplying zeroes entries with |value| < 5.
        keep_mask = data.abs() >= 5
        data *= keep_mask

# Re-initialize with the custom initializer; the printed "Init ..." lines show
# it being invoked for the two weight parameters.
net.initialize(MyInit(), force_reinit=True)
net[0].weight.data()

Init dense10_weight (256, 20)
Init dense11_weight (10, 256)



[[ 9.476374   9.491772  -6.752841  ... -0.        -5.597923  -7.6391687]
 [ 0.         5.6946106 -7.7700787 ...  8.051603   0.        -0.       ]
 [-6.4700127  9.401562   8.783218  ...  0.         8.4343815 -9.806334 ]
 ...
 [-9.418693   8.645208   5.3656855 ... -0.        -0.        -6.5955944]
 [-5.9026556 -0.         7.778509  ...  0.        -0.         0.       ]
 [ 9.853041  -0.         0.        ...  9.013855  -9.360153  -0.       ]]
<NDArray 256x20 @cpu(0)>

In [24]:
# Overwrite a parameter's value directly with set_data.
# NOTE: this cell is not idempotent -- every execution adds 1 to the weight again.
net[0].weight.set_data(net[0].weight.data()+1)
net[0].weight.data()


[[10.476374  10.491772  -5.752841  ...  1.        -4.597923  -6.6391687]
 [ 1.         6.6946106 -6.7700787 ...  9.051603   1.         1.       ]
 [-5.4700127 10.401562   9.783218  ...  1.         9.4343815 -8.806334 ]
 ...
 [-8.418693   9.645208   6.3656855 ...  1.         1.        -5.5955944]
 [-4.9026556  1.         8.778509  ...  1.         1.         1.       ]
 [10.853041   1.         1.        ... 10.013855  -8.360153   1.       ]]
<NDArray 256x20 @cpu(0)>

In [27]:
# Parameter sharing: the third layer is constructed with `params=shared.params`,
# i.e. it is handed the parameter dictionary of the `shared` layer.
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'),
       shared,
       nn.Dense(8, activation='relu', params=shared.params),
       nn.Dense(10))
net.initialize()

X = nd.random.uniform(shape=(2, 20))
net(X)

# Element-wise comparison of the first weight row of layers 1 and 2;
# the displayed result is all ones, i.e. the rows are equal.
net[1].weight.data()[0] == net[2].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>

In [28]:
class MyInit(init.Initializer):
    """Initializer that only reports which parameter it was asked to initialize.

    It rebinds the name ``MyInit`` used by an earlier cell. ``data`` is left
    untouched: nothing is written to it here.
    """

    def _init_weight(self, name, data):
        shape = data.shape
        print("Init", name, shape)

# Neither Dense layer is given `in_units`. No "Init ..." line is printed by this
# cell; the messages appear only after the first forward pass in the next cell
# (presumably because the weight shapes are not known yet -- confirm in the
# Gluon deferred-initialization docs).
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
       nn.Dense(10))
net.initialize(MyInit(), force_reinit=True)

In [29]:
# First forward pass with a (2, 20) batch; the "Init ..." messages are printed
# during this call, with weight shapes (256, 20) and (10, 256).
X = nd.random.uniform(shape=(2, 20))
Y = net(X)

Init dense20_weight (256, 20)
Init dense21_weight (10, 256)


In [30]:
# Re-initializing after a forward pass prints the "Init ..." messages right away.
net.initialize(MyInit(),force_reinit=True)

Init dense20_weight (256, 20)
Init dense21_weight (10, 256)


In [None]:
# Same architecture, but with `in_units` given explicitly for every layer, so the
# weight shapes are fully specified at construction time (presumably this lets
# initialization run immediately -- this cell has no recorded output).
# NOTE(review): in_units=10 differs from the 20-feature X used in earlier cells;
# feeding that X to this net would presumably fail -- confirm the intended size.
net = nn.Sequential()
net.add(nn.Dense(256, in_units=10, activation='relu'),
       nn.Dense(10, in_units=256))
net.initialize(MyInit(), force_reinit=True)