In [1]:
from mxnet.gluon import nn
from mxnet import nd


def get_net():
    """Build a small, uninitialized two-layer perceptron.

    Returns:
        An ``nn.Sequential`` containing ``Dense(4, activation='relu')``
        followed by ``Dense(2)``. ``initialize()`` is not called here, so the
        caller has to initialize the parameters before running a forward pass.
    """
    model = nn.Sequential()
    with model.name_scope():
        # Layers are created inside the name scope, in order, then registered.
        for layer in (nn.Dense(4, activation='relu'), nn.Dense(2)):
            model.add(layer)
    return model


# Random input of shape (3, 5), drawn from a uniform distribution.
x = nd.random.uniform(shape=(3, 5))

# Running a freshly built network before initialize() raises RuntimeError;
# catch it so the message is displayed instead of aborting the notebook.
try:
    net = get_net()
    net(x)
except RuntimeError as e:
    print(e)


Parameter 'sequential0_dense0_weight' has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks


In [2]:
# Build a fresh network, initialize its parameters with the default
# initializer, then run the forward pass on x.
net = get_net()
net.initialize()
net(x)


[[0.00212593 0.00365805]
 [0.00161272 0.00441845]
 [0.00204872 0.00352518]]
<NDArray 3x2 @cpu(0)>

In [10]:
# A layer exposes its parameters as attributes. As the printed output shows,
# these are Parameter objects (name, shape, dtype); call .data() on them to
# get the underlying array.
w = net[0].weight
b = net[0].bias
print('name is :', net[0].name, '\nweight: ', w, '\nbias: ', b)

name is : sequential1_dense0 
weight:  Parameter sequential1_dense0_weight (shape=(4, 5), dtype=float32) 
bias:  Parameter sequential1_dense0_bias (shape=(4,), dtype=float32)


In [20]:
# Gather every parameter of the network (including those of nested child
# blocks) into one ParameterDict keyed by the fully prefixed parameter name.
params = net.collect_params()
print(params)

# Indexing with [] needs the full name, e.g. 'sequential1_dense0_weight'.
# The 'sequentialN_' prefix is auto-generated and changes whenever another
# network is built in the same kernel, so the key is derived from the dict's
# own prefix instead of being hard-coded; this keeps the cell re-runnable.
first_weight_name = params.prefix + 'dense0_weight'
print(params[first_weight_name].data())
print(params[first_weight_name].shape)

# get() prepends the prefix itself, so the name is passed without it.
print(params.get('dense0_bias').data())
print(params.get('dense0_weight').data())

sequential1_ (
  Parameter sequential1_dense0_weight (shape=(4, 5), dtype=float32)
  Parameter sequential1_dense0_bias (shape=(4,), dtype=float32)
  Parameter sequential1_dense1_weight (shape=(2, 4), dtype=float32)
  Parameter sequential1_dense1_bias (shape=(2,), dtype=float32)
)

[[-0.06206018  0.06491279 -0.03182812 -0.01631819 -0.00312688]
 [ 0.0408415   0.04370362  0.00404529 -0.0028032   0.00952624]
 [-0.01501013  0.05958354  0.04705103 -0.06005495 -0.02276454]
 [-0.0578019   0.02074406 -0.06716943 -0.01844618  0.04656678]]
<NDArray 4x5 @cpu(0)>
(4, 5)

[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>

[[-0.06206018  0.06491279 -0.03182812 -0.01631819 -0.00312688]
 [ 0.0408415   0.04370362  0.00404529 -0.0028032   0.00952624]
 [-0.01501013  0.05958354  0.04705103 -0.06005495 -0.02276454]
 [-0.0578019   0.02074406 -0.06716943 -0.01844618  0.04656678]]
<NDArray 4x5 @cpu(0)>


In [22]:
# initialize() draws the weights uniformly from [-0.07, 0.07] by default.
# Other initializers can be used instead; the code below re-draws the weights
# from a normal distribution with mean 0 and standard deviation 0.02
# (sigma is the standard deviation, not the variance).
# force_reinit=True is passed because the parameters were already initialized.
from mxnet import init
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
print(net[0].weight.data())
print(net[0].bias.data())


[[ 0.01203553  0.00832525  0.01018762 -0.048427   -0.00321659]
 [ 0.00771733 -0.00632596  0.02594279 -0.00826195  0.01191582]
 [ 0.0098195   0.01036525  0.00784631  0.00566644  0.02093405]
 [ 0.00017574 -0.01174674 -0.0067725  -0.00926798 -0.01903343]]
<NDArray 4x5 @cpu(0)>

[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>


In [23]:
# Re-initialize with the constant-one initializer. In the recorded output the
# weights become 1 while the bias still prints as zeros.
params.initialize(init=init.One(), force_reinit=True)
print(net[0].weight.data())
print(net[0].bias.data())


[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
<NDArray 4x5 @cpu(0)>

[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>


In [24]:
# A custom initializer can also be defined by subclassing init.Initializer.
class _MyInit(init.Initializer):
    """Demo initializer that reports each call and fills arrays in place.

    Weights are filled with uniform random values between 5 and 10; biases are
    set to the constant 2. Each hook prints the shape of the array it is given.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``init.Initializer`` and set ``_verbose``."""
        super().__init__(*args, **kwargs)
        self._verbose = True

    def _init_weight(self, _, arr):
        """Fill ``arr`` in place with uniform random values between 5 and 10."""
        print('init weight', arr.shape)
        # Writing through ``out=`` reuses arr's own shape, so none is passed.
        low, high = 5, 10
        nd.random.uniform(low=low, high=high, out=arr)

    def _init_bias(self, _, arr):
        """Set every element of the bias array ``arr`` to 2."""
        print('init bias', arr.shape)
        arr[:] = 2


# Re-initialize every parameter with the custom initializer. In the recorded
# output only 'init weight' lines are printed and the bias still shows zeros,
# so _init_bias was not invoked for these layers.
# NOTE(review): presumably the Dense layer's own default bias initializer
# takes precedence over the one passed here -- confirm against the Gluon docs.
params.initialize(init=_MyInit(), force_reinit=True)
print(net[0].weight.data())
print(net[0].bias.data())

init weight (4, 5)
init weight (2, 4)

[[5.4155626 9.646481  6.3885927 6.592845  5.0467834]
 [8.337051  9.21171   5.6589894 8.23587   8.581636 ]
 [9.20693   6.4470305 6.323651  5.915957  6.989104 ]
 [7.9325647 7.764107  5.100538  5.8247023 9.1447   ]]
<NDArray 4x5 @cpu(0)>

[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>


In [25]:
# A freshly built network does not know its input size yet: the recorded
# output lists the weight shapes with 0 in the input dimension.
net__ = get_net()
print(net__.collect_params())

sequential2_ (
  Parameter sequential2_dense0_weight (shape=(4, 0), dtype=float32)
  Parameter sequential2_dense0_bias (shape=(4,), dtype=float32)
  Parameter sequential2_dense1_weight (shape=(2, 0), dtype=float32)
  Parameter sequential2_dense1_bias (shape=(2,), dtype=float32)
)


In [26]:
# This call prints nothing in the recorded run: the 'init weight' messages
# only show up at the first forward pass, not here.
net__.initialize(init=_MyInit())

In [27]:
# First forward pass: the custom initializer actually runs here (see the
# 'init weight' lines in the output), now with complete weight shapes.
net__(x)

init weight (4, 5)
init weight (2, 4)



[[820.3932  867.3587 ]
 [804.3137  850.8902 ]
 [648.2849  686.66437]]
<NDArray 3x2 @cpu(0)>

In [28]:
# After the forward pass the weight shapes are fully known (no 0 dimension).
print(net__.collect_params())

sequential2_ (
  Parameter sequential2_dense0_weight (shape=(4, 5), dtype=float32)
  Parameter sequential2_dense0_bias (shape=(4,), dtype=float32)
  Parameter sequential2_dense1_weight (shape=(2, 4), dtype=float32)
  Parameter sequential2_dense1_bias (shape=(2,), dtype=float32)
)


In [31]:
# Sharing model parameters
#
# The second Dense layer is constructed with params=_net[-1].params, i.e. it
# reuses the parameters of the layer that was added just before it.

_net = nn.Sequential()
with _net.name_scope():
    _net.add(nn.Dense(4, in_units=4, activation='relu'))
    _net.add(nn.Dense(4, in_units=4, activation='relu', params=_net[-1].params))
    _net.add(nn.Dense(2, in_units=4))

# In the recorded output 'init weight (4, 4)' is printed only once and both
# printed weight matrices are identical, consistent with the two layers
# sharing one weight.
_net.initialize(_MyInit())
print(_net[0].weight.data())
print(_net[1].weight.data())

init weight (4, 4)
init weight (2, 4)

[[6.439955  6.450388  6.917311  8.090076 ]
 [8.74585   7.1438437 9.3922615 5.6773705]
 [5.5143166 6.4914117 5.4618692 7.8498244]
 [6.770233  7.954364  7.7590814 7.8716264]]
<NDArray 4x4 @cpu(0)>

[[6.439955  6.450388  6.917311  8.090076 ]
 [8.74585   7.1438437 9.3922615 5.6773705]
 [5.5143166 6.4914117 5.4618692 7.8498244]
 [6.770233  7.954364  7.7590814 7.8716264]]
<NDArray 4x4 @cpu(0)>
