In [2]:
from mxnet.gluon import nn
from mxnet import nd

def get_net():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(4, activation="relu"))
        net.add(nn.Dense(2))
    return net

x = nd.random.uniform(shape=(3,5))
x


[[ 0.63196146  0.28536561  0.32417494  0.39529243  0.07626633]
 [ 0.07499499  0.81058359  0.79967439  0.91248471  0.53449339]
 [ 0.85158187  0.67423069  0.05224402  0.81195873  0.76282388]]
<NDArray 3x5 @cpu(0)>

In [3]:
import sys
try:
    net = get_net()
    net(x)
except RuntimeError as err:
    sys.stderr.write(str(err))

Parameter sequential0_dense0_weight has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks

In [4]:
net.initialize()
net(x)


[[ 0.0028941   0.00242514]
 [ 0.00286929  0.00397363]
 [ 0.0031244   0.00457098]]
<NDArray 3x2 @cpu(0)>

In [5]:
w = net[0].weight
b = net[0].bias
print('name: ', net[0].name, '\nweight: ', w, '\nbias: ', b)

name:  sequential0_dense0 
weight:  

KeyError: 'shape'

In [6]:
print('weight:', w.data())
print('weight gradient', w.grad())
print('bias:', b.data())
print('bias gradient', b.grad())

weight: 
[[ 0.03638195  0.03401168  0.03667891  0.04761275  0.03341197]
 [ 0.04545165 -0.02250378 -0.00240316  0.01745223 -0.05132004]
 [ 0.03046847 -0.03716864  0.0205081  -0.04182778 -0.00910603]
 [ 0.04730445  0.06650987 -0.00573307 -0.05009212 -0.00125728]]
<NDArray 4x5 @cpu(0)>
weight gradient 
[[ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]]
<NDArray 4x5 @cpu(0)>
bias: 
[ 0.  0.  0.  0.]
<NDArray 4 @cpu(0)>
bias gradient 
[ 0.  0.  0.  0.]
<NDArray 4 @cpu(0)>


In [7]:
params = net.collect_params()
print(params)
print(params['sequential0_dense0_bias'].data())
print(params.get('dense0_weight').data())

KeyError: 'shape'

In [8]:
from mxnet import init
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
print(net[0].weight.data(), net[0].bias.data())


[[-0.00022226 -0.00872944  0.01066942  0.00965733  0.00144618]
 [-0.00224081  0.02311502 -0.00355643  0.04915959 -0.01052857]
 [-0.00939048 -0.00198139 -0.02342937 -0.01300106  0.02418257]
 [-0.01333306  0.03112638 -0.01932316  0.00266476 -0.03635802]]
<NDArray 4x5 @cpu(0)> 
[ 0.  0.  0.  0.]
<NDArray 4 @cpu(0)>


In [9]:
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(4, activation="relu"))
    net.add(nn.Dense(4, activation="relu"))
    net.add(nn.Dense(4, activation="relu", params=net[-1].params))
    net.add(nn.Dense(2))

In [10]:
x


[[ 0.63196146  0.28536561  0.32417494  0.39529243  0.07626633]
 [ 0.07499499  0.81058359  0.79967439  0.91248471  0.53449339]
 [ 0.85158187  0.67423069  0.05224402  0.81195873  0.76282388]]
<NDArray 3x5 @cpu(0)>

In [11]:
net.initialize()
net(x)
print(net[1].weight.data())
print(net[2].weight.data())


[[-0.06604636  0.01222098 -0.03204519 -0.05303434]
 [-0.02752202 -0.0690815   0.01378246  0.00217288]
 [ 0.03288645 -0.0359733   0.03304712 -0.06168571]
 [-0.00429957 -0.04437919 -0.01383216  0.05596807]]
<NDArray 4x4 @cpu(0)>

[[-0.06604636  0.01222098 -0.03204519 -0.05303434]
 [-0.02752202 -0.0690815   0.01378246  0.00217288]
 [ 0.03288645 -0.0359733   0.03304712 -0.06168571]
 [-0.00429957 -0.04437919 -0.01383216  0.05596807]]
<NDArray 4x4 @cpu(0)>


In [12]:
print(net[0].weight.data())


[[ 0.01771     0.05402178 -0.02039515 -0.04567731 -0.05327704]
 [-0.05513101  0.0098961  -0.02168003  0.04066133  0.02287553]
 [-0.03386672  0.00060579 -0.04480089 -0.00195265  0.0005429 ]
 [-0.0331454  -0.02471167 -0.02431917 -0.05398483 -0.01459472]]
<NDArray 4x5 @cpu(0)>


In [13]:
# 通过重载_init_weight来实现不同的初始化方法
class MyInit(init.Initializer):
    def __init__(self):
        super(MyInit, self).__init__()
        self._verbose = True
    def _init_weight(self, _, arr):
        # 初始化权重，使用out=arr后我们不需指定形状
        print('init weight', arr.shape)
        nd.random.uniform(low=5, high=10, out=arr)

net = get_net()
net.initialize(MyInit())
net(x)
net[0].weight.data()

init weight (4, 5)
init weight (2, 4)



[[ 5.88147593  6.11206722  9.36877823  9.61968708  8.36958885]
 [ 8.76629639  9.8288765   7.79646254  7.8143549   6.83985138]
 [ 7.35032177  5.3169198   6.54792309  5.91882801  8.3103075 ]
 [ 8.71828175  6.23404789  6.57249832  5.95056915  9.08364677]]
<NDArray 4x5 @cpu(0)>