In [2]:
from mxnet import gluon
# Assemble a two-layer perceptron with the ready-made Sequential container.
net = gluon.nn.Sequential()
with net.name_scope():
    # Hidden layer: 256 units followed by a ReLU activation.
    hidden_layer = gluon.nn.Dense(256, activation='relu')
    net.add(hidden_layer)
    # Output layer: 10 units, no activation.
    output_layer = gluon.nn.Dense(10)
    net.add(output_layer)
print(net)

Sequential(
  (0): Dense(None -> 256, Activation(relu))
  (1): Dense(None -> 10, linear)
)


## 使用nn.Block定义网络

In [3]:
from mxnet import ndarray as nd
class MLP(gluon.nn.Block):
    """Two-layer perceptron written as a custom ``gluon.nn.Block``.

    The network is a 256-unit dense layer, a ReLU, and a 10-unit dense
    output layer.
    """

    def __init__(self, **kwargs):
        """Create the two dense layers.

        Args:
            **kwargs: Passed through unchanged to ``gluon.nn.Block.__init__``.
        """
        super(MLP, self).__init__(**kwargs)
        # Layers created inside name_scope() are named with this block's prefix.
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(256)
            self.dense1 = gluon.nn.Dense(10)

    def forward(self, x):
        """Compute ``dense1(relu(dense0(x)))`` for the input ``x``."""
        # dense0 must be applied to x first: relu acts on the layer's output,
        # not on the layer object itself.
        hidden = nd.relu(self.dense0(x))
        # Parameter-free operations such as relu can be used directly in forward.
        return self.dense1(hidden)

使用起来同样进行实例化再进行初始化

In [4]:
# Instantiate the custom block; printing it lists its child layers.
net = MLP()
print(net)
# Parameters have to be initialized before the network is applied to data.
net.initialize()

MLP(
  (dense0): Dense(None -> 256, linear)
  (dense1): Dense(None -> 10, linear)
)


观察MLP里面使用的其他命令:<p>
    __init__(self,**kwargs):  
    调用了gluon.nn.Block中的__init__函数，提供了prefix(指定名字)和params(指定模型参数)两个参数  
     self.name_scope():  
    调用了nn.Block中提供的name_scope()函数，给里面所有的层提供了prefix，使其在系统中独一无二

In [6]:
# A layer's name starts with the prefix of the block that owns it:
# an automatically generated one when none is given ...
print('default prefix: ',net.dense0.name)
# ... or the one passed through the `prefix` keyword.
net2 = MLP(prefix='another_mlp_')
print('customized prefix',net2.dense0.name)

default prefix:  mlp0_dense0
customized prefix another_mlp_dense0


## nn.Block
在gluon中，nn.Block是一个一般化部件，可以进行嵌套。  
它提供了：  
1.存储参数  
2.forward如何执行  
3.自动求导  
而nn.Sequential是一个nn.Block容器，通过add添加nn.Block，自动生成forward函数。  
一个简单实现:

In [7]:
class Sequential(gluon.nn.Block):
    """Minimal re-implementation of a sequential container of blocks.

    Children run in the order they were added; each one receives the
    output of the previous one.
    """

    def __init__(self, **kwargs):
        """Pass ``**kwargs`` through unchanged to ``gluon.nn.Block.__init__``."""
        super(Sequential, self).__init__(**kwargs)

    def add(self, block):
        """Register ``block`` as the next child of this container."""
        # Use Block's own registration API rather than appending to the
        # private _children attribute, whose container type is not fixed
        # (a list in early MXNet releases, an ordered mapping in later 1.x ones).
        self.register_child(block)

    def forward(self, x):
        """Feed ``x`` through every child in insertion order and return the result."""
        children = self._children
        # When children are stored in a mapping, iterate the blocks, not the keys.
        if hasattr(children, 'values'):
            children = children.values()
        for block in children:
            x = block(x)
        return x

## 初始化模型参数 

In [9]:
def get_net():
    """Build and return a two-layer network whose parameters are not yet initialized.

    The network is ``Dense(4)`` with a ReLU activation followed by ``Dense(2)``.
    """
    model = gluon.nn.Sequential()
    with model.name_scope():
        hidden = gluon.nn.Dense(4, activation='relu')
        model.add(hidden)
        output = gluon.nn.Dense(2)
        model.add(output)
    return model
# Random input of shape (3, 5), reused by the cells that follow.
x = nd.random.uniform(shape=(3,5))

不initialize时：

In [10]:
import sys
# Calling a freshly built network before initialize() raises RuntimeError;
# catch it and write the message to stderr so the notebook keeps running.
try:
    net = get_net()
    net(x)
except RuntimeError as err:
    sys.stderr.write(str(err))

Parameter 'sequential1_dense0_weight' has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks

In [11]:
# Initialize the parameters, then run the forward pass on x.
net.initialize()
net(x)


[[0.00212593 0.00365805]
 [0.00161272 0.00441845]
 [0.00204872 0.00352518]]
<NDArray 3x2 @cpu(0)>

In [12]:
# net[0] is the first layer added to the container; a Dense layer exposes
# its parameters as the attributes `weight` and `bias`.
w = net[0].weight
b = net[0].bias
print('name:',net[0].name,'\nweight:',w,'\nbias:',b)

name: sequential1_dense0 
weight: Parameter sequential1_dense0_weight (shape=(4, 5), dtype=float32) 
bias: Parameter sequential1_dense0_bias (shape=(4,), dtype=float32)


In [14]:
# data() gives the parameter's current values and grad() its gradient array
# (no backward pass is run in the cells shown, so nothing has been accumulated yet).
print('weight_data:',w.data())
print('weight_grad:',w.grad())

weight_data: 
[[-0.06206018  0.06491279 -0.03182812 -0.01631819 -0.00312688]
 [ 0.0408415   0.04370362  0.00404529 -0.0028032   0.00952624]
 [-0.01501013  0.05958354  0.04705103 -0.06005495 -0.02276454]
 [-0.0578019   0.02074406 -0.06716943 -0.01844618  0.04656678]]
<NDArray 4x5 @cpu(0)>
weight_grad: 
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
<NDArray 4x5 @cpu(0)>


通过collect_params来访问Block里面的所有参数，将获得一个从名字到参数的dict。可以使用正常的[]访问参数(需要填写完整的名字)，也可以使用get来访问，且不需要填写名字的前缀

In [22]:
# collect_params() gathers the parameters of net and of its child layers.
params = net.collect_params()
print(params)
# Plain [] lookup needs the full name (e.g. 'sequential1_dense0_weight'). The
# prefix is read from the dict instead of being hard-coded, because the counter
# inside it grows every time another Sequential is created in this kernel.
print(params[params.prefix + 'dense0_weight'].data())
# get() prepends the prefix itself, so only the short name is passed.
print(params.get('dense0_weight').data())

sequential1_ (
  Parameter sequential1_dense0_weight (shape=(4, 5), dtype=float32)
  Parameter sequential1_dense0_bias (shape=(4,), dtype=float32)
  Parameter sequential1_dense1_weight (shape=(2, 4), dtype=float32)
  Parameter sequential1_dense1_bias (shape=(2,), dtype=float32)
)

[[-0.06206018  0.06491279 -0.03182812 -0.01631819 -0.00312688]
 [ 0.0408415   0.04370362  0.00404529 -0.0028032   0.00952624]
 [-0.01501013  0.05958354  0.04705103 -0.06005495 -0.02276454]
 [-0.0578019   0.02074406 -0.06716943 -0.01844618  0.04656678]]
<NDArray 4x5 @cpu(0)>

[[-0.06206018  0.06491279 -0.03182812 -0.01631819 -0.00312688]
 [ 0.0408415   0.04370362  0.00404529 -0.0028032   0.00952624]
 [-0.01501013  0.05958354  0.04705103 -0.06005495 -0.02276454]
 [-0.0578019   0.02074406 -0.06716943 -0.01844618  0.04656678]]
<NDArray 4x5 @cpu(0)>


## 使用不同初始函数来初始化参数
也可以自己写一个初始化类，需要实现`__init__`、`_init_weight`以及`_init_bias`

In [23]:
from mxnet import init
# Re-initialize all parameters with init.Normal(sigma=0.2) ...
params.initialize(init = init.Normal(sigma=0.2),force_reinit=True)
# ... and then once more with init.One(). force_reinit=True is passed because
# the parameters were already initialized by an earlier cell.
params.initialize(init = init.One(),force_reinit=True)

## 延后的初始化
在输入x之后，w和b的尺寸才可以确定；若需避免延后初始化，需要在定义层时对in_units大小进行赋值

## 进行参数共享
在构造nn.Dense时，对参数进行赋值params = net[i].params，即可与net[i]共享同一组参数。  
那么在求导数的时候呢？


## 练习
1.研究net.collect_params()返回的是什么,net.params呢?  
2.如何对每个层使用不同的初始化函数  
3.如果两个层共用一个参数，求梯度的时候会发生什么  