## 创建数据集
沿用之前（从零实现线性回归时）使用的同一个数据集。

In [1]:
from mxnet import ndarray as nd
from mxnet import autograd

# Problem size: number of features per sample and number of samples.
num_inputs = 2
num_examples = 1000

# Ground-truth parameters of the linear model used to generate the labels.
true_w = [2, -3.4]
true_b = 4.2

# Random feature matrix with one row per sample and one column per feature.
x = nd.random_normal(shape=(num_examples, num_inputs))

# Labels: linear combination of the two feature columns plus the bias ...
y = (
    true_w[0] * x[:, 0]
    + true_w[1] * x[:, 1]
    + true_b
)
# ... perturbed in place by random noise scaled by 0.01.
y += 0.01 * nd.random_normal(shape=y.shape)

In [2]:
from mxnet import gluon

### 数据读取

In [3]:
# Number of samples per mini-batch.
batch_size = 10

# Pair every feature row with its label, then serve the pairs in
# shuffled mini-batches.
dataset = gluon.data.ArrayDataset(x, y)
data_iter = gluon.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
)

### 定义模型
用 `Sequential` 把所有的层串接起来。这里先定义一个空的模型。

In [6]:
# Start from an empty Sequential container; layers are added to it below.
net = gluon.nn.Sequential()

In [7]:
# Append a single Dense (fully connected) layer with one output unit.
net.add(gluon.nn.Dense(1))

In [8]:
# Display the model structure.
net

Sequential(
  (0): Dense(None -> 1, linear)
)

### 初始化模型参数
使用Sequential所带有的方法进行初始化

In [9]:
# Initialize the model's parameters using initialize()'s default arguments.
net.initialize()

### 损失函数
gluon提供了平方损失函数

In [10]:
# Squared (L2) loss provided by gluon; used as the training objective.
square_loss = gluon.loss.L2Loss()

### 优化
使用随机梯度下降进行优化：创建一个 `Trainer` 实例，并把模型参数传给它。

In [13]:
# Optimizer: plain SGD over all of net's parameters with learning rate 0.1.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate' : 0.1},
)

### 训练
这里不再调用SGD, 而是使用trainer.step来更新

In [16]:
# Train for a fixed number of passes over the data set.
# NOTE: batch_size is intentionally not re-assigned here; the mini-batch
# size is whatever data_iter was built with, and each update below is
# normalized by the size of the batch actually received.
epochs = 5
for e in range(epochs):
    # Running sum of the per-sample losses seen during this epoch.
    total_loss = 0
    for data, label in data_iter:
        # Record the forward pass so autograd can compute gradients.
        with autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()
        # Normalize the gradient by the number of samples in *this* batch.
        # The final batch can be smaller than the nominal batch size when
        # the data set size is not an exact multiple of it.
        trainer.step(data.shape[0])
        total_loss += nd.sum(loss).asscalar()
    print("Epochs: %d, average loss: %f" % (e, total_loss/num_examples))

Epochs: 0, average loss: 0.671321
Epochs: 1, average loss: 0.000049
Epochs: 2, average loss: 0.000049
Epochs: 3, average loss: 0.000049
Epochs: 4, average loss: 0.000049


### 从net中拿到需要的层，访问权重和位移 

In [17]:
# The Dense layer is the first (index 0) layer that was added to net.
dense = net[0]
# Learned weight values; compare against true_w.
dense.weight.data()


[[ 1.999774  -3.4003794]]
<NDArray 1x2 @cpu(0)>

In [18]:
# Learned bias value; compare against true_b.
dense.bias.data()


[4.2001123]
<NDArray 1 @cpu(0)>

In [19]:
# Print the built-in documentation of the weight's Parameter object.
help(dense.weight)

Help on Parameter in module mxnet.gluon.parameter object:

class Parameter(builtins.object)
 |  A Container holding parameters (weights) of Blocks.
 |  
 |  :py:class:`Parameter` holds a copy of the parameter on each :py:class:`Context` after
 |  it is initialized with ``Parameter.initialize(...)``. If :py:attr:`grad_req` is
 |  not ``'null'``, it will also hold a gradient array on each :py:class:`Context`::
 |  
 |      ctx = mx.gpu(0)
 |      x = mx.nd.zeros((16, 100), ctx=ctx)
 |      w = mx.gluon.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
 |      b = mx.gluon.Parameter('fc_bias', shape=(64,), init=mx.init.Zero())
 |      w.initialize(ctx=ctx)
 |      b.initialize(ctx=ctx)
 |      out = mx.nd.FullyConnected(x, w.data(ctx), b.data(ctx), num_hidden=64)
 |  
 |  Parameters
 |  ----------
 |  name : str
 |      Name of this parameter.
 |  grad_req : {'write', 'add', 'null'}, default 'write'
 |      Specifies how to update gradient to grad arrays.
 |  
 |      - ``'wr

In [20]:
# Gradient array currently held by the weight parameter.
dense.weight.grad()


[[-0.01373221  0.0192259 ]]
<NDArray 1x2 @cpu(0)>