In [1]:
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon

# Size of the synthetic regression problem: features per example and
# number of examples to generate.
num_inputs = 2
num_examples = 1000

# Ground-truth parameters used to generate the labels; the trained model's
# weight and bias are compared against these at the end of the notebook.
true_w, true_b = [2, -3.4], 4.2

# Feature matrix with one row per example and one column per input,
# sampled with nd.random_normal using its default distribution parameters.
X = nd.random_normal(shape=(num_examples, num_inputs))

# Labels: linear combination of the two feature columns plus the bias ...
y = (
    true_w[0] * X[:, 0]
    + true_w[1] * X[:, 1]
    + true_b
)
# ... perturbed in place by random noise scaled down by a factor of 0.01.
y += 0.01 * nd.random_normal(shape=y.shape)

In [2]:
# Number of examples per mini-batch.
batch_size = 10

# Pair every feature row in X with its label in y.
dataset = gluon.data.ArrayDataset(X, y)

# Iterable over (data, label) mini-batches; shuffle=True is requested so the
# examples are not served in their original order.
data_iter = gluon.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [3]:
# Sanity check: print the first mini-batch produced by the loader and stop
# immediately instead of walking through the whole dataset.
for batch in data_iter:
    data, label = batch
    print(data, label)
    break


[[-0.84624261  2.31918931]
 [ 0.40309143 -0.62632275]
 [ 0.17280626  0.32586342]
 [ 0.19708104 -1.61753237]
 [-1.33007801  0.653795  ]
 [-0.33854252  1.38261044]
 [ 0.1243841  -0.78210461]
 [ 0.97388011  2.5685432 ]
 [-0.82135218  0.45946771]
 [-0.07879972 -0.65682143]]
<NDArray 10x2 @cpu(0)> 
[ -5.37132263   7.13942146   3.43410611  10.11131859  -0.67792529
  -1.17845225   7.0951705   -2.58344817   0.98947901   6.258183  ]
<NDArray 10 @cpu(0)>


In [4]:
# Start with an empty Sequential container; layers are appended to it
# afterwards via net.add().
net = gluon.nn.Sequential()

In [5]:
# Linear regression needs a single output: append one Dense layer with
# exactly one unit. No input size is given here.
net.add(gluon.nn.Dense(units=1))

In [6]:
# Initialise the network's parameters. No arguments are passed, so the
# library's default initialiser and default context are used.
net.initialize()

In [7]:
# L2 loss object used as the training objective; it is called as
# square_loss(output, label) inside the training loop.
square_loss = gluon.loss.L2Loss()

In [10]:
# Optimiser wrapper: applies SGD with a learning rate of 0.1 to every
# parameter collected from the network.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': 0.1},
)

In [11]:
# Train the network with mini-batch SGD and report the mean per-example
# loss after every pass over the data.
#
# Note: this cell deliberately does NOT re-declare `batch_size`. The batch
# size is owned by the cell that builds `data_iter`; a second hard-coded copy
# here could silently drift from the loader's real batch size and mis-scale
# every gradient update.
epochs = 5
for e in range(epochs):
    # Sum of the per-example losses seen during this epoch.
    total_loss = 0
    for data, label in data_iter:
        # Record the forward pass so gradients can be computed from `loss`.
        with autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()
        # Normalise the update by the number of examples actually present in
        # this batch. This stays correct if the loader's batch size changes
        # or if the final batch of an epoch is smaller than the others.
        trainer.step(data.shape[0])
        # .asscalar() copies the summed batch loss into a Python number.
        total_loss += nd.sum(loss).asscalar()
    print("Epoch %d, average loss: %f" % (e, total_loss/num_examples))

Epoch 0, average loss: 0.000077
Epoch 1, average loss: 0.000047
Epoch 2, average loss: 0.000047
Epoch 3, average loss: 0.000046
Epoch 4, average loss: 0.000047


In [12]:
# Fetch the first layer of the network (the only one added in the cells
# shown) so its learned parameters can be inspected.
dense = net[0]

# Show the ground-truth weights next to the learned weight array.
(true_w, dense.weight.data())

([2, -3.4], 
 [[ 1.99922729 -3.39976001]]
 <NDArray 1x2 @cpu(0)>)

In [13]:
# Show the ground-truth bias next to the learned bias array.
(true_b, dense.bias.data())

(4.2, 
 [ 4.20017862]
 <NDArray 1 @cpu(0)>)