In [1]:
import random

from mxnet import ndarray as nd
from mxnet import autograd as ag


# Problem dimensions and mini-batch size used by the rest of the notebook.
num_examples = 1000
num_inputs = 2
batch_size = 10

# Ground-truth weights and bias of the linear model that generates the labels.
true_w = [2, -3.4]
true_b = 4.2

# Feature matrix with one row per example and one column per input,
# drawn with nd.random_normal using its default distribution parameters.
x = nd.random_normal(shape=(num_examples, num_inputs))

# Labels: the true linear function of the two feature columns ...
y = true_w[0] * x[:, 0] + true_w[1] * x[:, 1] + true_b
# ... perturbed in place by random noise scaled by 0.01.
noise = nd.random_normal(shape=y.shape)
y += .01 * noise

In [2]:
def data_iter():
    """Yield shuffled mini-batches of the module-level dataset.

    The example indices ``0 .. num_examples - 1`` are shuffled once per call
    and then consumed in consecutive chunks of ``batch_size``. The final chunk
    is shorter when ``num_examples`` is not a multiple of ``batch_size``.

    Yields:
        A ``(features, labels)`` pair obtained with ``nd.take`` from the
        module-level ``x`` and ``y`` for the current chunk of indices.
    """
    order = list(range(num_examples))
    random.shuffle(order)
    for start in range(0, num_examples, batch_size):
        # Slicing already stops at the end of the list, so no explicit
        # upper-bound clamp is required for the last batch.
        batch_idx = nd.array(order[start:start + batch_size])
        yield nd.take(x, batch_idx), nd.take(y, batch_idx)


# Trainable parameters: a (num_inputs, 1) weight column initialised from
# nd.random_normal and a single-element bias initialised to zero.
w = nd.random_normal(shape=(num_inputs, 1))
b = nd.zeros((1,))

# Collected in one list so the optimiser can iterate over all parameters.
params = [w, b]

# Request a gradient buffer for every parameter; sgd() later reads it
# through the .grad attribute.
for param in params:
    param.attach_grad()

In [3]:
def net(x):
    """Apply the linear model to a batch of inputs.

    Args:
        x: Batch of inputs passed as the left operand of ``nd.dot``.
            Note that this parameter shadows the module-level ``x``.

    Returns:
        ``nd.dot(x, w) + b`` using the module-level parameters ``w`` and ``b``.
    """
    projected = nd.dot(x, w)
    return projected + b


def squar_loss(yhat, y):
    """Element-wise squared error between predictions and targets.

    Args:
        yhat: Predictions; its shape determines the shape of the result.
        y: Targets; reshaped to ``yhat.shape`` before subtraction so that the
            difference is taken element by element rather than broadcast.

    Returns:
        An array shaped like ``yhat`` holding ``(yhat - y) ** 2`` per element.
        No reduction (sum or mean) and no scaling factor is applied.
    """
    target = y.reshape(yhat.shape)
    residual = yhat - target
    return residual ** 2


def sgd(params, lr):
    """Take one gradient-descent step on every parameter, in place.

    Args:
        params: Iterable of parameter arrays, each exposing a ``grad``
            attribute holding its current gradient.
        lr: Step size multiplied with the gradient.

    Returns:
        None. Each parameter's contents are overwritten through slice
        assignment, so the parameter objects themselves are preserved.
    """
    for p in params:
        step = lr * p.grad
        # Slice assignment writes into the existing array instead of
        # rebinding the local name to a new one.
        p[:] = p - step

In [4]:
# Training configuration: number of passes over the dataset and the step size
# handed to sgd().
epochs = 5
learning_rate = .001
for e in range(epochs):
    # Running sum of the per-example squared errors observed during this epoch.
    total_loss = 0
    for data, label in data_iter():
        # Only the forward pass and the loss are computed inside the
        # autograd recording scope.
        with ag.record():
            output = net(data)
            loss = squar_loss(output, label)
        # loss is not reduced to a scalar here: squar_loss returns one value
        # per element of the batch output.
        loss.backward()
        # Update every entry of params in place from its .grad attribute.
        sgd(params, learning_rate)
        # Accumulate this batch's summed loss as a Python-side scalar.
        total_loss += nd.sum(loss).asscalar()
    # Report the epoch's summed loss divided by the dataset size.
    print('Epoch %d, average loss: %f' %(e, total_loss/num_examples))

Epoch 0, average loss: 6.857532
Epoch 1, average loss: 0.131342
Epoch 2, average loss: 0.002627
Epoch 3, average loss: 0.000152
Epoch 4, average loss: 0.000102


In [7]:
# Show each learned parameter next to the ground-truth value it should
# approximate: weights first, then the bias.
for learned, truth in ((w, true_w), (b, true_b)):
    print(learned, truth)


[[ 2.00007  ]
 [-3.3999279]]
<NDArray 2x1 @cpu(0)> [2, -3.4]

[4.199747]
<NDArray 1 @cpu(0)> 4.2
