## NDArray

In [17]:
import mxnet as mx
from mxnet import nd
# Fix MXNet's random seed so the random draws in this notebook are repeatable.
mx.random.seed(1)

In [15]:
# Allocate a 3x4 array without initialising it; the values printed below are
# whatever happened to be in memory, not meaningful data.
x = nd.empty(shape=(3, 4))
print(x)


[[  0.00000000e+00  -3.68934881e+19   0.00000000e+00  -3.68934881e+19]
 [  1.66400404e-30   1.40129846e-45   1.66875962e-30   1.40129846e-45]
 [  1.66875962e-30   1.40129846e-45   1.70322007e-03   2.78366539e-40]]
<NDArray 3x4 @cpu(0)>


In [16]:
# The values 0..23 laid out as a 2x3x4 array.
X = nd.arange(0, 24).reshape((2, 3, 4))

In [68]:
# Sum over every element of X.
# NOTE(review): for X = arange(24) the total is 276; the output recorded with
# this cell (62.675...) does not match, so it was presumably produced from a
# different X (out-of-order execution) - re-run to refresh it.
nd.sum(data=X)


[ 62.67525864]
<NDArray 1 @cpu(0)>

In [11]:
# Six equal entries of 1/6, i.e. a uniform distribution over six outcomes.
probabilities = nd.ones(shape=(6,)) / 6
probabilities


[ 0.16666667  0.16666667  0.16666667  0.16666667  0.16666667  0.16666667]
<NDArray 6 @cpu(0)>

In [13]:
# The values 0..4 as a single-row (1x5) matrix.
nd.reshape(nd.arange(5), shape=(1, 5))


[[ 0.  1.  2.  3.  4.]]
<NDArray 1x5 @cpu(0)>

In [18]:
import numpy as np

In [19]:
def transform(data, label):
    """Threshold the pixel data and cast both sample parts to float32.

    Args:
        data: pixel array of one sample; each value is divided by 128 and
            floored (assumes pixel values in 0..255, which would yield
            0/1 - TODO confirm against the dataset).
        label: label of the sample.

    Returns:
        A ``(data, label)`` tuple, both cast to ``np.float32``.
    """
    thresholded = nd.floor(data / 128).astype(np.float32)
    float_label = label.astype(np.float32)
    return thresholded, float_label

In [69]:
# Training and test splits of MNIST; `transform` is applied to every sample.
mnist_train = mx.gluon.data.vision.MNIST(
    train=True,
    transform=transform,
)
mnist_test = mx.gluon.data.vision.MNIST(
    train=False,
    transform=transform,
)

In [47]:
# Accumulators for the counting pass over the training set. Both start at one
# rather than zero (presumably as smoothing so no count is ever zero).
#   ycount[c]    - number of samples seen with label c
#   xcount[p, c] - sum of pixel p over the samples with label c
ycount = nd.ones(shape=(10,))
xcount = nd.ones(shape=(784, 10))

In [44]:
# One pass over the training set: tally each label and add the flattened
# image into the column of xcount that belongs to that label.
for data, label in mnist_train:
    y = int(label)
    x = data.reshape((784,))
    ycount[y] = ycount[y] + 1
    xcount[:, y] = xcount[:, y] + x
    

In [48]:
# Divide every class column of pixel sums by that class's sample count.
# The division is done in place, so running this cell a second time divides
# the columns again.
for i in range(10):
    class_column = xcount[:, i]
    xcount[:, i] = class_column / ycount[i]

In [51]:
import matplotlib.pyplot as plt
# Show the ten columns of xcount side by side, each reshaped to a 28x28 image,
# with the axis ticks hidden.
fig, figarr = plt.subplots(1, 10, figsize=(15, 15))
for i, ax in enumerate(figarr):
    class_image = xcount[:, i].reshape((28, 28)).asnumpy()
    ax.imshow(class_image, cmap='hot')
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

plt.show()

In [52]:
import mxnet as mx
from mxnet import nd, autograd
# Reset MXNet's random seed so the random draws that follow are repeatable.
mx.random.seed(1)

In [58]:
# Differentiate z = (2 * x) * x = 2 * x**2 with respect to x.
x = nd.array([[1, 2], [3, 4]])
x.attach_grad()  # allocate storage for the gradient of x

# Only operations executed inside record() are tracked for backward().
with autograd.record():
    y = 2 * x
    z = x * y

z.backward()
# dz/dx = 4 * x, which is what the printed gradient shows.
print(x.grad)


[[  4.   8.]
 [ 12.  16.]]
<NDArray 2x2 @cpu(0)>


In [59]:
from __future__ import print_function
import mxnet as mx
from mxnet import nd, autograd, gluon
# Reset MXNet's random seed so the synthetic data and the initial parameters
# drawn below are repeatable.
mx.random.seed(1)

## Linear Regression ##

In [89]:
# Devices for the data and for the model parameters; both are the CPU here.
data_ctx, model_ctx = mx.cpu(), mx.cpu()

print(data_ctx)
print(model_ctx)

cpu(0)
cpu(0)


In [64]:
# Sizes of the synthetic regression problem.
num_inputs, num_outputs = 2, 1   # features per example, targets per example
num_examples = 10000             # number of generated examples

def real_fn(X):
    """Ground-truth linear function used to generate the targets.

    Args:
        X: 2-D array indexed as ``X[:, 0]`` and ``X[:, 1]`` (one row per
            example, at least two columns).

    Returns:
        One value per row: ``2 * X[:, 0] - 3.4 * X[:, 1] + 4.2``.
    """
    first_term = 2 * X[:, 0]
    second_term = 3.4 * X[:, 1]
    return first_term - second_term + 4.2

# Random features, plus targets from real_fn perturbed by noise scaled by 0.1.
X = nd.random_normal(
    shape=(num_examples, num_inputs),
    ctx=data_ctx,
)
noise = 0.1 * nd.random_normal(
    shape=(num_examples,),
    ctx=data_ctx,
)
y = real_fn(X) + noise

In [65]:
# Look at the first generated example: its feature row and its target.
first_features, first_target = X[0], y[0]
print(first_features)
print(first_target)


[-0.48563418  1.60415435]
<NDArray 2 @cpu(0)>

[-2.25296068]
<NDArray 1 @cpu(0)>


In [66]:
# Pair features with targets and iterate over them in shuffled mini-batches.
batch_size = 4
dataset = gluon.data.ArrayDataset(X, y)
train_data = gluon.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [67]:
# Randomly initialised model parameters: weight matrix w and bias b.
w = nd.random_normal(
    shape=(num_inputs, num_outputs),
    ctx=model_ctx,
)
b = nd.random_normal(
    shape=num_outputs,
    ctx=model_ctx,
)
# Collected in one list so gradients and updates can be handled in a loop.
params = [w, b]

In [76]:
# Allocate gradient storage for every parameter so that backward() can fill
# in param.grad.
for param in params:
    param.attach_grad()

In [77]:
def net(X):
    """Linear model: the matrix product of ``X`` with ``w``, plus ``b``.

    Reads the notebook-level parameters ``w`` and ``b``.
    """
    weighted = mx.nd.dot(X, w)
    return weighted + b

In [78]:
def square_loss(yhat, y):
    """Mean squared error between predictions and targets.

    Args:
        yhat: NDArray of predictions.
        y: NDArray of targets. When it holds the same number of elements as
            ``yhat`` but has a different shape (for example ``(n,)`` against
            ``(n, 1)``), it is reshaped to ``yhat.shape`` first.

    Returns:
        The mean of the element-wise squared differences, as returned by
        ``nd.mean``.
    """
    # Subtracting an (n,) array from an (n, 1) array can broadcast to (n, n),
    # which would silently average over every prediction/target pairing.
    # Aligning the shapes keeps the difference element-wise. Inputs whose
    # shapes already match, or whose sizes differ, are left untouched.
    if y.shape != yhat.shape and y.size == yhat.size:
        y = y.reshape(yhat.shape)
    return nd.mean((yhat - y) ** 2)

In [79]:
def SGD(params, lr):
    """Apply one gradient-descent step to every parameter, in place.

    Args:
        params: iterable of arrays, each exposing its gradient as ``.grad``.
        lr: learning rate that scales the gradient.
    """
    for param in params:
        step = lr * param.grad
        # Slice assignment writes into the existing array instead of
        # rebinding the name, so the caller's objects are updated.
        param[:] = param - step

In [90]:
epochs = 10
learning_rate = 1e-4
# Batches per epoch; used to turn the summed batch losses into an average.
num_batches = num_examples / batch_size

for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        # Move the batch to the model's device and make the labels a column
        # so they line up with the (batch, 1) output of net().
        data = data.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx)
        label = label.reshape((-1, 1))

        # Forward pass under record() so backward() can compute gradients.
        with autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()

        SGD(params, learning_rate)
        cumulative_loss = cumulative_loss + loss.asscalar()

    # Average batch loss for this epoch.
    mean_loss = cumulative_loss / num_batches
    print(mean_loss)

0.00988283888692
0.0098828762029
0.0098828742602
0.00988287792697
0.009882837565
0.00988287159238
0.00988278206747
0.00988286801444
0.00988287745854
0.00988287407557


In [91]:
# ------------------------------------------------------------------
# The first training loop already fitted w and b, so draw fresh random
# values before training again. Slice assignment overwrites the existing
# arrays in place.
# ------------------------------------------------------------------
w[:] = nd.random_normal(
    shape=(num_inputs, num_outputs),
    ctx=model_ctx,
)
b[:] = nd.random_normal(
    shape=num_outputs,
    ctx=model_ctx,
)

# ------------------------------------------------------------------
# Helper that plots the loss history and the current model fit
# ------------------------------------------------------------------
def plot(losses, X, sample_size=100):
    """Draw the loss curve next to a scatter of model output vs. real_fn.

    Args:
        losses: sequence of loss values, plotted against their index.
        X: feature array; its first ``sample_size`` rows are evaluated.
        sample_size: number of leading rows of ``X`` to plot.

    Reads the notebook-level ``net`` and ``real_fn``, and shows the figure.
    """
    _, (loss_ax, fit_ax) = plt.subplots(1, 2)

    # Left panel: loss value per recorded step.
    loss_ax.set_title('Loss during training')
    loss_ax.plot(list(range(len(losses))), losses, '-r')

    # Right panel: model output and real_fn output, both plotted against the
    # second feature column of the sampled rows.
    sample = X[:sample_size, :]
    second_feature = X[:sample_size, 1].asnumpy()
    fit_ax.set_title('Estimated vs real function')
    fit_ax.plot(second_feature, net(sample).asnumpy(), 'or', label='Estimated')
    fit_ax.plot(second_feature, real_fn(sample).asnumpy(), '*g', label='Real')
    fit_ax.legend()

    plt.show()

learning_rate = 1e-4
losses = []

# Show the model before any update; the loss history is still empty here.
plot(losses, X)

for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        # Move the batch to the model's device and make the labels a column
        # so they line up with the (batch, 1) output of net().
        data = data.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx)
        label = label.reshape((-1, 1))

        # Forward pass under record() so backward() can compute gradients.
        with autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()

        SGD(params, learning_rate)
        cumulative_loss = cumulative_loss + loss.asscalar()

    # Average batch loss for this epoch; i is the index of the last batch.
    mean_loss = cumulative_loss / num_batches
    print("Epoch %s, batch %s. Mean loss: %s" % (e, i, mean_loss))
    losses.append(mean_loss)

# Show the loss history and the fit after training.
plot(losses, X)

Epoch 0, batch 2499. Mean loss: 30.9478561698
Epoch 1, batch 2499. Mean loss: 11.5304700437
Epoch 2, batch 2499. Mean loss: 4.29999789761
Epoch 3, batch 2499. Mean loss: 1.60748954448
Epoch 4, batch 2499. Mean loss: 0.604935379874
Epoch 5, batch 2499. Mean loss: 0.231497269925
Epoch 6, batch 2499. Mean loss: 0.0923481676151
Epoch 7, batch 2499. Mean loss: 0.0405698521162
Epoch 8, batch 2499. Mean loss: 0.0213062812812
Epoch 9, batch 2499. Mean loss: 0.0141362767211
