In [1]:
%matplotlib inline
import mxnet as mx
import numpy as np

Imperative
The basic building block for the imperative API is an NDArray. Much like NumPy, this object holds a tensor (or multi-dimensional array). Unlike NumPy, this object also stores a pointer to where the memory is held (CPU or GPU).

In [2]:
# Allocate two zero-filled vectors of length 10; the `ctx` argument selects
# the device (CPU vs. GPU) on which each array is placed.
cpu_tensor = mx.nd.zeros((10,), ctx=mx.cpu())
# NOTE(review): presumably requires a GPU-enabled MXNet build and a visible GPU -- confirm.
gpu_tensor = mx.nd.zeros((10,), ctx=mx.gpu())

In [3]:
# Device context shared by every array created in this and the following cells.
ctx = mx.gpu()

# Two length-10 vectors of ones placed on `ctx`.
a = mx.nd.ones((10,), ctx=ctx)
b = mx.nd.ones((10,), ctx=ctx)

# Element-wise arithmetic written directly on the arrays.
c = (a * 5 + b) / 2
d = c + 1

# Copy each result to a NumPy array and show them on one line, separated by
# spaces. A single-argument print(...) call produces the same text under
# Python 2 and Python 3, whereas the `print x, y` statement form is a
# SyntaxError on Python 3.
print(' '.join(str(arr.asnumpy()) for arr in (a, b, c, d)))

[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.] [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.] [ 3.  3.  3.  3.  3.  3.  3.  3.  3.  3.] [ 4.  4.  4.  4.  4.  4.  4.  4.  4.  4.]


Symbolic
While the imperative API is extremely powerful by itself, it is often very rigid and hard to prototype with. Everything must be known about the computation ahead of time, and must be written out by the user. The symbolic API tries to remedy this. Instead of working with defined arrays, you work with symbols that can be “compiled” or interpreted into an executable set of operations.

In [4]:
# Declare two named symbolic variables; no arrays are attached to them here.
a1 = mx.sym.Variable('A')
b1 = mx.sym.Variable('B')

# Build the expression (A * 5 + B) / 2 + 1 out of symbols.
c1 = (a1 * 5 + b1) / 2
d1 = c1 + 1

# Show the names of the arguments the final symbol needs and of the outputs it
# produces. print(...) with one argument behaves identically on Python 2 and
# Python 3; the bare `print x` statement only parses on Python 2.
print(d1.list_arguments())
print(d1.list_outputs())

['A', 'B']
['_plusscalar0_output']


In [5]:
# Shape of each input, keyed by the symbol's argument name.
input_shapes = {'A': (10,), 'B': (10,)}

# Ask the symbol for the shapes of its arguments, outputs and auxiliary states
# given the input shapes above.
arg_shape, out_shape, aux_shape = d1.infer_shape(**input_shapes)

# Print the three shape lists on one line, separated by spaces. Joining the
# str() of each list reproduces the text of `print x, y, z` while remaining
# valid syntax on Python 3 as well as Python 2.
print(' '.join(str(shapes) for shapes in (arg_shape, out_shape, aux_shape)))

[(10L,), (10L,)] [(10L,)] []


In [6]:
# Concrete arrays for the symbol's inputs, keyed by argument name and placed
# on the same device context the executor is bound to.
input_arguments = {
    'A': mx.nd.ones((10,), ctx=ctx),
    'B': mx.nd.ones((10,), ctx=ctx),
}

# Bind the symbol to its input arrays. No gradient arrays are passed and
# grad_req is 'null'; this executor is only used for a forward pass below.
executor = d1.bind(ctx=ctx, args=input_arguments, grad_req='null')

In [7]:
# Display the executor's mapping from argument name to its bound NDArray.
executor.arg_dict

{'A': <NDArray 10 @gpu(0)>, 'B': <NDArray 10 @gpu(0)>}

In [11]:
# Overwrite the contents of the bound input arrays in place (slice assignment
# keeps the same NDArray objects) with 10 uniform random values each.
executor.arg_dict['A'][:] = np.random.rand(10)
executor.arg_dict['B'][:] = np.random.rand(10)

# Run the bound computation on the new inputs.
executor.forward()

# Last expression of the cell: the first (and only listed) output as a NumPy array.
executor.outputs[0].asnumpy()

array([ 2.60857749,  2.92020893,  3.51389575,  2.54067516,  2.88675952,
        3.11647558,  1.49772048,  3.15574646,  2.55480313,  3.22973537], dtype=float32)

In [12]:
# Arrays that hold the gradient with respect to each input, keyed by argument
# name and placed on the same device as the inputs.
grad_arguments = {
    'A': mx.nd.ones((10,), ctx=ctx),
    'B': mx.nd.ones((10,), ctx=ctx),
}

# Re-bind the same symbol, this time supplying gradient arrays and requesting
# that gradients be written ('write'). The input arrays are reused, and the
# name `executor` now refers to this new executor.
executor = d1.bind(
    ctx=ctx,
    args=input_arguments,
    args_grad=grad_arguments,
    grad_req='write',
)

In [15]:
# Display the argument-name -> NDArray mapping of the current executor.
executor.arg_dict

{'A': <NDArray 10 @gpu(0)>, 'B': <NDArray 10 @gpu(0)>}

In [17]:
# Forward pass with is_train=True, followed by a backward pass further down.
executor.forward(is_train=True)

# Author's note: I did not fully understand this part; the documentation
# explains it as follows.
#   In this particular example, the output symbol is not a scalar or loss
#   symbol, so taking its gradient directly is not possible. What is commonly
#   done instead is to feed in the gradient from a future computation. This is
#   essentially how backpropagation works.
# Here the incoming gradient is a vector of ones with the output's shape.
out_grad = mx.nd.ones((10,), ctx=ctx)
executor.backward(out_grads=[out_grad])

In [23]:
# Show the gradient arrays filled in by the backward pass, as NumPy arrays.
# For d1 = (A * 5 + B) / 2 + 1 with an incoming gradient of ones, the expected
# values are 2.5 for the first array and 0.5 for the second.
# print(...) with a single argument is valid on both Python 2 and Python 3;
# the `print x` statement form fails to parse on Python 3.
print(executor.grad_arrays[0].asnumpy())
print(executor.grad_arrays[1].asnumpy())

[ 2.5  2.5  2.5  2.5  2.5  2.5  2.5  2.5  2.5  2.5]
[ 0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5]
