In [2]:
from mxnet import autograd, np, npx

# Enable MXNet's NumPy-compatible mode (npx.set_np) before any arrays are created.
npx.set_np()

# Input vector [0., 1., 2., 3.]; the gradients below are taken with respect to it.
x = np.arange(4.0)
x

array([0., 1., 2., 3.])

In [4]:
# Attach gradient storage to x; it reads as all zeros until a backward pass fills it in.
x.attach_grad()
x.grad

array([0., 0., 0., 0.])

In [5]:
# Record the forward computation so that it can be differentiated afterwards.
with autograd.record():
    # y = 2 * (x . x), a scalar (28 for x = [0, 1, 2, 3])
    y = 2 * np.dot(x, x)
y

array(28.)

In [6]:
# Backpropagate from y; the resulting gradient with respect to x is stored in x.grad.
y.backward()
x.grad

array([ 0.,  4.,  8., 12.])

In [8]:
# Sanity check: the gradient of 2 * (x . x) with respect to x is 4x, compared element by element.
x.grad == 4*x

array([ True,  True,  True,  True])

In [9]:
# The gradient of sum(x) with respect to x is a vector of ones.
# The displayed result is exactly ones, so this backward pass replaced the
# earlier 4x values held in x.grad rather than adding to them.
with autograd.record():
    y = x.sum()
y.backward()
x.grad

array([1., 1., 1., 1.])

In [11]:
# Element-wise square: y is a vector here, not a scalar.
# Calling backward() on it leaves 2x in x.grad (see the output below),
# which is the gradient of the sum of y's elements with respect to x.
with autograd.record():
    y = x * x
y.backward()
x.grad

array([0., 2., 4., 6.])

In [17]:
# Differentiate z = u * x with respect to x while ignoring how y depends on x.
# u is a detached copy of y, so it is treated as a constant during backpropagation
# and dz/dx is u itself (not 3 * x^2, which differentiating x * x * x would give).
with autograd.record():
    y = x * x
    u = y.detach()
    z = u * x
z.backward()
x.grad == u

array([ True,  True,  True,  True])

In [18]:
# The computation of y = x * x was itself recorded in the previous cell, so backward()
# can still be called on y directly; dy/dx should then be 2x.
y.backward()
x.grad == 2 * x

array([ True,  True,  True,  True])

In [19]:
def f(a):
    """Scale ``a`` by a data-dependent constant using Python control flow.

    The input is doubled until its norm reaches at least 1000. The result is
    returned unchanged when its elements sum to a positive value, and
    multiplied by 100 otherwise. Every step is a multiplication by a
    constant, so for a fixed input the output is ``k * a`` for some ``k``
    that depends on the value of ``a``.

    Args:
        a: A non-zero scalar or array supporting ``*``, ``.sum()`` and
            ``np.linalg.norm``.

    Returns:
        The scaled value, with the same shape as ``a``.

    Raises:
        ValueError: If ``a`` has zero norm. Doubling zero never reaches the
            norm threshold, so the loop below would otherwise never end.
    """
    b = a * 2
    # Guard against zero input: 0 * 2 == 0 forever, so the while loop would spin.
    if np.linalg.norm(b) == 0:
        raise ValueError(
            "f(a) requires a non-zero input: doubling zero never reaches norm 1000"
        )
    # Keep doubling until the magnitude crosses the threshold.
    while np.linalg.norm(b) < 1000:
        b = b * 2
    # The branch taken depends on the sign of the element sum of the scaled value.
    if b.sum() > 0:
        c = b
    else:
        c = 100 * b
    return c

In [20]:
# Draw a random scalar input using normal()'s default parameters; no seed is set,
# so the value of a differs from run to run.
a = np.random.normal()
a.attach_grad()
# Record f's data-dependent control flow (its while loop and branch) for this particular a.
with autograd.record():
    d = f(a)
d.backward()

In [22]:
# f only ever multiplies its input by constants, so d = k * a for some k that depends on a;
# the gradient of d with respect to a is therefore k = d / a.
# NOTE(review): this is an exact floating-point comparison — confirm it cannot fail from rounding.
a.grad == d / a

array(True)