In [1]:
#!/usr/bin/python3
import torch
import torch.autograd as tag
from torch.autograd.functional import jacobian, hessian

In [14]:
# Scalar coefficient of the x**2 term; created with requires_grad=True so
# autograd tracks it.
w = torch.tensor(5.0, requires_grad=True)
def f(x):
    """Evaluate 2*x**3 + w*x**2 - 6*x element-wise.

    The quadratic coefficient is the module-level tensor ``w``.
    """
    cubic_term = 2*x**3
    quadratic_term = w*x**2
    linear_term = 6*x
    return cubic_term + quadratic_term - linear_term
def fprime(x):
    """Return the diagonal of the Jacobian of ``f`` evaluated at ``x``.

    ``create_graph=True`` is passed to ``jacobian`` so the returned tensor
    carries a grad_fn and can be differentiated again.
    """
    full_jacobian = jacobian(f, x, create_graph=True)
    return torch.diag(full_jacobian)
def fprime_actual(x):
    """Closed-form first derivative of ``f``: 6*x**2 + 2*w*x - 6.

    Reads the module-level tensor ``w``.
    """
    from_cubic = 6*x**2
    from_quadratic = 2*w*x
    return from_cubic + from_quadratic - 6
def fprimeprime_actual(x):
    """Closed-form second derivative of ``f``: 12*x + 2*w.

    Reads the module-level tensor ``w``.
    """
    from_cubic = 12*x
    from_quadratic = 2*w
    return from_cubic + from_quadratic

# Evaluation point. The two earlier candidate values of x (a linspace and an
# arange) were overwritten before use, so only the tensor actually used is
# kept. It is created with requires_grad=True, so no separate
# `x.requires_grad = True` is needed afterwards.
x = torch.tensor([3.], requires_grad=True)

# First and second derivatives of f at x obtained through autograd.
# f_xx differentiates fprime itself, so it is a Jacobian of a Jacobian diagonal.
f_x  = fprime(x)
f_xx = torch.diag(jacobian(func=fprime, inputs=x, create_graph=True))

# Print each autograd result next to its closed-form counterpart.
print(f_x)
print()
print(fprime_actual(x))
print()
print(f_xx)
print()
print(fprimeprime_actual(x))
print()

# Fail loudly if autograd and the closed-form derivatives ever disagree,
# instead of relying on comparing the printed values by eye.
assert torch.allclose(f_x, fprime_actual(x))
assert torch.allclose(f_xx, fprimeprime_actual(x))
def mse(x):
    """Return the mean of the squared elements of ``x`` as a 0-dim tensor.

    Args:
        x: floating-point tensor of residuals (any shape).

    Returns:
        0-dim tensor holding ``mean(x**2)``.
    """
    # Take the mean over the squared elements directly. Summing first and
    # then calling .mean() on the resulting 0-dim tensor is a no-op and
    # yields the *sum* of squares; the two only agree for a single element.
    return torch.mean(x**2)

# Add the function value to its autograd first and second derivatives,
# reduce the sum with mse(), and backpropagate so that w.grad is populated.
# (un-reduced alternative that was tried: loss = f(x) + f_x + f_xx)
residual = f(x) + f_x + f_xx
loss = mse(residual)
loss.backward()
print(w.grad)

tensor([78.], grad_fn=<DiagBackward>)

tensor([78.], grad_fn=<SubBackward0>)

tensor([46.], grad_fn=<DiagBackward>)

tensor([46.], grad_fn=<AddBackward0>)

tensor(6970.)


In [1]:
# Second derivative of y w.r.t. x, obtained by differentiating dydx again.
# create_graph=True keeps the result in the autograd graph so that
# .backward() can be called on it below.
# NOTE(review): dydx, x, w and y are not defined in the visible part of this
# cell; presumably they are set up in lines not shown here. Confirm.
d2ydx2 = tag.grad(dydx, x, create_graph=True)[0]
print(d2ydx2)   # should equal 46

# We can evaluate the partial derivative w.r.t. w of one quantity at a time:
# uncomment exactly one of the assignments below to choose which quantity
# .backward() is called on.
# NOTE(review): this rebinds the name f, which another cell in this file
# uses for a function.
#f = y       # d(y)/dw should equal 9
#f = dydx    # d(dydx)/dw should equal 6
f = d2ydx2  # should equal 2

# I'm using .backward() here in place of tag.grad() to simulate
# how the neural network optimization function will expect
# gradients of the loss function to be evaluated.
#
# With .backward(), the gradients need to be explicitly zeroed
# after each call. My experimentation shows that multiple calls
# to tag.grad() followed by a single call to .backward()
# produce the correct derivatives without the explicit need to
# re-zero intermediate gradients.
f.backward()
print(w.grad)

#####################################
# CALCULATION 2: SUM OF DERIVATIVES #
#####################################
# start over with fresh leaf tensors (their .grad starts out empty)
x = torch.tensor(3.0, requires_grad=True)
w = torch.tensor(5.0, requires_grad=True)

# rebuild y and its first two derivatives w.r.t. x; create_graph=True keeps
# each derivative in the autograd graph so it can be differentiated again
y = 2*x**3 + w*x**2 - 6*x
(dydx,) = tag.grad(outputs=y, inputs=x, create_graph=True)
(d2ydx2,) = tag.grad(outputs=dydx, inputs=x, create_graph=True)

# one backward pass over the sum of all three gives the gradient w.r.t. w
f = y + dydx + d2ydx2
f.backward()
print(w.grad)   # should equal 9 + 6 + 2 = 17

tensor(81., grad_fn=<SubBackward0>)
tensor(78., grad_fn=<AddBackward0>)
tensor(46., grad_fn=<AddBackward0>)
tensor(2.)
tensor(17.)
