# Example 1. Derivative of simple scalar function

In [1]:
import torch

$$ y = x^2 + 7x + 20 $$

In [2]:
def f(x):
    """Evaluate the quadratic y = x^2 + 7x + 20 at ``x``.

    Only arithmetic operators are used, so ``x`` may be a plain number or a
    tensor; the result has the same kind as the input.
    """
    squared_term = x ** 2
    linear_term = 7 * x
    return squared_term + linear_term + 20

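$$ \frac{\partial y}{\partial x} = 2x + 7 $$

At $x = -3$ this evaluates to $2(-3) + 7 = 1$, which is the value autograd should reproduce below.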

In [3]:
# Evaluate the derivative at x = -3; requires_grad_() switches on autograd
# tracking for this leaf tensor so that backward() can fill in x.grad.
x = torch.tensor(-3.0).requires_grad_()

In [4]:
# Forward pass: evaluating f on the tracked tensor records the operations
# that backward() will later differentiate through.
y = f(x)

In [5]:
# Display y; the grad_fn in the repr shows it is attached to the autograd graph.
y

tensor(8., grad_fn=<AddBackward0>)

In [6]:
# Backpropagate from the scalar y; this stores dy/dx in x.grad.
y.backward()

In [7]:
# Should equal the analytic derivative 2x + 7 at x = -3, i.e. 1.
x.grad

tensor(1.)

# Example 2. Derivative of sigmoid function

In [8]:
# Four sample inputs; requires_grad_() marks z as a leaf tensor tracked by
# autograd so gradients with respect to it can be requested later.
z = torch.tensor([5.0, -2.0, -1.0, 3.0]).requires_grad_()
z

tensor([ 5., -2., -1.,  3.], requires_grad=True)

In [9]:
# Element-wise sigmoid of z (method form of torch.sigmoid).
a = z.sigmoid()

In [10]:
# Display the sigmoid outputs; the grad_fn in the repr shows a is part of the autograd graph.
a

tensor([0.9933, 0.1192, 0.2689, 0.9526], grad_fn=<SigmoidBackward0>)

In [11]:
# backward() without arguments needs a scalar, so differentiate only the
# output at index 2 with respect to z.
a[2].backward()

In [12]:
# Only index 2 is non-zero: sigmoid is element-wise, so a[2] depends on z[2] alone.
z.grad

tensor([0.0000, 0.0000, 0.1966, 0.0000])

In [13]:
# Analytic check: sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)); should match z.grad[2].
a[2]*(1-a[2])

tensor(0.1966, grad_fn=<MulBackward0>)

# Example 3. Derivative of softmax function

In [14]:
import torch

In [15]:
# Re-create z as a fresh leaf tensor so its .grad starts empty rather than
# accumulating on top of the gradient left by the sigmoid example.
z = torch.tensor([5.0, -2.0, -1.0, 3.0]).requires_grad_()
z

tensor([ 5., -2., -1.,  3.], requires_grad=True)

In [16]:
# Softmax over the only dimension of z (method form of torch.softmax).
a = z.softmax(dim=0)

In [17]:
# Display the softmax outputs; they are non-negative and sum to 1 along dim 0.
a

tensor([8.7817e-01, 8.0079e-04, 2.1768e-03, 1.1885e-01],
       grad_fn=<SoftmaxBackward0>)

In [18]:
# backward() without arguments needs a scalar, so differentiate only the
# output at index 2 with respect to z.
a[2].backward()

In [19]:
# Unlike sigmoid, every entry is non-zero: each softmax output depends on all
# inputs through the shared normalisation.
z.grad

tensor([-1.9116e-03, -1.7431e-06,  2.1720e-03, -2.5871e-04])

In [20]:
# Analytic softmax Jacobian: J[i, j] = a[i] * (delta_ij - a[j]) = diag(a) - a a^T.
# torch.outer builds a a^T for a 1-D tensor of any length, so nothing is tied
# to the 4-element example. Row 2 is the gradient of a[2] and should match z.grad.
torch.diag(a) - torch.outer(a, a)

tensor([[ 1.0698e-01, -7.0323e-04, -1.9116e-03, -1.0437e-01],
        [-7.0323e-04,  8.0015e-04, -1.7431e-06, -9.5172e-05],
        [-1.9116e-03, -1.7431e-06,  2.1720e-03, -2.5871e-04],
        [-1.0437e-01, -9.5172e-05, -2.5871e-04,  1.0472e-01]],
       grad_fn=<SubBackward0>)