In [9]:
import torch

<b>Differentiation of a function with scalar input and scalar output</b> \
y = 3x^2 + 4x + 2ux <br/>
<b>find:</b> 
dy/dx at x = 3
dy/du at u = 2

then, \
z = 3*x**2 + x \
find: \
dz/dx at x = 3

then, \
find dy/dx at x = 7

In [10]:

# Leaf tensors created with requires_grad=True receive a .grad after backward().
x = torch.tensor(3.0, requires_grad=True)
u = torch.tensor(2.0, requires_grad=True)
y = 3*x**2 + 4*x + 2*u*x

# A single backward pass fills in the partial derivative for each leaf:
#   dy/dx = 6x + 4 + 2u = 26   and   dy/du = 2x = 6   at (x, u) = (3, 2).
y.backward()
print('dy/dx for x = 3 : ', x.grad)
# u.grad is the derivative with respect to u, so it is labelled dy/du.
print('dy/du for u = 2 : ', u.grad)

dy/dx for x = 3 :  tensor(26.)
dy/dx for u = 2 :  tensor(6.)


In [11]:
# x.grad still holds the value written by the previous backward() call;
# zero it in place so the result below reflects z alone.
x.grad.zero_()

z = 3*x**2 + x
z.backward()

dz_dx = x.grad
print('dz/dx at x = 3 : ', dz_dx)

dz/dx at x = 3 :  tensor(19.)


In [12]:
# x is rebound to a brand-new tensor here, so y has to be rebuilt from it:
# the y computed earlier was built from the previous x and would not
# populate the gradient of this one.
x = torch.tensor(7.0, requires_grad=True)
y = 3*x**2 + 4*x + 2*u*x

y.backward()
dy_dx = x.grad
print(dy_dx)

tensor(50.)


<b>Differentiation of a function with vector input and scalar output</b> 


In [None]:
%reset #clears all variables and imports
import torch

<i>NOTE: torch.tensor([[1, 7, -2]]) and torch.tensor([1, 7, -2]) are not the same: the first is a 2-D tensor of shape (1, 3), the second is a 1-D tensor of shape (3,).</i>

In [None]:
# A nested list produces a 2-D tensor: one row with three columns.
x = torch.tensor([[1, 7, -2]])
x_t = x.t()
print(x, x_t, x.shape, x_t.shape, x @ x_t)
# torch.dot(x, x) would throw an error here, because the dot product is only
# allowed for 1-D tensors.

# A flat list produces a 1-D tensor.
x = torch.tensor([1, 7, -2])
x_t = x.t()
print(x, x_t, x.shape, x_t.shape, x @ x_t)
torch.dot(x, x)

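What does the following differentiation result mean? For a scalar output y and a vector input x, x.grad is the gradient vector whose i-th entry is dy/dx_i. Here y = 3 (w . x) + (x . x) with w = (1, 7, 5), so dy/dx = 3w + 2x.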

In [85]:
x = torch.tensor([1.0, 7.0, -2.0], requires_grad=True)
w = torch.tensor([1.0, 7.0, 5.0])

# y = 3 (w . x) + (x . x) is a scalar. For a 1-D tensor the transpose is a
# no-op, so the second term is written directly as a dot product.
y = 3*torch.dot(x, w) + torch.dot(x, x)
print(y)

# x.grad[i] = dy/dx_i = 3*w[i] + 2*x[i]  ->  [5, 35, 11]
y.backward()
print(x.grad)

tensor(174., grad_fn=<AddBackward0>)
tensor([ 5., 35., 11.])


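<b>Differentiation of a function with vector output</b> \
By default, backward() can create gradients only for scalar outputs.\
For a vector-valued function, a tensor named "gradient" with the same shape as the output has to be passed to backward().\
<i>What does the output actually mean?</i> x.grad is the sum over i of gradient[i] * dy[i]/dx (a vector-Jacobian product); with gradient = ones this equals the gradient of sum(y).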

In [1]:
%reset 
import torch

In [2]:
x = torch.tensor([1.0, 7.0, -2.0], requires_grad=True)
y = 3*x*x + x**2
print(y)

# A bare y.backward() throws an error here, because grad can be implicitly
# created only for scalar outputs. For a vector output an upstream tensor
# shaped like y is supplied instead (meaning of the gradient tensor => page 86
# of the Dive into Deep Learning book).
upstream = torch.ones_like(y)
y.backward(gradient=upstream)
print(x.grad)

tensor([  4., 196.,  16.], grad_fn=<AddBackward0>)
tensor([  8.,  56., -16.])


<b>Differentiation of functions defined with Python control flow</b>


In [11]:
def f(a):
    """Scale ``a`` using data-dependent Python control flow.

    ``a`` is doubled until the norm of the result is at least 1000. The
    doubled tensor is returned unchanged when its elements sum to a positive
    value, and multiplied by 100 otherwise.

    Args:
        a: a tensor; the example in this notebook passes a one-element
           floating-point tensor with ``requires_grad=True``.

    Returns:
        A tensor with the same shape as ``a``.

    Raises:
        ValueError: if ``a`` is all zeros or empty. Its norm is 0 and doubling
            never changes that, so the loop below would never terminate.
    """
    b = a * 2
    if b.norm() == 0:
        raise ValueError(
            "f(a) needs a non-empty input with a non-zero element; "
            "the doubling loop would never terminate otherwise"
        )
    # The number of doublings depends on the value of a.
    while b.norm() < 1000:
        b = b * 2
    if b.sum() > 0:
        c = b
    else:
        c = 100 * b
    return c

# f.backward() would throw an error: backward() is called on the tensor that
# f returns, not on the Python function itself.
a = torch.tensor([5.0], requires_grad=True)
d = f(a)
d.backward()

label = 'df/da at a = 5: '
print(label, a.grad)

df/da at a = 5:  tensor([256.])
