# Autograd
useful video: https://www.youtube.com/watch?v=OIenNRt2bjg

Neural network training requires differentiation (during backpropagation).
Autograd is PyTorch's automatic differentiation engine.
It computes partial derivatives using the chain rule (vector-Jacobian products).

Also covered: how gradients accumulate across `backward()` calls, and emptying (zeroing) the gradient.

In [1]:
import torch

## X without grad option

In [2]:
# Baseline: x is created WITHOUT requires_grad, so autograd records nothing
# and every tensor derived from x has grad_fn == None.
print("------ x without grad option --------")
x = torch.randn(3)
y = x + 2
print(x, y, sep="\n")
print(x.grad_fn, y.grad_fn, sep="\n")

print("------ z which is a function of y and hence x--------")
z = y * y * 3
print(z, z.grad_fn, sep="\n")

print("-----------")
# Reduce z to a single scalar value; still untracked.
z = torch.mean(z)
print(z, z.grad_fn, sep="\n")


------ x without grad option --------
tensor([ 0.1321, -1.4492, -3.5430])
tensor([ 2.1321,  0.5508, -1.5430])
None
None
------ z which is a function of y and hence x--------
tensor([13.6369,  0.9102,  7.1426])
None
-----------
tensor(7.2299)
None


## X with grad option

In [3]:
print("------ x with grad option --------")
# With requires_grad=True every operation on x is recorded on a computational
# graph, so gradients can later be propagated back to x.
x = torch.randn(3, requires_grad=True)
y = x + 2
y = y.mean()  # reduce to a scalar so that y.backward() needs no argument
print(x)
print(y)
print(x.grad_fn)  # None: x is a leaf tensor, it was not produced by an operation
print(y.grad_fn)  # the backward node of the last operation that produced y

print("------ z which is a function of y and hence x-------")
z = y * y * 3
print(z)
print(z.grad_fn)

print("-----------")
z = z.mean()  # z is already a scalar here; this only appends a mean node
print(z)
print(z.grad_fn)

print(" ---- y.backward and z.backward----")

# backward() without arguments only works on scalar tensors; a non-scalar
# tensor needs an explicit `gradient` argument.
# That is why y = y.mean() and z = z.mean() are used above.

print(x.grad)  # None: no backward pass has run yet
# z's graph contains y's graph, so keep the graph alive for the second pass
# instead of relying on the freed graph still being usable.
y.backward(retain_graph=True)  # x.grad = dy/dx (1/3 per element)
print(x.grad)
# Gradients ACCUMULATE in x.grad: after this call it holds dy/dx + dz/dx
# (1/3 + 2*y per element), not dz/dx alone.
z.backward()
print(x.grad)


------ x with grad option --------
tensor([0.9165, 1.7777, 0.7955], requires_grad=True)
tensor(3.1632, grad_fn=<MeanBackward0>)
None
<MeanBackward0 object at 0x78a1526b8a60>
------ z which is a function of y and hence x-------
tensor(30.0173, grad_fn=<MulBackward0>)
<MulBackward0 object at 0x78a1526b8a60>
-----------
tensor(30.0173, grad_fn=<MeanBackward0>)
<MeanBackward0 object at 0x78a1526b8a60>
 ---- y.backward and z.backward----
None
tensor([0.3333, 0.3333, 0.3333])
tensor([6.6597, 6.6597, 6.6597])


In [4]:
print("------ x with grad option --------")
# x is tracked by autograd; y is the scalar mean of (x + 2).
x = torch.randn(3, requires_grad=True)
y = (x + 2).mean()
print(x, y, sep="\n")
print(x.grad_fn, y.grad_fn, sep="\n")

print("\n------ z which is a function of y and hence x-------")
z = y * y * 3
print(z, z.grad_fn, sep="\n")

print("\n-----------")
# z is already a scalar; the mean leaves its value unchanged.
z = torch.mean(z)
print(z, z.grad_fn, sep="\n")


print("\n ---- z.backward-----")
# Only z is back-propagated here, so x.grad ends up holding dz/dx alone
# (2*y per element).
print(x.grad)
z.backward()
print(x.grad)

------ x with grad option --------
tensor([-2.6937, -0.1593,  0.3369], requires_grad=True)
tensor(1.1613, grad_fn=<MeanBackward0>)
None
<MeanBackward0 object at 0x78a1529132e0>

------ z which is a function of y and hence x-------
tensor(4.0459, grad_fn=<MulBackward0>)
<MulBackward0 object at 0x78a1529132e0>

-----------
tensor(4.0459, grad_fn=<MeanBackward0>)
<MeanBackward0 object at 0x78a1529132e0>

 ---- z.backward-----
None
tensor([2.3226, 2.3226, 2.3226])


In [5]:
print("------ x,w with grad option --------")
# Two tracked leaf tensors; y1 depends on both of them.
x1 = torch.randn(3, requires_grad=True)
w1 = torch.randn(3, requires_grad=True)
y1 = (2 * w1 + x1 + 2).mean()
print(x1, y1, sep="\n")
print(x1.grad_fn, y1.grad_fn, sep="\n")

print("\n------ z which is a function of y and hence x,w-------")
z1 = y1 * y1 * 3
print(z1, z1.grad_fn, sep="\n")

print("\n-----------")
# z1 is already a scalar; the mean leaves its value unchanged.
z1 = torch.mean(z1)
print(z1, z1.grad_fn, sep="\n")

print("\n ---- y.backward and z.backward----")

print(x1.grad)
# retain_graph=True keeps the graph after the pass, so it can be
# back-propagated through again by the next backward() call.
# One backward() call fills the .grad of EVERY leaf: here dy/dx1 goes into
# x1.grad and dy/dw1 into w1.grad.
y1.backward(retain_graph=True)
print(x1.grad)
# Same for z1: dz/dx1 and dz/dw1 are ADDED to the values already stored,
# so x1.grad now holds dy/dx1 + dz/dx1.
z1.backward(retain_graph=True)
print(x1.grad)


------ x,w with grad option --------
tensor([ 0.5146, -0.1458,  0.1280], requires_grad=True)
tensor(2.7758, grad_fn=<MeanBackward0>)
None
<MeanBackward0 object at 0x78a15270d150>

------ z which is a function of y and hence x,w-------
tensor(23.1157, grad_fn=<MulBackward0>)
<MulBackward0 object at 0x78a15270d150>

-----------
tensor(23.1157, grad_fn=<MeanBackward0>)
<MeanBackward0 object at 0x78a15270d150>

 ---- y.backward and z.backward----
None
tensor([0.3333, 0.3333, 0.3333])
tensor([5.8850, 5.8850, 5.8850])
