#### Importing the packages

In [1]:
import torch
import session_info
# NOTE(review): session_info.show() is expected to report the versions of the
# packages loaded in this session, for reproducibility — confirm against the
# session_info documentation.
session_info.show()

##### Creation of a gradient

In [2]:
# 3x3 tensor of random values; requires_grad=True asks autograd to track
# every operation applied to it.
x = torch.randn(3, 3, requires_grad=True)
print(x)

tensor([[ 0.1143,  0.2841, -0.5264],
        [-0.6123,  0.7081, -0.4720],
        [ 0.6106,  2.2232, -0.3975]], requires_grad=True)


In [3]:
# PyTorch automatically attaches a backward function (grad_fn) to the result,
# chosen according to the operation that produced it (here an addition).
y = torch.add(x, 3)
print(y)

tensor([[3.1143, 3.2841, 2.4736],
        [2.3877, 3.7081, 2.5280],
        [3.6106, 5.2232, 2.6025]], grad_fn=<AddBackward0>)


In [4]:
# Same idea for a multiplication: the result carries a 'Mul' grad_fn.
z = (y * y).mul(4)
print(z)

tensor([[ 38.7956,  43.1403,  24.4749],
        [ 22.8051,  54.9995,  25.5625],
        [ 52.1460, 109.1273,  27.0919]], grad_fn=<MulBackward0>)


In [5]:
# Reductions such as mean are tracked for the backward pass as well.
z = torch.mean(z)
print(z)

tensor(44.2381, grad_fn=<MeanBackward0>)


In [6]:
# Compute the gradient of z with respect to x.
z.backward()
# The computed gradients are stored in the .grad attribute of x.
print(x.grad)

tensor([[2.7683, 2.9192, 2.1988],
        [2.1224, 3.2961, 2.2471],
        [3.2094, 4.6428, 2.3133]])


In [7]:
# backward() without an argument requires z to be a scalar. Otherwise we must
# create a tensor with the same shape as the output and pass it to backward()
# as its argument.
x = torch.randn(3, 3, requires_grad=True)
y = torch.add(x, 3)
z = (y * y).mul(4)
print(z)
# Without a tensor like this one, backward() raises an error because z has
# shape (3, 3) and is not a scalar.
v = torch.randn(3, 3)
z.backward(gradient=v)
print(x.grad)

tensor([[52.8347, 77.1650, 44.2443],
        [26.7257, 56.7161, 60.0763],
        [30.7208, 33.5716, 46.7049]], grad_fn=<MulBackward0>)
tensor([[-21.5975, -18.9212,   5.1556],
        [ 10.3257,  45.3074,  -5.8241],
        [-22.1698, -73.6094, -57.6298]])


##### Avoiding gradient calculation

In [8]:
# Baseline: every intermediate result is tracked by autograd.
x = torch.randn(3, 3, requires_grad=True)
y = torch.randn(3, 3, requires_grad=True)
a = torch.add(x, y)
y = y.mul(y)  # y is rebound to the squared tensor, which carries a grad_fn
z = (a * y).mul(4)
z = torch.mean(z)
print(x, y, a, z, sep="\n")

tensor([[-1.5059, -1.4039, -1.5108],
        [ 0.6519,  0.1825,  0.1147],
        [ 0.6961,  1.2298, -0.9203]], requires_grad=True)
tensor([[0.1327, 0.0459, 0.0281],
        [0.8244, 2.1726, 0.0407],
        [0.1453, 0.0200, 3.4654]], grad_fn=<MulBackward0>)
tensor([[-1.1417, -1.1896, -1.3433],
        [ 1.5598,  1.6565,  0.3165],
        [ 0.3149,  1.0885, -2.7819]], grad_fn=<AddBackward0>)
tensor(-2.1862, grad_fn=<MeanBackward0>)


In [9]:
# There are three ways to block gradient calculation:
#
#   y.requires_grad_(False)  -> modifies the tensor in place
#   y.detach()               -> creates a new tensor that does not require grad
#   with torch.no_grad():    -> operations inside the block are computed without grad

x = torch.randn(3, 3, requires_grad=True)
y = torch.randn(3, 3, requires_grad=True)
a = torch.add(x, y)
y.requires_grad_(False)
y = y.mul(y)
z = (a * y).mul(4)
z = torch.mean(z)
print(x)
print(y)  # y no longer shows grad_fn=<MulBackward0>
print(a)
print(z)

tensor([[ 1.6505,  1.1895,  0.0063],
        [-0.6821,  0.0202,  1.1396],
        [-1.5339, -0.8748,  1.5900]], requires_grad=True)
tensor([[3.6010e-02, 5.7549e-03, 1.6263e-01],
        [7.4891e-01, 6.9824e-01, 1.2357e-03],
        [1.0960e-01, 2.0458e+00, 5.9786e-02]])
tensor([[ 1.4607,  1.1136,  0.4095],
        [-1.5475,  0.8558,  1.1747],
        [-1.8649, -2.3051,  1.8345]], grad_fn=<AddBackward0>)
tensor(-2.3311, grad_fn=<MeanBackward0>)


In [10]:
x = torch.randn(3, 3, requires_grad=True)
y = torch.randn(3, 3, requires_grad=True)
a = torch.add(x, y)
b = y.detach()
y = torch.mul(b, b)
z = torch.mean((a * y).mul(4))
print(x)
print(b)  # b has no grad_fn, and neither does y, which is built from b
print(y)
print(a)
print(z)

tensor([[ 0.8030,  0.4487, -1.1967],
        [-0.1160, -0.4889,  0.1093],
        [-0.0298,  0.8566,  0.7563]], requires_grad=True)
tensor([[-0.6931, -0.8447, -0.1337],
        [ 1.3497, -0.4994,  1.0149],
        [-0.3147,  0.7554,  0.1300]])
tensor([[0.4804, 0.7136, 0.0179],
        [1.8218, 0.2494, 1.0301],
        [0.0990, 0.5706, 0.0169]])
tensor([[ 0.1099, -0.3961, -1.3304],
        [ 1.2337, -0.9883,  1.1243],
        [-0.3445,  1.6120,  0.8862]], grad_fn=<AddBackward0>)
tensor(1.6917, grad_fn=<MeanBackward0>)


In [11]:
x = torch.randn(3, 3, requires_grad=True)
y = torch.randn(3, 3, requires_grad=True)
a = torch.add(x, y)
# Only the squaring of y is excluded from tracking.
with torch.no_grad():
    y = y.mul(y)
z = (a * y).mul(4)
z = torch.mean(z)
print(x)
print(y)  # y no longer shows grad_fn=<MulBackward0>
print(a)
print(z)

tensor([[ 0.4044,  0.4430, -0.9974],
        [ 1.7554, -0.5372, -2.4439],
        [-0.6931,  0.0660,  1.3092]], requires_grad=True)
tensor([[0.4598, 2.4854, 2.4870],
        [0.8949, 0.8754, 0.7962],
        [0.1314, 0.4976, 1.5905]])
tensor([[ 1.0825,  2.0195,  0.5796],
        [ 0.8094, -1.4729, -3.3362],
        [-0.3306, -0.6393,  0.0480]], grad_fn=<AddBackward0>)
tensor(1.5341, grad_fn=<MeanBackward0>)


In [12]:
x = torch.randn(3, 3, requires_grad=True)
y = torch.randn(3, 3, requires_grad=True)
# Both intermediate results are computed without tracking this time.
with torch.no_grad():
    a = torch.add(x, y)
    y = y.mul(y)
z = (a * y).mul(4)
z = torch.mean(z)
print(x)
print(y)
print(a)
# Computing 'a' inside the with statement as well removes the grad_fn
# from both a and z.
print(z)

tensor([[-0.9041, -0.5193, -0.5964],
        [-2.2481, -0.4334,  0.4614],
        [ 0.3522, -0.9819, -1.5909]], requires_grad=True)
tensor([[4.0364, 0.2205, 3.3057],
        [0.2523, 0.3319, 2.6696],
        [1.1538, 1.8679, 4.4042]])
tensor([[ 1.1050, -0.0497,  1.2218],
        [-2.7504,  0.1427,  2.0953],
        [ 1.4263,  0.3848, -3.6895]])
tensor(-0.1999)


In [13]:
x = torch.randn(3, 3, requires_grad=True)
y = torch.randn(3, 3, requires_grad=True)
# Only the sum is excluded from tracking.
with torch.no_grad():
    a = torch.add(x, y)
y = y.mul(y)
z = (a * y).mul(4)
z = torch.mean(z)
print(x)
print(y)
print(a)
# Here y is still tracked (it has a grad_fn), but a is not, so gradients
# flow through y but not through a and x.
print(z)

tensor([[ 0.0183,  0.6481,  0.1480],
        [ 0.8674,  0.9631, -0.9559],
        [-0.6429,  0.4659, -0.1904]], requires_grad=True)
tensor([[1.5266e-01, 3.2413e-01, 6.0584e-04],
        [5.1309e+00, 1.6359e+00, 9.0738e-01],
        [4.0881e-01, 1.1446e-02, 1.4528e-01]], grad_fn=<MulBackward0>)
tensor([[-0.3725,  0.0788,  0.1234],
        [ 3.1325, -0.3159, -1.9085],
        [-1.2823,  0.5729,  0.1907]])
tensor(5.9123, grad_fn=<MeanBackward0>)


##### Avoiding gradient accumulation during training

In [14]:
# Each call to backward() adds the new gradients to whatever is already
# stored in the .grad attribute.

x = torch.ones(3, 3, requires_grad=True)
for epoch in range(3):
    output = torch.sum(x * 3)
    output.backward()
    print(x.grad)

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
tensor([[6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.]])
tensor([[9., 9., 9.],
        [9., 9., 9.],
        [9., 9., 9.]])


In [15]:
# Accumulated gradients are incorrect for training, so the gradient must be
# reset after each pass.
x = torch.ones(3, 3, requires_grad=True)
for epoch in range(3):
    output = torch.sum(x * 3)
    output.backward()
    print(x.grad)
    # Reset x.grad to zero in place before the next iteration.
    x.grad.zero_()

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])


###### Example of backpropagation

In [16]:
# Input and target value (no gradient needed for either).
x = torch.tensor(1.0)
y = torch.tensor(2.0)

# The weight is the only tensor tracked by autograd.
w = torch.tensor(1.0, requires_grad=True)

# Forward pass, then the squared-error loss.
y_pred = torch.mul(w, x)
loss = torch.pow(y_pred - y, 2)
print(loss)
# Backward pass: fills w.grad with d(loss)/dw.
loss.backward()
print(w.grad)
# Next steps (not implemented here): update w, then run the next
# forward and backward pass.

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)
