# PyTorch - Back Propagation & Gradient Descent

### Back propagation with linear regression

In [4]:
import torch
import numpy as np

### Solution 1 - Fully manual calculations with NumPy

In [40]:
# Toy data set following y = 2 * x, so the weight to be learned is 2.
X = np.array([10.0, 20.0, 30.0, 40.0, 50.0], dtype=np.float32)  # inputs
Y = 2 * X  # targets 20, 40, ..., 100; scaling by a Python int keeps float32

In [41]:
# Initial weight: gradient descent starts from zero.
W = 0.0
# Step size. For the data above each update is
#     W <- W - learning_rate * 2200 * (W - 2)      (2200 = 2 * mean(X**2))
# which only converges for learning_rate < 2 / 2200 ~= 0.0009; that is why a
# value such as 0.001 does not work here.
learning_rate = 3e-4

In [44]:
def forward(x):
    """Linear model without bias: scale ``x`` by the current global weight ``W``."""
    prediction = W * x
    return prediction

def loss(y_actual, y_predicted):
    """Mean squared error (MSE) between ``y_actual`` and ``y_predicted``.

    Only ``-``, ``**`` and ``.mean()`` are applied to the arguments, so any
    array type offering those operations can be passed in. The result is
    symmetric in its two arguments.
    """
    residual = y_predicted - y_actual
    squared_error = residual ** 2
    return squared_error.mean()

def gradient(x, y_actual, y_predicted):
    """Gradient term of the MSE loss for the model ``y = W * x``.

    Returns ``-(2 / n) * dot(x, y_predicted - y_actual)`` where ``n`` is the
    number of samples in ``x`` (previously hard-coded as 5, which silently
    gave a wrongly scaled result for any other batch size).

    Note on the sign / argument order: with the parameters as named, the
    analytical dLoss/dW is ``+(2 / n) * dot(x, y_predicted - y_actual)``, i.e.
    the negative of what is returned here. The training loop in this notebook
    calls ``gradient(X, y, Y)`` -- the prediction is bound to ``y_actual`` and
    the target to ``y_predicted`` -- and with that order the returned value is
    the true dLoss/dW. The sign is deliberately left unchanged so that the
    existing call keeps working.

    Parameters
    ----------
    x : 1-D NumPy array of inputs.
    y_actual, y_predicted : 1-D NumPy arrays with the same length as ``x``.

    Returns
    -------
    NumPy scalar (the dot product already reduces over all samples, so no
    further averaging is needed).
    """
    n_samples = len(x)
    return np.dot(-x * 2 / n_samples, y_predicted - y_actual)
    

In [43]:
# Plain full-batch gradient descent on the single weight W (20 steps).
for epoch in range(20):
    # Forward pass, then the MSE of the current predictions. The prediction is
    # passed as the first argument; MSE is symmetric, so the order of the two
    # arrays does not affect the value.
    y = forward(X)
    l = loss(y, Y)

    # Same (prediction, target) order as for loss(); gradient() depends on
    # this order to return dLoss/dW with the correct sign.
    grad = gradient(X, y, Y)

    # Log the state *before* this epoch's update. `epoch % 1` is always 0, so
    # every epoch is printed; raise the modulus to log less often.
    if epoch % 1 == 0:
        print(f'epoch={epoch}, loss={l:.5f}, W={W:.5f}, grad={grad:.5f}')

    # Gradient-descent step: move W against the gradient.
    W -= learning_rate * grad

epoch=0, loss=4400.00000, W=0.00000, grad=-4400.00000
epoch=1, loss=508.63995, W=1.32000, grad=-1496.00000
epoch=2, loss=58.79878, W=1.76880, grad=-508.63995
epoch=3, loss=6.79714, W=1.92139, grad=-172.93767
epoch=4, loss=0.78575, W=1.97327, grad=-58.79877
epoch=5, loss=0.09083, W=1.99091, grad=-19.99150
epoch=6, loss=0.01050, W=1.99691, grad=-6.79723
epoch=7, loss=0.00121, W=1.99895, grad=-2.31105
epoch=8, loss=0.00014, W=1.99964, grad=-0.78572
epoch=9, loss=0.00002, W=1.99988, grad=-0.26723
epoch=10, loss=0.00000, W=1.99996, grad=-0.09064
epoch=11, loss=0.00000, W=1.99999, grad=-0.03098
epoch=12, loss=0.00000, W=2.00000, grad=-0.01048
epoch=13, loss=0.00000, W=2.00000, grad=-0.00372
epoch=14, loss=0.00000, W=2.00000, grad=-0.00096
epoch=15, loss=0.00000, W=2.00000, grad=-0.00056
epoch=16, loss=0.00000, W=2.00000, grad=-0.00036
epoch=17, loss=0.00000, W=2.00000, grad=0.00000
epoch=18, loss=0.00000, W=2.00000, grad=0.00000
epoch=19, loss=0.00000, W=2.00000, grad=0.00000


### Solution 2 - using PyTorch tensors

In [38]:
# The same y = 2 * x toy data, now held in PyTorch tensors.
X = torch.tensor([10.0, 20.0, 30.0, 40.0, 50.0], dtype=torch.float32)  # input vector
Y = 2 * X  # output vector: 20, 40, ..., 100

# Scalar weight; requires_grad=True asks autograd to track operations on it so
# that backward() can fill in W.grad.
W = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

learning_rate = torch.tensor(3e-4, dtype=torch.float32)


In [45]:
# We can reuse the previously defined functions forward() and loss(); autograd replaces the hand-written gradient().

In [39]:
# Gradient descent again, but with autograd computing dLoss/dW.
for epoch in range(20):
    # Forward pass and MSE of the current predictions.
    y = forward(X)
    l = loss(y, Y)

    # Backward pass: autograd stores dLoss/dW in W.grad.
    l.backward()
    grad = W.grad

    # `epoch % 1` is always 0, so every epoch is logged.
    if epoch % 1 == 0:
        print(f'epoch={epoch}, loss={l:.5f}, W={W:.5f}, grad={grad:.5f}')

    # Apply the update with gradient tracking switched off, so the in-place
    # change of W is not itself recorded in the computation graph.
    with torch.no_grad():
        W -= learning_rate * grad

    # backward() accumulates into W.grad; clear it so the next epoch starts
    # from zero instead of adding on top of this epoch's gradient.
    W.grad.zero_()
    

epoch=0, loss=4400.00000, W=0.00000, grad=-4400.00000
epoch=1, loss=508.63995, W=1.32000, grad=-1495.99988
epoch=2, loss=58.79878, W=1.76880, grad=-508.63998
epoch=3, loss=6.79714, W=1.92139, grad=-172.93767
epoch=4, loss=0.78575, W=1.97327, grad=-58.79877
epoch=5, loss=0.09083, W=1.99091, grad=-19.99150
epoch=6, loss=0.01050, W=1.99691, grad=-6.79723
epoch=7, loss=0.00121, W=1.99895, grad=-2.31105
epoch=8, loss=0.00014, W=1.99964, grad=-0.78572
epoch=9, loss=0.00002, W=1.99988, grad=-0.26723
epoch=10, loss=0.00000, W=1.99996, grad=-0.09064
epoch=11, loss=0.00000, W=1.99999, grad=-0.03098
epoch=12, loss=0.00000, W=2.00000, grad=-0.01048
epoch=13, loss=0.00000, W=2.00000, grad=-0.00372
epoch=14, loss=0.00000, W=2.00000, grad=-0.00132
epoch=15, loss=0.00000, W=2.00000, grad=-0.00056
epoch=16, loss=0.00000, W=2.00000, grad=-0.00036
epoch=17, loss=0.00000, W=2.00000, grad=0.00000
epoch=18, loss=0.00000, W=2.00000, grad=0.00000
epoch=19, loss=0.00000, W=2.00000, grad=0.00000


In [None]:
# So the gradient computed by autograd is the same as in the manual solution (up to float32 rounding in the last digits).

### Solution 3 - using the PyTorch neural network module (torch.nn)

In [8]:
import torch.nn as nn
import torch.optim as optim

In [35]:
# One row per sample, one column per feature: shape [nb_samples, nb_features],
# here 5 x 1.
X = torch.tensor([[v] for v in (10, 20, 30, 40, 50)], dtype=torch.float32)  # inputs
Y = 2 * X  # targets, same 5 x 1 layout: 20, 40, ..., 100

nb_samples, nb_features = X.shape
nb_samples, nb_features

(5, 1)

In [36]:
learning_rate = 0.0003

# nn.Linear draws its initial weight and bias at random. Seeding the RNG makes
# the initial prediction and the training log identical on every
# "Restart Kernel -> Run All".
torch.manual_seed(0)

input_size = nb_features
# Width of the model output = number of target columns. Taken from Y instead
# of reusing nb_features (the *input* width); the two only coincide here
# because both happen to be 1.
output_size = Y.shape[1]

# No hand-managed weight tensor any more: nn.Linear owns a weight and a bias
# and computes x @ weight.T + bias.
model = nn.Linear(in_features=input_size, out_features=output_size)

loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [38]:
# Initial prediction
X_test = torch.tensor([[60]], dtype=torch.float32)
Y_test_pred = model(X_test)
Y_test_pred

tensor([[54.5764]], grad_fn=<AddmmBackward>)

In [39]:
# Training: full-batch gradient descent, with the optimizer applying the updates.
# NOTE(review): in the recorded log the bias changes only in the 4th decimal
# between reports, so the loss plateaus around 0.037 within 100 epochs --
# presumably what the original "hmm ?" remark referred to.

max_epochs = 100

for epoch in range(max_epochs):
    # prediction (forward pass)
    y = model(X)

    # loss -- nn.MSELoss takes (input, target): prediction first, target second
    l = loss_function(y, Y)

    # gradients (backward pass): backward() returns None and stores the
    # gradients in the .grad attribute of each model parameter, so there is
    # nothing to assign here
    l.backward()

    # log every 5th epoch and the final one; weight/bias are read before
    # optimizer.step(), i.e. they are the values that produced this loss
    if (epoch % 5 == 0 or epoch + 1 == max_epochs):
        w = model.weight.item()
        b = model.bias.item()
        print(f'epoch={epoch}, loss={l:.5f}, W={w:.5f}, b={b:.5f}')

    # weights update in the model
    optimizer.step()

    # zero gradients so they do not accumulate into the next epoch
    optimizer.zero_grad()

epoch=0, loss=1297.12988, W=0.90255, b=0.42367
epoch=5, loss=0.06371, W=1.98275, b=0.45289
epoch=10, loss=0.03727, W=1.98763, b=0.45278
epoch=15, loss=0.03723, W=1.98766, b=0.45253
epoch=20, loss=0.03719, W=1.98766, b=0.45228
epoch=25, loss=0.03715, W=1.98767, b=0.45204
epoch=30, loss=0.03711, W=1.98768, b=0.45179
epoch=35, loss=0.03707, W=1.98768, b=0.45154
epoch=40, loss=0.03703, W=1.98769, b=0.45130
epoch=45, loss=0.03699, W=1.98770, b=0.45105
epoch=50, loss=0.03695, W=1.98770, b=0.45081
epoch=55, loss=0.03691, W=1.98771, b=0.45056
epoch=60, loss=0.03687, W=1.98772, b=0.45032
epoch=65, loss=0.03683, W=1.98772, b=0.45007
epoch=70, loss=0.03679, W=1.98773, b=0.44982
epoch=75, loss=0.03675, W=1.98774, b=0.44958
epoch=80, loss=0.03671, W=1.98774, b=0.44933
epoch=85, loss=0.03667, W=1.98775, b=0.44909
epoch=90, loss=0.03663, W=1.98776, b=0.44885
epoch=95, loss=0.03659, W=1.98776, b=0.44860
epoch=99, loss=0.03656, W=1.98777, b=0.44840


In [40]:
# Prediction after training:
X_test = torch.tensor([[60]], dtype=torch.float32)
Y_test_pred = model(X_test)
Y_test_pred

tensor([[119.7146]], grad_fn=<AddmmBackward>)