There are 4 steps for back-propagation:


1.   Prediction
2.   Loss computation
3.   Gradient computation
4.   Parameters update


### 1 ONLY Numpy

In [1]:
# libraries
import numpy as np

In [29]:
# training data as float32 numpy arrays (the PyTorch sections use torch.tensor instead)
X = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
Y = np.asarray([2.0, 4.0, 6.0, 8.0], dtype=np.float32)
# weight: single scalar parameter, starts at zero
w = 0.0

In [30]:
# (1) Prediction
def forward(x):
  '''Linear model without bias: scale the input by the global weight w.'''
  prediction = w * x
  return prediction

# (2) Loss calculation
def loss_func(y, y_pred):
  '''Mean squared error between targets y and predictions y_pred.

  MSE = 1/N * sum((y_pred - y)**2)
  '''
  squared_error = (y_pred - y) ** 2
  return squared_error.mean()

# (3) Gradient computation
def gradient(x,y,y_pred):
  '''Analytic derivative of the MSE loss with respect to the weight.

  With y_pred = w*x and loss = mean((y_pred - y)**2):
  dloss/dw = mean(2 * x * (y_pred - y))
  '''
  residual = y_pred - y
  return np.mean(2 * x * residual)

# Training: gradient descent using the hand-derived gradient
learning_rate, n_iters = 0.01, 100

for epoch in range(n_iters):
  # forward pass and loss for the current weight
  y_pred = forward(X)
  loss = loss_func(Y, y_pred)

  # (4) Parameter update: step against dloss/dw
  dw = gradient(X, Y, y_pred)
  w -= learning_rate * dw

  # report every 5th epoch; w is already updated, loss was measured before the update
  if epoch % 5:
    continue
  print(f'epoch {epoch+1}: w = {w:.3f}, loss = {loss:.8f}')

print(f'Prediction: f(5) = {forward(5):.3f}')

epoch 1: w = 0.300, loss = 30.00000000
epoch 6: w = 1.246, loss = 5.90623236
epoch 11: w = 1.665, loss = 1.16278565
epoch 16: w = 1.851, loss = 0.22892293
epoch 21: w = 1.934, loss = 0.04506905
epoch 26: w = 1.971, loss = 0.00887291
epoch 31: w = 1.987, loss = 0.00174685
epoch 36: w = 1.994, loss = 0.00034391
epoch 41: w = 1.997, loss = 0.00006770
epoch 46: w = 1.999, loss = 0.00001333
epoch 51: w = 1.999, loss = 0.00000262
epoch 56: w = 2.000, loss = 0.00000052
epoch 61: w = 2.000, loss = 0.00000010
epoch 66: w = 2.000, loss = 0.00000002
epoch 71: w = 2.000, loss = 0.00000000
epoch 76: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 86: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
epoch 96: w = 2.000, loss = 0.00000000
Prediction: f(5) = 10.000


### 2 with Pytorch tensors
Here we want to automate as much as possible!
1. Loss & Optimizer
2. Training:
  1.   forward pass/compute prediction
  2.   backward pass/gradient
  3.   update weights



In [54]:
# libraries
import torch
import torch.nn as nn

In [55]:
# training data as float32 tensors
X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
Y = torch.tensor([2.0, 4.0, 6.0, 8.0], dtype=torch.float32)
# weight: 0-dim tensor tracked by autograd so loss.backward() can fill w.grad
w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)

In [56]:
# (1) Prediction
def forward(x):
  '''Linear model without bias: scale the input by the global weight tensor w.'''
  prediction = w * x
  return prediction

# (3) Gradient computation (reference only: the training loop below gets
# the gradient from autograd via loss.backward())
def gradient(x,y,y_pred):
  '''Analytic derivative of the MSE loss with respect to the weight.

  With y_pred = w*x and loss = mean((y_pred - y)**2):
  dloss/dw = mean(2 * x * (y_pred - y))

  The mean is taken with the operand's own .mean() method rather than
  np.mean, so the function accepts torch tensors (including ones attached
  to the autograd graph) as well as numpy arrays.
  '''
  return (2*x*(y_pred-y)).mean()

# Training
learning_rate = 0.01
n_iters = 100

# (2) Loss & Optimizer
loss_func = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr=learning_rate)
# Set to False to apply the gradient step by hand instead of via the optimizer.
# (Comparing the optimizer object itself against False can never be true.)
use_optimizer = True

for epoch in range(n_iters):
  y_pred = forward(X)
  # nn.MSELoss takes (input, target); the value is symmetric, the order is convention
  loss = loss_func(y_pred, Y)

  loss.backward() # dloss/dw
  # (4) Weight update, then reset the gradient for the next iteration
  if use_optimizer:
    optimizer.step()
    optimizer.zero_grad()
  else:
    # the update itself must not be tracked by autograd
    with torch.no_grad():
      w -= learning_rate * w.grad
    w.grad.zero_()

  if epoch % 5 == 0:
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {loss:.8f}')

print(f'Prediction: f(5) = {forward(5):.3f}')

epoch 1: w = 0.300, loss = 30.00000000
epoch 6: w = 1.246, loss = 5.90623236
epoch 11: w = 1.665, loss = 1.16278565
epoch 16: w = 1.851, loss = 0.22892261
epoch 21: w = 1.934, loss = 0.04506890
epoch 26: w = 1.971, loss = 0.00887291
epoch 31: w = 1.987, loss = 0.00174685
epoch 36: w = 1.994, loss = 0.00034392
epoch 41: w = 1.997, loss = 0.00006770
epoch 46: w = 1.999, loss = 0.00001333
epoch 51: w = 1.999, loss = 0.00000262
epoch 56: w = 2.000, loss = 0.00000052
epoch 61: w = 2.000, loss = 0.00000010
epoch 66: w = 2.000, loss = 0.00000002
epoch 71: w = 2.000, loss = 0.00000000
epoch 76: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 86: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
epoch 96: w = 2.000, loss = 0.00000000
Prediction: f(5) = 10.000


### 3 with Pytorch model
1. Design the model -> Input_size, Output_size, forward pass

In [51]:
# libraries
import torch
import torch.nn as nn

In [57]:
# training data as 2-D float32 tensors: one row per sample, one column per feature
X = torch.tensor([[1.0], [2.0], [3.0], [4.0]], dtype=torch.float32)
Y = torch.tensor([[2.0], [4.0], [6.0], [8.0]], dtype=torch.float32)
# single unseen sample used for the final prediction
X_test = torch.tensor([5.0], dtype=torch.float32)

# X.shape is (rows, columns) = (samples, features)
n_samples, n_features = X.shape

In [62]:
# model & model size
input_size = n_features   # one model input per feature column of X
output_size = Y.shape[1]  # one model output per target column of Y

class LinearRegression(nn.Module):
  '''Linear regression model wrapping a single nn.Linear layer.'''

  def __init__(self, model_input_size, model_output_size):
    '''Create the layer mapping model_input_size inputs to model_output_size outputs.'''
    super().__init__()
    # the only layer of the model
    self.lin = nn.Linear(model_input_size, model_output_size)

  def forward(self, x):
    '''Apply the linear layer to x and return the result.'''
    output = self.lin(x)
    return output

model = LinearRegression(input_size, output_size) # simpler alternative: model = nn.Linear(input_size, output_size)

# Training
learning_rate = 0.01
n_iters = 100

# Loss & Optimizer
loss_func = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
  # forward pass
  y_pred = model(X)
  # nn.MSELoss takes (input, target); the value is symmetric, the order is convention
  loss = loss_func(y_pred, Y)

  loss.backward() # dloss/dw
  # weight update
  optimizer.step()

  # zero gradients
  optimizer.zero_grad()

  if epoch % 5 == 0:
    # the model holds two parameter tensors: the weight matrix and the bias
    [w, b] = model.parameters()
    print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {loss:.8f}')

# Prediction (no gradient tracking needed for inference)
with torch.no_grad():
  print(f'Prediction: f(5) = {model(X_test).item():.3f}')

epoch 1: w = 0.381, loss = 33.52361298
epoch 6: w = 1.328, loss = 5.39398146
epoch 11: w = 1.708, loss = 0.86930871
epoch 16: w = 1.861, loss = 0.14146911
epoch 21: w = 1.923, loss = 0.02434854
epoch 26: w = 1.948, loss = 0.00546289
epoch 31: w = 1.958, loss = 0.00237946
epoch 36: w = 1.963, loss = 0.00183914
epoch 41: w = 1.965, loss = 0.00170922
epoch 46: w = 1.966, loss = 0.00164656
epoch 51: w = 1.967, loss = 0.00159596
epoch 56: w = 1.967, loss = 0.00154851
epoch 61: w = 1.968, loss = 0.00150272
epoch 66: w = 1.968, loss = 0.00145833
epoch 71: w = 1.969, loss = 0.00141524
epoch 76: w = 1.969, loss = 0.00137344
epoch 81: w = 1.970, loss = 0.00133287
epoch 86: w = 1.970, loss = 0.00129349
epoch 91: w = 1.971, loss = 0.00125528
epoch 96: w = 1.971, loss = 0.00121821
Prediction: f(5) = 9.941
