Source: https://www.youtube.com/watch?v=3Kb0QS6z7WA

In [2]:
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [18]:
saved_weights = [0.1, 0.2, 0.3, 0.25]
loaded_weights = torch.tensor(saved_weights)
loaded_weights

tensor([0.1000, 0.2000, 0.3000, 0.2500])

In [19]:
def preprocess(a):
    return a
weights = preprocess(loaded_weights)  # some function
weights


tensor([0.1000, 0.2000, 0.3000, 0.2500])

In [20]:
weights.requires_grad_()
out = weights.pow(2).sum()
out


tensor(0.2025, grad_fn=<SumBackward0>)

In [21]:
out.backward()
weights.grad

tensor([0.2000, 0.4000, 0.6000, 0.5000])

In [22]:
weights # still the same as before, because no update yet

tensor([0.1000, 0.2000, 0.3000, 0.2500], requires_grad=True)

In [23]:
weights.detach()

tensor([0.1000, 0.2000, 0.3000, 0.2500])

In [24]:
out = weights.pow(2).sum()
out

tensor(0.2025, grad_fn=<SumBackward0>)

In [25]:
out.backward()
weights.grad

In [17]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad=True)

In [18]:
x

tensor(1.)

In [19]:
y

tensor(2.)

In [20]:
w

tensor(1., requires_grad=True)

In [21]:
print(w.grad)

None


In [22]:
# forward pass and compute the loss
y_hat = w*x 
y_hat

tensor(1., grad_fn=<MulBackward0>)

In [23]:
loss = (y_hat-y)
loss

tensor(-1., grad_fn=<SubBackward0>)

In [24]:
loss = (y_hat-y)**2
loss

tensor(1., grad_fn=<PowBackward0>)

In [25]:
# backward pass
loss.backward

<bound method Tensor.backward of tensor(1., grad_fn=<PowBackward0>)>

In [26]:
print(w.grad)

None


In [None]:
# update weights
# next forward and backwars

Source: https://www.youtube.com/watch?v=E-I2DNVzQLg

First, do every thing manually

In [1]:
import numpy as np

In [15]:
# f = 2*x
X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

w = 0.0

In [16]:
# prediction model
def forward(x):
    return w*x

# loss = mse
def loss(y, y_predicted):
    return ((y_predicted -y)**2).mean()

# gradient
# MSE = 1/N * (w*x - y)**2
# dJ = 1/N 2*x (w*x -y)
def gradient(x,y, y_predicted):
    return np.dot(2*x, y_predicted-y).mean() 

In [17]:
print(f'Prediction before traiing: f(5) = {forward(5):.3f}')

Prediction before traiing: f(5) = 0.000


In [18]:
# training
learning_rate = 0.01
n_iters =10

for epoch in range(n_iters):
    # prediciton = forward pass
    y_pred = forward(X)
    
    # loss
    l = loss(Y, y_pred)
    
    # gradient
    dw = gradient(X,Y,y_pred)
    
    # update weights
    w -= learning_rate* dw
    
    if epoch % 1 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')
        
print(f'Prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 1.200, loss = 30.00000000
epoch 2: w = 1.680, loss = 4.79999924
epoch 3: w = 1.872, loss = 0.76800019
epoch 4: w = 1.949, loss = 0.12288000
epoch 5: w = 1.980, loss = 0.01966083
epoch 6: w = 1.992, loss = 0.00314574
epoch 7: w = 1.997, loss = 0.00050331
epoch 8: w = 1.999, loss = 0.00008053
epoch 9: w = 1.999, loss = 0.00001288
epoch 10: w = 2.000, loss = 0.00000206
Prediction after training: f(5) = 9.999


Now, try with Torch

Replace the gradient block

In [19]:
import torch
# f = 2*x
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [20]:
# training
learning_rate = 0.01
n_iters =10

for epoch in range(n_iters):
    # prediciton = forward pass
    y_pred = forward(X)
    
    # loss
    l = loss(Y, y_pred)
    
    # gradient with the help from torch
    l.backward() # dl/dw
        # manually: dw = gradient(X,Y,y_pred)
    
    # update weights
    with torch.no_grad():
        w -= learning_rate* w.grad
        # manually:  w -= learning_rate* dw
    
    # zero gradients before the new iteration
    w.grad.zero_()
    
    if epoch % 1 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')
        
print(f'Prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 0.300, loss = 30.00000000
epoch 2: w = 0.555, loss = 21.67499924
epoch 3: w = 0.772, loss = 15.66018772
epoch 4: w = 0.956, loss = 11.31448650
epoch 5: w = 1.113, loss = 8.17471695
epoch 6: w = 1.246, loss = 5.90623236
epoch 7: w = 1.359, loss = 4.26725292
epoch 8: w = 1.455, loss = 3.08308983
epoch 9: w = 1.537, loss = 2.22753215
epoch 10: w = 1.606, loss = 1.60939169
Prediction after training: f(5) = 8.031


Not close to the result, but will be close if we increase the iterations

Use Torch Loss, Torch update weights

In [22]:
import torch
import torch.nn as nn

# f = 2*x
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [24]:
loss = nn.MSELoss()
    # manually: def loss(y, y_predicted):
    #               return ((y_predicted -y)**2).mean()

optimiser = torch.optim.SGD([w], lr=learning_rate)

In [25]:
# training
learning_rate = 0.01
n_iters =10

for epoch in range(n_iters):
    # prediciton = forward pass
    y_pred = forward(X)
    
    # loss
    l = loss(Y, y_pred)
    
    # gradient with the help from torch
    l.backward() # dl/dw
        # manually 1: dw = gradient(X,Y,y_pred)
    
    # update weights
    optimiser.step()
        # manually 1:
        #   with torch.no_grad():
        #       w -= learning_rate* w.grad
        # manually 2:  w -= learning_rate* dw
    
    # zero gradients before the new iteration
    optimiser.zero_grad()
    # manually 1: w.grad.zero_()
    
    if epoch % 1 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')
        
print(f'Prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 0.300, loss = 30.00000000
epoch 2: w = 0.555, loss = 21.67499924
epoch 3: w = 0.772, loss = 15.66018772
epoch 4: w = 0.956, loss = 11.31448650
epoch 5: w = 1.113, loss = 8.17471695
epoch 6: w = 1.246, loss = 5.90623236
epoch 7: w = 1.359, loss = 4.26725292
epoch 8: w = 1.455, loss = 3.08308983
epoch 9: w = 1.537, loss = 2.22753215
epoch 10: w = 1.606, loss = 1.60939169
Prediction after training: f(5) = 8.031


Use Torch Model

In [30]:
# f = 2*x
X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)

# w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [31]:
n_samples, n_features = X.shape
print(n_samples, n_features)

4 1


In [32]:
input_size = n_features
output_size = n_features

In [33]:
model = nn.Linear(input_size, output_size)

In [34]:
X_test = torch.tensor([[5]], dtype=torch.float32)

In [36]:
print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

Prediction before training: f(5) = -2.169


In [37]:
optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate)
    # optimiser = torch.optim.SGD([w], lr=learning_rate)

In [38]:
# training
learning_rate = 0.01
n_iters =10

for epoch in range(n_iters):
    # prediciton = forward pass
    y_pred = model(X)
    
    # loss
    l = loss(Y, y_pred)
        
    # gradient with the help from torch
    l.backward() # dl/dw
    
    # update weights
    optimiser.step()

    optimiser.zero_grad()
    
    if epoch % 1 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')
        
print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')

epoch 1: w = 0.053, loss = 47.21068954
epoch 2: w = 0.367, loss = 32.76074219
epoch 3: w = 0.628, loss = 22.73422241
epoch 4: w = 0.845, loss = 15.77701759
epoch 5: w = 1.026, loss = 10.94954586
epoch 6: w = 1.177, loss = 7.59985352
epoch 7: w = 1.303, loss = 5.27555943
epoch 8: w = 1.408, loss = 3.66276717
epoch 9: w = 1.495, loss = 2.54367256
epoch 10: w = 1.568, loss = 1.76714301
Prediction after training: f(5) = 7.923


Custom Model

In [39]:
class LinearReg(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearReg, self).__init__()
        # define layers
        self.lin = nn.Linear(input_dim, output_dim)
        
    def forward(self, x):
        return self.lin(x)
    

In [42]:
model = LinearReg(input_size, output_size)

In [43]:
for epoch in range(n_iters):
    # prediciton = forward pass
    y_pred = model(X)
    
    # loss
    l = loss(Y, y_pred)
        
    # gradient with the help from torch
    l.backward() # dl/dw
    
    # update weights
    optimiser.step()

    optimiser.zero_grad()
    
    if epoch % 1 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')
        
print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')

epoch 1: w = -0.683, loss = 49.53252411
epoch 2: w = -0.683, loss = 49.53252411
epoch 3: w = -0.683, loss = 49.53252411
epoch 4: w = -0.683, loss = 49.53252411
epoch 5: w = -0.683, loss = 49.53252411
epoch 6: w = -0.683, loss = 49.53252411
epoch 7: w = -0.683, loss = 49.53252411
epoch 8: w = -0.683, loss = 49.53252411
epoch 9: w = -0.683, loss = 49.53252411
epoch 10: w = -0.683, loss = 49.53252411
Prediction after training: f(5) = -3.075
