In [26]:
import torch

Steps:
1. training set
2. weight initialization
3. model prediction
4. loss (in linear regression, the Mean Squared Error is used to calculate the loss)
    MSE = 1/N * sum((y_pred - y_gold) ** 2)
5. gradient
    calculating with pytorch
    
    - important remarks:
        - variables must be torch tensors instead of numpy arrays
        - the variable to be optimized must be created with the attribute 'requires_grad = True'
        - loss.backward() accumulates gradients into w.grad, so w.grad.zero_() must be called after each update to clear the previous gradients
        - the weight update itself must not be recorded in the computational graph, so
          one should use one of these three options:
            1. w.requires_grad_(False)
            2. y = w.detach()
            3. with torch.no_grad() wrapper (what we used here)
     


In [27]:
# Toy training set for the linear model f(x) = w * x, whose true weight is w = 2.
#
# The earlier numpy version stored these values with np.array(..., dtype=np.float32);
# here they are torch tensors so that PyTorch can be used for the gradient.

# inputs
X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
# targets: the true function is 2 * x, i.e. [2, 4, 6, 8]
Y = 2 * X

In [28]:
# Initial weight: a scalar (0-dim) float32 tensor starting at zero.
# requires_grad=True asks autograd to track operations on w so that
# a later backward() call fills in w.grad.
w = torch.zeros((), dtype=torch.float32, requires_grad=True)

In [29]:
# Model prediction for f(x) = w * x; same computation as in the manual version.
def forward_pass(x):
    """Return the model output ``w * x`` for input ``x``.

    The weight ``w`` is the module-level variable and is looked up at call
    time, so the result always reflects the current value of ``w``.
    """
    prediction = w * x
    return prediction

In [30]:
# Loss: mean squared error, same computation as in the manual version.
def loss(y_gold, y_pred):
    """Return the mean of the squared differences between ``y_pred`` and ``y_gold``."""
    residual = y_pred - y_gold
    squared_error = residual ** 2
    return squared_error.mean()


In [31]:
#gradient calculation: no manual gradient function is needed here, because PyTorch computes dl/dw when l.backward() is called; the numpy version is kept below for reference
#def gradient(x,y_gold,y_pred):
#    return np.dot(2*x,y_pred-y_gold).mean()

In [32]:
# Sanity check before any training: evaluate the model at x = 5 with the initial weight.
print(f'Prediction before training:f(5)={forward_pass(5):.3f}')

Prediction before training:f(5)=0.000


In [33]:
# Hyperparameters for the gradient-descent loop.
n_iters = 10          # number of training iterations
learning_rate = 1e-2  # step size applied to the gradient in each update

In [34]:
# Gradient-descent training loop for f(x) = w * x.

# Start from a fresh weight so that re-running this cell reproduces the same
# trajectory instead of continuing from an already-trained w.
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward_pass(X)

    # loss
    l = loss(Y, y_pred)

    # grad = backward pass: autograd stores dl/dw in w.grad
    l.backward()

    # update weights
    # the update must not become part of the computational graph >> wrap it in no_grad
    with torch.no_grad():
        w -= learning_rate * w.grad  # gradient-descent step; w.grad plays the role of dw

    # zero gradients, because backward() accumulates into w.grad
    w.grad.zero_()

    # print every step; note that w is shown after the update, while l was
    # computed from the weight before the update
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training:f(5)={forward_pass(5):.3f}')

epoch 1: w = 0.300, loss = 30.00000000
epoch 2: w = 0.555, loss = 21.67499924
epoch 3: w = 0.772, loss = 15.66018772
epoch 4: w = 0.956, loss = 11.31448650
epoch 5: w = 1.113, loss = 8.17471695
epoch 6: w = 1.246, loss = 5.90623236
epoch 7: w = 1.359, loss = 4.26725292
epoch 8: w = 1.455, loss = 3.08308983
epoch 9: w = 1.537, loss = 2.22753215
epoch 10: w = 1.606, loss = 1.60939169
Prediction after training:f(5)=8.031


In [36]:
# Same experiment as the previous part, which used 10 iterations; here we run
# 50 of them to see what happens.

# Hyperparameters and a fresh weight, so training starts again from w = 0.
n_iters = 50
learning_rate = 1e-2
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

for epoch in range(n_iters):
    # Forward pass followed by the loss against the targets.
    y_pred = forward_pass(X)
    l = loss(Y, y_pred)

    # Backward pass: autograd stores dl/dw in w.grad.
    l.backward()

    # Gradient-descent step, kept out of the computational graph.
    with torch.no_grad():
        w.sub_(learning_rate * w.grad)

    # Clear the gradient so the next backward() does not add to it.
    w.grad.zero_()

    # Report only on every third iteration (epoch index 0, 3, 6, ...).
    if epoch % 3 != 0:
        continue
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training:f(5)={forward_pass(5):.3f}')

epoch 1: w = 0.300, loss = 30.00000000
epoch 4: w = 0.956, loss = 11.31448650
epoch 7: w = 1.359, loss = 4.26725292
epoch 10: w = 1.606, loss = 1.60939169
epoch 13: w = 1.758, loss = 0.60698116
epoch 16: w = 1.851, loss = 0.22892261
epoch 19: w = 1.909, loss = 0.08633806
epoch 22: w = 1.944, loss = 0.03256231
epoch 25: w = 1.966, loss = 0.01228084
epoch 28: w = 1.979, loss = 0.00463169
epoch 31: w = 1.987, loss = 0.00174685
epoch 34: w = 1.992, loss = 0.00065882
epoch 37: w = 1.995, loss = 0.00024848
epoch 40: w = 1.997, loss = 0.00009371
epoch 43: w = 1.998, loss = 0.00003534
epoch 46: w = 1.999, loss = 0.00001333
epoch 49: w = 1.999, loss = 0.00000503
Prediction after training:f(5)=9.997
