## 5.5 Pytorch'autograd: Backpropagating all things

In [1]:
import torch
import numpy as np

import matplotlib.pyplot as plt


### Data

In [2]:
#input
t_u =  [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]

#labels temp in C
t_c =  [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]

#convert into tensor
t_c = torch.tensor(t_c)
t_u = torch.tensor(t_u)

- Rescale inputs

In [3]:
t_un = t_u * 0.1

### Model

In [4]:
def model(w,b,inputs=t_u):
    out = w * inputs + b
    return out

### Loss function

In [5]:
def loss_fn(preds, targets=t_c):
    ''' return Mean Square Error as MSE
    '''
    squared_diffs = (preds - targets)**2
    mean_sq_diffs = squared_diffs.mean()
    return mean_sq_diffs

## 5.5.2 Optimize a la carte

- Different Optimization algorithms

In [6]:
import torch.optim as optim

In [7]:
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_functional',
 '_multi_tensor',
 'lr_scheduler',
 'swa_utils']

- Using Gradient Descent Optimizer via optim
    - SGD : stochastic gradient descent
        - stochastic: from random mini-batch instead of the whole dataset

In [8]:
params = torch.tensor([1.0,0.0],requires_grad=True)
learning_rate = 1e-5
optimizer = optim.SGD([params], lr=learning_rate)

In [9]:
preds = model(*params, inputs=t_c)
loss = loss_fn(preds, targets=t_u)
#calculate gradient
loss.backward()
#update params 
optimizer.step()

print(params)

tensor([1.0099e+00, 8.2600e-04], requires_grad=True)


- Next iteration: 
    - Adding zero_grad

In [10]:
preds = model(*params, inputs=t_u)
loss = loss_fn(preds, targets=t_c)
#zero grad
optimizer.zero_grad()
#calculate gradient
loss.backward()
#update params 
optimizer.step()

print(params)

tensor([ 9.6414e-01, -1.0268e-05], requires_grad=True)


- Updated Version of Training Loop with Hyper Parameters:
    - Num epochs
    - Optimizer will include learning_rate

In [11]:
params = torch.tensor([1.0,0.0],requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD(params=[params],lr=learning_rate)

In [14]:
def training_loop(n_epochs,params, optimizer, inputs = t_u, targets=t_c):
    for epoch in range(1,n_epochs+1):
        w,b = params
        preds = model(w,b,inputs)
        loss = loss_fn(preds, targets)

        #zero grad
        optimizer.zero_grad()
        #calculate gradients
        loss.backward()
        #update parameters
        optimizer.step()

        #log 
        if epoch % 500 == 0:
            print(f"Epoch: {epoch} - loss:{loss:.4f}")
            
    return params
            
    

In [13]:
training_loop(
    n_epochs=5000,
    params=params,
    optimizer=optimizer,
    inputs=t_un,
    targets=t_c
)

Epoch: 500 - loss:7.8601
Epoch: 1000 - loss:3.8285
Epoch: 1500 - loss:3.0922
Epoch: 2000 - loss:2.9577
Epoch: 2500 - loss:2.9331
Epoch: 3000 - loss:2.9286
Epoch: 3500 - loss:2.9278
Epoch: 4000 - loss:2.9277
Epoch: 4500 - loss:2.9277
Epoch: 5000 - loss:2.9276


tensor([  5.3671, -17.3012], requires_grad=True)

- Testing other optimizers
    - Adam

In [17]:
params = torch.tensor([1.0,0.0],requires_grad=True)
learning_rate = 1e-1
optimizer = optim.Adam([params], lr=learning_rate)

training_loop(
    n_epochs = 5000,
    params = params,
    optimizer=optimizer,
    inputs=t_u,
    targets=t_c
)


Epoch: 500 - loss:7.6129
Epoch: 1000 - loss:3.0867
Epoch: 1500 - loss:2.9286
Epoch: 2000 - loss:2.9276
Epoch: 2500 - loss:2.9276
Epoch: 3000 - loss:2.9276
Epoch: 3500 - loss:2.9276
Epoch: 4000 - loss:2.9276
Epoch: 4500 - loss:2.9276
Epoch: 5000 - loss:2.9276


tensor([  0.5368, -17.3048], requires_grad=True)

### 5.5.3 Training, validation and overfitting