### 5.5.1 Computing the gradient automatically


In [7]:
%matplotlib inline
import numpy as np
import torch
# Show only 2 items at each edge of a dimension when a printed tensor is summarized.
torch.set_printoptions(edgeitems=2)

In [9]:
# Target values (t_c) and the raw model inputs (t_u), paired position by position.
t_c = torch.tensor([
    0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0,
])
t_u = torch.tensor([
    35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4,
])
# Inputs scaled down by a factor of 10; the training run further down uses these.
t_un = t_u * 0.1

In [2]:
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by weight ``w``, then add bias ``b``."""
    scaled = w * t_u
    return scaled + b

In [3]:
def loss_fn(t_p, t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``."""
    residuals = t_p - t_c
    return (residuals ** 2).mean()

Let's again initialize a parameters tensor:

In [4]:
# Initial values for the two model parameters (unpacked later as w=1.0, b=0.0).
# requires_grad=True asks autograd to track operations performed on this tensor.
params = torch.tensor([1.0, 0.0], requires_grad=True)

USING THE GRAD ATTRIBUTE
Notice the `requires_grad=True` argument to the tensor constructor? That argument
is telling PyTorch to track the entire family tree of tensors resulting from operations
on `params`.<br>
After a backward pass, the value of the derivative will be automatically populated as the `grad` attribute of the
`params` tensor.

In [5]:
# No backward pass has been run on params yet, so its grad attribute is still empty.
params.grad is None

True

All we have to do to populate it is to start with a tensor with requires_grad set to
True, then call the model and compute the loss, and then call backward on the loss
tensor:

In [10]:
# Forward pass on the raw inputs t_u, then backpropagate from the scalar loss.
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()
# Display the gradient that backward() stored on params (one entry per parameter).
params.grad

tensor([4517.2969,   82.6000])

`params.grad` now holds the derivatives of the loss with respect to each element of `params`.

**Why accumulation matters:**<br>
If we call `backward()` multiple times (e.g., in a training loop), the gradients add up: each call accumulates into `params.grad` instead of overwriting it.
This can lead to incorrect gradient values if not handled properly.<br>
**Zeroing the gradient:**<br>
To avoid accumulation, we explicitly zero the gradients before each optimization step.<br>
Here we do that in place with `params.grad.zero_()`; once a PyTorch optimizer is managing the parameters, `optimizer.zero_grad()` does the same job.

In [12]:
# Reset the accumulated gradient in place. The guard covers the case where
# backward() has not run yet and params.grad is still None.
if params.grad is not None:
    params.grad.zero_()

In [14]:
def training_loop(n_epochs, learning_rate, params, t_u, t_c, print_every=500):
    """Fit ``params`` by gradient descent, letting autograd compute the gradient.

    Args:
        n_epochs: Number of update steps to run.
        learning_rate: Step size multiplying the gradient in each update.
        params: Tensor created with ``requires_grad=True``. It is unpacked
            into the arguments of ``model`` that follow the input, and it is
            updated in place.
        t_u: Model inputs.
        t_c: Target values the predictions are compared against.
        print_every: Report the loss every this many epochs. Defaults to 500,
            the interval that used to be hard-coded. Pass 0 or None to
            disable progress output.

    Returns:
        The same ``params`` tensor, holding the values after the last update.
    """
    for epoch in range(1, n_epochs + 1):
        # Gradients accumulate across backward() calls, so clear the previous
        # epoch's values. This could be done at any point in the loop prior
        # to calling loss.backward().
        if params.grad is not None:
            params.grad.zero_()

        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)
        loss.backward()

        # The parameter update itself should not be tracked by autograd
        # (more about this in next section).
        with torch.no_grad():
            params -= learning_rate * params.grad

        # A falsy print_every skips reporting and avoids a modulo by zero.
        if print_every and epoch % print_every == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
    return params

Let's test the training loop on the scaled inputs `t_un`:

In [15]:
# Train on the scaled inputs from a fresh (w=1.0, b=0.0) starting point;
# the returned parameters are displayed as the cell's result.
training_loop(
    n_epochs=5000,
    learning_rate=1e-2,
    params=torch.tensor([1.0, 0.0], requires_grad=True),
    t_u=t_un,
    t_c=t_c,
)

Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

We get the same result as before, but this time we don't need to calculate the derivatives by hand: autograd computes them for us.