# 4. Autograd

In [1]:
import torch

## Setting `requires_grad`

In [2]:
# Create a plain 2x2 tensor of ones; nothing here asks autograd to track it.
x = torch.ones(2, 2)
print(x)

tensor([[1., 1.],
        [1., 1.]])


In [3]:
# The tensor was created without requires_grad, so the flag is off.
print(x.requires_grad)

False


In [4]:
# The trailing underscore marks an in-place method: x itself is switched to requires_grad=True.
x.requires_grad_(True)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [5]:
# Confirm that the flag was flipped on x itself.
print(x.requires_grad)

True


### Tracking operations

In [6]:
# An operation on a tensor that requires grad is recorded: the result carries a grad_fn.
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [7]:
# y was computed from x, so it requires grad as well.
print(y.requires_grad)

True


In [8]:
# grad_fn is the backward node of the operation that produced y (the addition above).
print(y.grad_fn)

<AddBackward0 object at 0x7fd6c69971d0>


In [9]:
# Reducing y to a scalar is tracked too: z gets its own grad_fn.
z = y.mean()
print(z)

tensor(3., grad_fn=<MeanBackward0>)


---
## Back to our example:
$\mathrm{loss} = (x \cdot W + b - y)^{2}$

In [10]:
# A single training example: three input features and one scalar target.
# torch.tensor() is the recommended factory for building tensors from Python data;
# the float literals keep the dtype identical to that of W and b below.
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([1.0])

# Learnable parameters: requires_grad=True asks autograd to track them,
# whereas x and y above are plain data and are not tracked.
W = torch.rand((3, 1), requires_grad=True)
b = torch.rand(1, requires_grad=True)

In [11]:
# Show the randomly initialised weights and bias, separated by a blank line.
print(W, "\n\n", b)

tensor([[0.9042],
        [0.7522],
        [0.8710]], requires_grad=True) 

 tensor([0.0365], requires_grad=True)


In [12]:
# Forward pass: squared error of the linear model x @ W + b against the target y.
loss = (x @ W + b - y) ** 2
print(loss)

tensor([16.4687], grad_fn=<PowBackward0>)


In [13]:
# Backpropagate from the loss; the gradients end up in W.grad and b.grad.
loss.backward()

In [14]:
# Gradient of the loss with respect to each parameter (same shape as the parameter).
print(W.grad, "\n\n", b.grad)

tensor([[ 8.1163],
        [16.2326],
        [24.3490]]) 

 tensor([8.1163])


In [15]:
# x and y were created without requires_grad=True, so no gradient is stored for them.
print(x.grad, y.grad)

None None


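#### Gradients accumulate!

Calling `backward()` again adds the new gradients to the values already stored in `.grad` instead of replacing them: after the second pass below, every gradient is exactly twice its previous value.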

In [16]:
# Same forward and backward pass as before, without clearing W.grad and b.grad first.
loss = (x @ W + b - y) ** 2
loss.backward()

In [17]:
# The new gradients were added to the ones already stored, not written over them.
print(W.grad, "\n\n", b.grad)

tensor([[16.2326],
        [32.4653],
        [48.6979]]) 

 tensor([16.2326])


---
## Gradients for model parameters

In [18]:
# Small two-layer classifier: 5 input features -> 10 hidden units -> 2 output scores.
neural_net = torch.nn.Sequential(
    torch.nn.Linear(in_features=5, out_features=10),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=10, out_features=2),
)

# Loss used below to compare the network's output scores with the class labels.
loss_fn = torch.nn.CrossEntropyLoss()

In [19]:
# A Sequential container can be indexed; index 0 is the first Linear layer.
neural_net[0]

Linear(in_features=5, out_features=10, bias=True)

In [20]:
# Weight matrix of the first layer, stored as a Parameter of shape (out_features, in_features).
neural_net[0].weight

Parameter containing:
tensor([[-0.0413, -0.3093,  0.4390,  0.3687, -0.1938],
        [ 0.4243, -0.3848, -0.2911, -0.1650, -0.0040],
        [ 0.3839, -0.2687,  0.1979,  0.1076, -0.0383],
        [ 0.3495, -0.3514, -0.3620,  0.3856, -0.0580],
        [-0.0685,  0.2883,  0.2188,  0.3055, -0.1945],
        [-0.1295,  0.1796, -0.3659, -0.3636, -0.2883],
        [-0.0981, -0.3949, -0.2657,  0.2916,  0.3420],
        [ 0.0533,  0.1903,  0.0290, -0.1312,  0.0392],
        [-0.1108,  0.0505, -0.4309,  0.0172, -0.0320],
        [ 0.3829, -0.3946, -0.4220, -0.2172, -0.1591]], requires_grad=True)

A `Parameter` has its `requires_grad` set to `True` by default.

In [21]:
# num_classes must match the 2 output scores produced by the network's last layer.
batch_size, num_classes = 15, 2

# Random inputs: one row of 5 features per sample.
x = torch.rand(batch_size, 5)  # (batch_size, input_size)

# CrossEntropyLoss takes one integer class index per sample, so y has shape (batch_size,).
# randint's upper bound is exclusive: num_classes=2 draws labels from {0, 1},
# whereas a bound of 1 can only ever produce the label 0.
y = torch.randint(num_classes, (batch_size,))

In [22]:
# Forward pass through the network, then the loss of its predictions against the labels y.
predictions = neural_net(x)
loss = loss_fn(predictions, y)
print(loss)

tensor(0.8388, grad_fn=<NllLossBackward>)


In [23]:
# No backward pass has run yet, so the parameter has no gradient.
print(neural_net[0].weight.grad)

None


In [24]:
# Backpropagate the loss through the network to populate the parameters' .grad.
loss.backward()

In [25]:
# After backward(), the weight holds a gradient with the same shape as itself.
# NOTE(review): rows of exact zeros presumably belong to hidden units whose ReLU
# output was zero for every sample in the batch — confirm if it matters.
print(neural_net[0].weight.grad)

tensor([[ 1.0360e-02,  5.7847e-03,  1.7197e-02,  1.4631e-02,  1.2965e-02],
        [ 1.1575e-01,  6.4645e-02,  8.0067e-02,  8.7133e-02,  8.5515e-02],
        [ 8.4349e-02,  5.5889e-02,  8.0124e-02,  7.7567e-02,  8.1082e-02],
        [ 1.5190e-03,  2.8688e-04,  1.7164e-04,  9.9888e-04,  7.3172e-04],
        [-1.3382e-01, -1.1000e-01, -1.2494e-01, -1.1786e-01, -1.3032e-01],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 4.1714e-05,  3.7429e-05,  3.6802e-05,  3.2374e-05,  3.9703e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]])


In [26]:
# Clear the stored gradients of all the model's parameters so they do not keep accumulating.
neural_net.zero_grad()

In [27]:
# The gradient of the first layer's weight has been cleared.
# NOTE(review): recent PyTorch releases default to zero_grad(set_to_none=True), in which
# case this prints None rather than a tensor of zeros — confirm against the installed version.
print(neural_net[0].weight.grad)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


---
## Stop the history tracking

In [28]:
# requires_grad can also be requested directly when the tensor is created.
x = torch.ones(2, 2, requires_grad=True)
print(x.requires_grad)

True


In [29]:
# By default the operation is tracked, so the result requires grad too.
y = x ** 2
print(y.requires_grad)

True


In [30]:
# Inside torch.no_grad() the same operation is not tracked:
# the result does not require grad even though x does.
with torch.no_grad():
    z = x ** 2
    print(z.requires_grad)

False


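This is useful during inference: no gradients are needed there, so skipping the history tracking saves memory and computation.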

---
# Building our training loop (4 / 5)

In [44]:
# INITIALIZATION

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, RandomCrop
from torchvision.datasets import ImageFolder

# Device on which both the model and every batch will live.
device = torch.device("cpu")

# Each image is randomly cropped to 50x50 and converted to a tensor.
transform = Compose(
    (
        RandomCrop(size=(50, 50)),
        ToTensor(),
    )
)
# Images are read from the folder below; batches of 5 are drawn in shuffled order.
dataset = ImageFolder(root="alien-vs-predator/", transform=transform)
loader = DataLoader(dataset, batch_size=5, shuffle=True)

# Flatten each cropped image into a vector, then classify it into 2 classes.
# NOTE(review): 7500 is presumably 3 channels * 50 * 50 pixels — confirm the images are RGB.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=7500, out_features=100),
    nn.ReLU(),
    nn.Linear(in_features=100, out_features=2),
).to(device)

loss_fn = nn.CrossEntropyLoss()

In [43]:
# TRAINING LOOP

# One pass over the dataset, one mini-batch at a time.
for samples, labels in loader:
    # Move the batch to the device the model was moved to.
    samples, labels = samples.to(device), labels.to(device)

    # Forward pass, then the loss of the predictions against the labels.
    predictions = model(samples)
    loss = loss_fn(predictions, labels)

    # Backward pass: computes the gradients of the loss for the model's parameters.
    loss.backward()

    # update model parameters

    # Clear the gradients so they do not accumulate into the next batch.
    model.zero_grad()