# 4. Autograd

In [1]:
import torch

## Setting `requires_grad`

In [2]:
# Start from a plain 2x2 tensor filled with ones; no gradient tracking is requested.
x = torch.ones((2, 2))
print(x)

tensor([[1., 1.],
        [1., 1.]])


In [3]:
# A tensor created without `requires_grad=True` is not tracked by autograd.
print(x.requires_grad)

False


In [4]:
# The trailing underscore marks an in-place method: the flag is switched on `x` itself,
# and the same tensor is returned (which is why the cell displays it).
x.requires_grad_(True)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [5]:
# Confirm that the flag was changed in place, without reassigning `x`.
print(x.requires_grad)

True


### Tracking operations

In [6]:
# An operation on a tracked tensor yields a result carrying a `grad_fn`,
# i.e. a record of the operation that produced it (here an addition).
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [7]:
# The result of an operation on a tracked tensor is tracked as well.
print(y.requires_grad)

True


In [8]:
# `grad_fn` is the backward node associated with the operation that created `y`.
print(y.grad_fn)

<AddBackward0 object at 0x7fb2c3b6b1d0>


In [9]:
# Reductions are tracked too: `z` is a scalar whose `grad_fn` records the mean.
z = y.mean()
print(z)

tensor(3., grad_fn=<MeanBackward0>)


---
## Back to our example:
$loss = (x \cdot W + b - y)^{2}$

In [10]:
# Input and target of the example loss = (x . W + b - y)^2.
# `torch.tensor` is the recommended factory for building a tensor from data
# (`torch.Tensor(...)` is the legacy constructor); float literals keep the
# default floating-point dtype so `x` can be matrix-multiplied with `W`.
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([1.0])

# Learnable parameters, randomly initialised. They are the only tensors created
# with `requires_grad=True`, so they are the only ones that will receive a `.grad`.
W = torch.rand((3, 1), requires_grad=True)
b = torch.rand(1, requires_grad=True)

In [11]:
# Show the randomly initialised parameters; the values differ on every run.
print(W, "\n\n", b)

tensor([[0.8218],
        [0.4686],
        [0.2999]], requires_grad=True) 

 tensor([0.2149], requires_grad=True)


In [12]:
# Forward pass: `x @ W` has a single element, so `loss` is a one-element tensor.
# Every operation involved is recorded so that it can be differentiated.
loss = (x @ W + b - y) ** 2
print(loss)

tensor([3.5099], grad_fn=<PowBackward0>)


In [13]:
# Backward pass: computes d(loss)/d(param) and stores it in `.grad` of `W` and `b`.
loss.backward()

In [14]:
# d(loss)/dW = 2 * (x @ W + b - y) * x and d(loss)/db = 2 * (x @ W + b - y),
# hence W.grad equals b.grad scaled by x = [1, 2, 3].
print(W.grad, "\n\n", b.grad)

tensor([[ 3.7470],
        [ 7.4939],
        [11.2409]]) 

 tensor([3.7470])


In [15]:
# `x` and `y` were created without `requires_grad=True`, so no gradient is stored for them.
print(x.grad, y.grad)

None None


#### Gradients accumulate!

In [16]:
# Same forward and backward pass as before, run WITHOUT clearing `.grad` first:
# the newly computed gradients are added to the ones already stored.
loss = (x @ W + b - y) ** 2
loss.backward()

In [17]:
# The values are twice those printed after the first backward pass.
print(W.grad, "\n\n", b.grad)

tensor([[ 7.4939],
        [14.9879],
        [22.4818]]) 

 tensor([7.4939])


---
## Gradients for model parameters

In [18]:
# Classification loss, applied to the raw outputs of the network.
loss_fn = torch.nn.CrossEntropyLoss()

# Small fully-connected classifier: 5 input features -> 10 hidden units -> 2 outputs.
neural_net = torch.nn.Sequential(
    *[
        torch.nn.Linear(in_features=5, out_features=10),
        torch.nn.ReLU(),
        torch.nn.Linear(in_features=10, out_features=2),
    ]
)

In [19]:
# A `Sequential` can be indexed like a list; index 0 is the first `Linear` layer.
neural_net[0]

Linear(in_features=5, out_features=10, bias=True)

In [20]:
# The layer's weight matrix, stored as (out_features, in_features) = (10, 5).
neural_net[0].weight

Parameter containing:
tensor([[ 0.4294,  0.0337, -0.2311,  0.0377,  0.0801],
        [ 0.4146, -0.1736, -0.2673, -0.3404,  0.0725],
        [ 0.4463,  0.1993, -0.0819, -0.2448, -0.3261],
        [-0.3281, -0.0064, -0.0174, -0.1731, -0.2123],
        [-0.2785, -0.4120, -0.2925,  0.4362, -0.1486],
        [-0.0699,  0.0110, -0.1470, -0.1024,  0.4003],
        [-0.1453,  0.1581,  0.2857,  0.1355, -0.2738],
        [ 0.2341, -0.0654,  0.1149,  0.1655,  0.2232],
        [ 0.0311, -0.2406, -0.0920, -0.2115,  0.3290],
        [ 0.3677,  0.2816,  0.4042, -0.1564,  0.4045]], requires_grad=True)

A `Parameter` has its `requires_grad` set to `True` by default.

In [21]:
# A random batch matching the network defined above:
# 5 input features per sample and 2 output classes.
batch_size, input_size, num_classes = 15, 5, 2

x = torch.rand(batch_size, input_size)  # shape: (batch_size, input_size)
# CrossEntropyLoss takes one class index per sample, i.e. a target of shape
# (batch_size,) with values in [0, num_classes). The upper bound of `randint`
# is exclusive, so it must be `num_classes`; a bound of 1 only ever yields label 0.
y = torch.randint(num_classes, (batch_size,))

In [22]:
# Forward pass through the network, then the loss against the targets.
# The loss is a scalar that carries a `grad_fn`, so it can be back-propagated.
predictions = neural_net(x)
loss = loss_fn(predictions, y)
print(loss)

tensor(1.4418, grad_fn=<NllLossBackward>)


In [23]:
# No backward pass has been run yet, so the parameter has no gradient.
print(neural_net[0].weight.grad)

None


In [24]:
# Back-propagate the loss; this fills in `.grad` on the network's parameters.
loss.backward()

In [25]:
# The gradient now has the same shape as the weight matrix.
# NOTE(review): all-zero rows presumably belong to hidden units whose ReLU was
# inactive for every sample of the batch — confirm.
print(neural_net[0].weight.grad)

tensor([[0.0725, 0.0549, 0.0426, 0.0458, 0.0523],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.1334, 0.1174, 0.1326, 0.1028, 0.1191],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0080, 0.0060, 0.0257, 0.0060, 0.0279],
        [0.0190, 0.0171, 0.0204, 0.0166, 0.0182],
        [0.2251, 0.2021, 0.2417, 0.1969, 0.2160],
        [0.1375, 0.1234, 0.1476, 0.1202, 0.1319],
        [0.1518, 0.1363, 0.1630, 0.1328, 0.1457]])


In [26]:
# Reset the gradients of all parameters so the next backward pass starts from scratch.
# NOTE(review): recent PyTorch releases appear to default to `set_to_none=True`, in which
# case `.grad` becomes `None` instead of a tensor of zeros — confirm against the installed version.
neural_net.zero_grad()

In [27]:
# The previously accumulated gradient has been cleared.
print(neural_net[0].weight.grad)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


---
## Stop the history tracking

In [28]:
# Gradient tracking can also be requested directly when the tensor is created.
x = torch.ones((2, 2), requires_grad=True)
print(x.requires_grad)

True


In [29]:
# Outside of `no_grad`, the result of an operation on `x` is tracked.
y = x ** 2
print(y.requires_grad)

True


In [30]:
# Inside `torch.no_grad()` the very same operation is not recorded:
# the result does not require grad even though `x` does.
with torch.no_grad():
    z = x ** 2
    print(z.requires_grad)

False


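This is useful during inference: no gradients are needed there, so skipping the history tracking saves memory and computation.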

---
# Building our training loop (4 / 5)

In [31]:
# INITIALIZATION

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, RandomCrop
from torchvision.datasets import ImageFolder

device = torch.device("cpu")

# Every image is randomly cropped to CROP_SIZE before being converted to a tensor.
# The first Linear layer must accept exactly the flattened crop, so its input size
# is derived from the crop instead of being hard-coded (3 * 50 * 50 = 7500).
CROP_SIZE = (50, 50)
NUM_CHANNELS = 3
IN_FEATURES = NUM_CHANNELS * CROP_SIZE[0] * CROP_SIZE[1]

transform = Compose((RandomCrop(CROP_SIZE), ToTensor()))
dataset = ImageFolder(root="../alien-vs-predator/", transform=transform)
loader = DataLoader(dataset, batch_size=5, shuffle=True)

# Flatten each image, then classify it with a single hidden layer.
# The final layer emits 2 values per image (presumably one per class folder — verify).
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(IN_FEATURES, 100),
    nn.ReLU(),
    nn.Linear(100, 2),
)
# Move the parameters to the device used for the batches.
model.to(device)

loss_fn = nn.CrossEntropyLoss()

In [32]:
# TRAINING LOOP

# One pass over the dataset, one mini-batch at a time.
for samples, labels in loader:
    # Put the batch on the same device as the model.
    samples = samples.to(device)
    labels = labels.to(device)
    # Forward pass: model outputs, then the loss against the labels.
    predictions = model(samples)
    loss = loss_fn(predictions, labels)
    # Backward pass: stores the gradient of the loss in each parameter's `.grad`.
    loss.backward()
    # update model parameters
    # NOTE(review): placeholder only — no update is performed in this cell,
    # so the model's weights are not modified by this loop.
    # Clear the gradients so they do not accumulate into the next batch.
    model.zero_grad()