# 5. Optimizers

In [14]:
import torch
from torch import optim

### Instantiating

In [15]:
# dir() returns the module's attribute names sorted alphabetically; peek at the first 12.
dir(optim)[:12]

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'Optimizer',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam']

In [16]:
# Loss for multi-class classification; it is fed the raw scores produced by the network.
loss_fn = torch.nn.CrossEntropyLoss()

# Small feed-forward classifier: 5 input features -> 10 hidden units -> 2 class scores.
neural_net = torch.nn.Sequential(
    torch.nn.Linear(in_features=5, out_features=10),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=10, out_features=2),
)

In [17]:
# Stochastic gradient descent over every parameter of neural_net, with a learning rate of 0.1.
optimizer = optim.SGD(neural_net.parameters(), lr=0.1)

### Optimizing

In [18]:
x = torch.rand(15, 5)  # shape (batch_size, input_size)
# Class-index targets, shape (batch_size,). The upper bound of randint is
# exclusive, so 2 is required to draw labels from both classes {0, 1};
# an upper bound of 1 would produce only zeros.
y = torch.randint(2, (15,))

predictions = neural_net(x)  # raw class scores, shape (batch_size, num_classes)
loss = loss_fn(predictions, y)  # compute loss
loss.backward()  # compute gradients (stored on each parameter's .grad)

In [19]:
# Bias of the first Linear layer *before* the optimizer step, kept for comparison.
print(neural_net[0].bias)

Parameter containing:
tensor([ 0.0739,  0.0030,  0.1339, -0.2540,  0.2114,  0.3464,  0.1264,  0.4113,
        -0.0657,  0.2398], requires_grad=True)


In [20]:
# Update the parameters in place using the gradients computed by loss.backward().
optimizer.step()

In [21]:
# Bias of the first Linear layer *after* the optimizer step: the values have moved.
print(neural_net[0].bias)

Parameter containing:
tensor([ 0.0887,  0.0033,  0.1374, -0.2540,  0.2281,  0.3452,  0.1350,  0.4063,
        -0.0638,  0.2298], requires_grad=True)


### Schedulers

In [22]:
# A few of the learning-rate schedulers available in optim.lr_scheduler.
# Only the last expression of a cell is echoed, hence the single output line.
optim.lr_scheduler.LambdaLR
optim.lr_scheduler.ExponentialLR
optim.lr_scheduler.MultiStepLR
optim.lr_scheduler.StepLR
# etc

torch.optim.lr_scheduler.StepLR

In [23]:
# The decay is applied each time scheduler.step() is called (typically once per epoch):
# the optimizer's learning rate is multiplied by gamma.
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)  # Decays the learning rate by gamma every epoch

In [24]:
# Learning rate of the first parameter group before the decay.
print(optimizer.param_groups[0]["lr"])

# One scheduler step multiplies the learning rate by gamma (0.1 -> 0.08 here).
# NOTE: this cell is not idempotent; every re-run decays the learning rate again.
scheduler.step()

# Learning rate of the same parameter group after the decay.
print(optimizer.param_groups[0]["lr"])

0.1
0.08000000000000002


---
# Building our training loop (5 / 5)

In [25]:
# INITIALIZATION

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, RandomCrop
from torchvision.datasets import ImageFolder

# All tensors and the model live on the CPU.
device = torch.device("cpu")

# Input pipeline: every image is randomly cropped to 50x50 and turned into a tensor,
# then served in shuffled mini-batches of 5.
transform = Compose((RandomCrop((50, 50)), ToTensor()))
dataset = ImageFolder(root="../alien-vs-predator/", transform=transform)
loader = DataLoader(dataset, batch_size=5, shuffle=True)

# Two-layer perceptron on the flattened crop, ending in 2 class scores.
# 7500 input features = 3 * 50 * 50 (presumably 3-channel RGB crops; confirm against the dataset).
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=7500, out_features=100),
    nn.ReLU(),
    nn.Linear(in_features=100, out_features=2),
).to(device)

loss_fn = nn.CrossEntropyLoss()

optimizer = optim.SGD(params=model.parameters(), lr=0.1)

In [26]:
# TRAINING LOOP

for samples, labels in loader:
    # Clear gradients first: backward() accumulates into .grad, so anything left
    # over from the previous iteration (or from an interrupted earlier run of this
    # cell) would otherwise be added to this batch's gradients.
    optimizer.zero_grad()

    # Move the batch to the same device as the model.
    samples = samples.to(device)
    labels = labels.to(device)

    predictions = model(samples)  # forward pass: raw class scores
    loss = loss_fn(predictions, labels)  # compare scores with the true labels
    loss.backward()  # compute gradients of the loss w.r.t. the parameters
    optimizer.step()  # update the parameters from those gradients