### Lambda LR

In [1]:
import torch
import torch.nn as nn
import torch.optim.lr_scheduler as lr_scheduler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# LambdaLR sets each group's lr to initial_lr * lr_lambda(epoch).
lr = 0.1
model = nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)


def lambda1(epoch):
    """Return the factor applied to the initial lr at `epoch` (epoch / 10)."""
    return epoch / 10


scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

# Constructing the scheduler already applies the epoch-0 factor (0 / 10),
# so 'lr' prints as 0.0 while 'initial_lr' keeps the configured 0.1.
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.0, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': False, 'initial_lr': 0.1, 'params': [0, 1]}]}


In [3]:
# The factor epoch / 10 grows with the epoch, so the learning rate rises in
# this example; a practical schedule would normally lower it instead.
for epoch_idx in range(5):
    # loss.backward()
    optimizer.step()
    # validate(...)
    scheduler.step()
    # Read the live value straight from the optimizer's first parameter group.
    current_lr = optimizer.param_groups[0]['lr']
    print(current_lr)

0.010000000000000002
0.020000000000000004
0.03
0.04000000000000001
0.05


### Multiplicative LR
- Multiply the learning rate of each parameter group by the factor given in the specified function.

In [4]:
# MultiplicativeLR scales the current lr by whatever factor the function returns.
lr = 0.1
model = nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)


def lambda1(epoch):
    """Return the constant per-epoch multiplier 0.95; `epoch` is ignored."""
    return 0.95


scheduler = lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lambda1)

# No factor has been applied yet, so 'lr' still equals the initial 0.1.
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.1, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': False, 'initial_lr': 0.1, 'params': [0, 1]}]}


In [5]:
# Every scheduler step multiplies the previous learning rate by the factor 0.95.
for epoch_idx in range(5):
    # loss.backward()
    optimizer.step()
    # validate(...)
    scheduler.step()
    # Read the live value straight from the optimizer's first parameter group.
    current_lr = optimizer.param_groups[0]['lr']
    print(current_lr)

0.095
0.09025
0.0857375
0.08145062499999998
0.07737809374999999


### StepLR
- Decays the learning rate of each parameter group by gamma every step_size epochs.

In [None]:
# Build a dedicated model/optimizer at lr = 0.05 so the schedule listed below
# holds exactly. Reusing the optimizer from the MultiplicativeLR cells would
# start from an already-decayed lr and stack a second scheduler on top of it.
model = nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)

# StepLR multiplies the lr by gamma once every step_size epochs.
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
# lr = 0.05     if epoch < 30
# lr = 0.005    if 30 <= epoch < 60
# lr = 0.0005   if 60 <= epoch < 90

### MultiStepLR
- Decays the learning rate of each parameter group by gamma once the number of epochs reaches one of the milestones.

In [None]:
# Build a dedicated model/optimizer at lr = 0.05 so the schedule listed below
# holds exactly. Reusing an optimizer from an earlier cell would start from an
# lr that a previous scheduler has already modified.
model = nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)

# MultiStepLR multiplies the lr by gamma each time the epoch hits a milestone.
# lr = 0.05     if epoch < 30
# lr = 0.005    if 30 <= epoch < 80
# lr = 0.0005   if epoch >= 80
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1)

### ExponentialLR
- Decays the learning rate of each parameter group by gamma every epoch. When last_epoch=-1, sets initial lr as lr.

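### CosineAnnealingLR
- Anneals the learning rate of each parameter group along a cosine curve, from the initial lr down to eta_min over T_max epochs.
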
### ReduceLROnPlateau
- Reduces the learning rate when a metric has stopped improving. Models often benefit from reducing the learning rate by a factor of 2-10 once learning stagnates. This scheduler reads a metric and, if no improvement is seen for a `patience` number of epochs, reduces the learning rate.

In [None]:
# Self-contained, runnable demo: the real train/validate calls are left as
# commented placeholders (same convention as the loops above), and a flat
# placeholder metric stands in for the validation loss.
model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# mode='min': the metric is expected to decrease; patience=5 tolerates five
# epochs without improvement before the lr is reduced.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
for epoch in range(10):
    # train(...)
    # val_loss = validate(...)
    val_loss = 1.0  # placeholder: never improves, so the plateau logic triggers
    # Note that step should be called after validate()
    scheduler.step(val_loss)
    print(optimizer.param_groups[0]['lr'])