# Learning rate schedules

In [None]:
# enable the IPython autoreload extension; mode 2 re-imports all modules
# before each cell execution, so edits to imported code are picked up
%load_ext autoreload
%autoreload 2
# render matplotlib figures inline in the notebook output
%matplotlib inline

import sys
# put the parent directory on the import path
# (presumably so project-local modules can be imported — confirm against repo layout)
sys.path.append('..')

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from transformers import (
    get_cosine_schedule_with_warmup,
    get_constant_schedule_with_warmup
)

In [None]:
# base learning rate handed to the optimizer
lr = 1e-04

# minimal one-in / one-out linear model; it only supplies parameters
# for the optimizer
model = nn.Linear(in_features=1, out_features=1)

# Adam optimizer over the model parameters
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [None]:
# constant schedule preceded by 200 warmup steps
constant_scheduler = get_constant_schedule_with_warmup(
    optimizer=optimizer, num_warmup_steps=200
)

In [None]:
# cosine schedule: 100 warmup steps out of 1000 total training steps
cosine_scheduler = get_cosine_schedule_with_warmup(
    optimizer=optimizer, num_warmup_steps=100, num_training_steps=1000
)

In [None]:
# training steps at which the schedules are evaluated
steps = np.arange(0, 1000)

# evaluate the first LR lambda of each scheduler at every step to obtain
# the scaling factors that are plotted below
constant_values = np.array(list(map(constant_scheduler.lr_lambdas[0], steps)))
cosine_values = np.array(list(map(cosine_scheduler.lr_lambdas[0], steps)))

# draw both schedules on a single axis
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(steps, constant_values, alpha=0.7, label='constant with warmup')
ax.plot(steps, cosine_values, alpha=0.7, label='cosine with warmup')

# axis labels and x-range spanning exactly the evaluated steps
ax.set_xlabel('training step')
ax.set_ylabel('LR scaling factor')
ax.set_xlim(steps.min(), steps.max())

# light grid kept behind the curves, then the legend
ax.set_axisbelow(True)
ax.grid(visible=True, which='both', color='gray', alpha=0.2, linestyle='-')
ax.legend()

fig.tight_layout()