In [1]:
import tensorflow as tf

# Fetch MNIST and unpack it into its train / test splits.
mnist = tf.keras.datasets.mnist.load_data()
train_split, test_split = mnist
X_train, y_train = train_split
X_test, y_test = test_split

# Divide the pixel arrays by 255 (true division, so the result is floating point).
X_train = X_train / 255.0
X_test = X_test / 255.0

# Shape of a single training image.
X_train[0].shape

(28, 28)

In [2]:
# Build the classifier layer by layer: flatten each 28x28 image, pass it through
# two 200-unit ReLU hidden layers, and finish with a 10-way softmax output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
for _ in range(2):
    model.add(tf.keras.layers.Dense(200, activation="relu"))
model.add(tf.keras.layers.Dense(10, activation="softmax"))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 200)               157000    
                                                                 
 dense_1 (Dense)             (None, 200)               40200     
                                                                 
 dense_2 (Dense)             (None, 10)                2010      
                                                                 
Total params: 199210 (778.16 KB)
Trainable params: 199210 (778.16 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [3]:
# Baseline run: plain SGD with a constant learning rate (no schedule).
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

# Train for 3 epochs; validation_split=0.1 holds out 10% of the data for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=3,
    validation_split=0.1,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


### Power Scheduling

In [4]:
# Power scheduling: lr(step) = lr0 / (1 + decay_rate * step / decay_steps).
#
# The optimizer's `weight_decay` argument regularises the model weights and leaves
# the learning rate constant, so it does NOT implement a learning-rate schedule.
# InverseTimeDecay does; decay_steps=10_000 with decay_rate=1.0 reproduces the
# legacy `decay=1e-4` optimizer argument.
power_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01,
    decay_steps=10_000,
    decay_rate=1.0,
    staircase=False,
)
optimizer = tf.keras.optimizers.SGD(learning_rate=power_schedule)

model.compile(
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    optimizer=optimizer,
    metrics=["accuracy"],
)

history = model.fit(X_train, y_train, epochs=3, validation_split=0.1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


### Exponential Scheduling

In [5]:
def exponential_decay_fn(epoch, lr, decay_rate=0.1, decay_epochs=20):
    """Return the exponentially decayed learning rate for a given epoch.

    Computes ``lr * decay_rate ** (epoch / decay_epochs)``, i.e. the rate is
    multiplied by ``decay_rate`` every ``decay_epochs`` epochs.

    Args:
        epoch: Zero-based epoch index.
        lr: The *initial* learning rate (the value at epoch 0). Do not pass the
            optimizer's current learning rate here, otherwise the decay compounds
            from one epoch to the next.
        decay_rate: Multiplicative factor applied every ``decay_epochs`` epochs.
        decay_epochs: Number of epochs over which the rate shrinks by ``decay_rate``.

    Returns:
        The learning rate to use for ``epoch``.
    """
    return lr * decay_rate ** (epoch / decay_epochs)

initial_learning_rate = 0.01

# The callback recomputes the rate at every epoch from the fixed initial value,
# so the decay is always applied to the starting rate rather than the current one.
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: exponential_decay_fn(epoch, initial_learning_rate)
)

optimizer = tf.keras.optimizers.SGD(learning_rate=initial_learning_rate)

model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

history = model.fit(
    X_train,
    y_train,
    epochs=3,
    validation_split=0.1,
    callbacks=[lr_scheduler],
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [7]:
# Learning rate logged for each epoch of the last fit() call
history.history["lr"]

[0.01, 0.008912509, 0.007943282]

### Piecewise Constant Scheduling

In [15]:
def piecewise_constant_lr_fn(epoch):
    """Return the learning rate for ``epoch`` under a three-stage constant schedule.

    Epochs below 3 use 0.01, epochs below 15 use 0.005, and every later epoch
    uses 0.001.
    """
    # (exclusive upper epoch bound, learning rate), checked in ascending order.
    stages = ((3, 0.01), (15, 0.005))
    for upper_bound, rate in stages:
        if epoch < upper_bound:
            return rate
    return 0.001
    
    
# The schedule function takes only the epoch index, so it can be handed to the
# callback directly; the callback sets the rate at the start of every epoch.
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(piecewise_constant_lr_fn)

optimizer = tf.keras.optimizers.SGD()

model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

history = model.fit(
    X_train,
    y_train,
    epochs=5,
    validation_split=0.1,
    callbacks=[lr_scheduler],
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [16]:
# Learning rate logged for each epoch of the last fit() call
history.history["lr"]

[0.01, 0.01, 0.01, 0.005, 0.005]

### Performance Scheduling

In [17]:
import math

# NOTE: this cell applies a step-based exponential decay through a Keras
# LearningRateSchedule object. It is not performance scheduling, which would
# instead use the tf.keras.callbacks.ReduceLROnPlateau callback.
batch_size = 32
n_epochs = 15
validation_split = 0.1

# fit() holds out `validation_split` of the samples, so only the remaining ones
# produce optimizer steps. Count steps on that training portion, otherwise
# decay_steps is overestimated and the rate never reaches 0.1x by the last epoch.
n_train_samples = math.floor(len(X_train) * (1.0 - validation_split))
n_steps = n_epochs * math.ceil(n_train_samples / batch_size)

# The learning rate shrinks smoothly from 0.01 to 0.001 over the n_steps updates.
schedule_learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=n_steps, decay_rate=0.1
)

optimizer = tf.keras.optimizers.SGD(learning_rate=schedule_learning_rate)

model.compile(
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    optimizer=optimizer,
    metrics=["accuracy"]
)

# Pass the same batch size, epoch count and split that n_steps was derived from.
history = model.fit(
    X_train,
    y_train,
    epochs=n_epochs,
    batch_size=batch_size,
    validation_split=validation_split,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


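Performance scheduling is considered one of the best learning rate scheduling strategies. Note that the cell above uses `ExponentialDecay`, a step-based exponential schedule; performance scheduling itself is implemented in Keras with the `tf.keras.callbacks.ReduceLROnPlateau` callback.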