In [1]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [4]:
import math
import time

import tensorflow as tf
from tensorflow.keras import layers, models

# Load and preprocess CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Scale pixel values to [0, 1]. Casting to float32 before dividing avoids the
# implicit promotion to float64 that `array / 255.0` performs on integer image
# data, which would double the memory held by the image arrays.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# One-hot encode the class labels (10 classes), as required by the
# categorical_crossentropy loss used in train_model.
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

def create_shallow_model():
    """Build a fresh, uncompiled shallow CNN for 32x32 RGB inputs.

    Layer order: input (32, 32, 3) -> 3x3 conv with 32 filters (ReLU, L2 0.01)
    -> 2x2 max-pooling -> dropout 0.3 -> flatten -> dense 128 (ReLU, L2 0.01)
    -> dropout 0.3 -> dense 10 with softmax.

    Returns:
        A new ``Sequential`` model; compiling it is left to the caller.
    """
    # Convolutional feature extractor; dropout after pooling limits overfitting.
    feature_layers = [
        layers.Input(shape=(32, 32, 3)),
        layers.Conv2D(
            32,
            (3, 3),
            activation='relu',
            kernel_regularizer=tf.keras.regularizers.l2(0.01),
        ),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.3),
    ]

    # Fully connected classifier head ending in a 10-way softmax.
    classifier_layers = [
        layers.Flatten(),
        layers.Dense(
            128,
            activation='relu',
            kernel_regularizer=tf.keras.regularizers.l2(0.01),
        ),
        layers.Dropout(0.3),
        layers.Dense(10, activation='softmax'),
    ]

    return models.Sequential(feature_layers + classifier_layers)


# Training function
def train_model(model, optimizer, epochs, use_scheduler=False, schedule_func=None):
    """Compile and fit ``model`` on the notebook-level CIFAR-10 arrays, timing the run.

    Reads the module-level ``x_train``/``y_train`` for training and
    ``x_test``/``y_test`` as the validation data passed to ``fit``.

    Args:
        model: Object exposing ``compile`` and ``fit`` (a Keras model here).
        optimizer: Optimizer handed to ``model.compile``.
        epochs: Number of epochs passed to ``model.fit``.
        use_scheduler: When true, attach a ``LearningRateScheduler`` callback.
        schedule_func: Schedule callable for the scheduler; required when
            ``use_scheduler`` is true, ignored otherwise.

    Returns:
        Tuple ``(elapsed_seconds, training_accuracy, validation_accuracy)``
        where the two accuracy entries are the per-epoch lists taken from
        ``history.history['accuracy']`` and ``history.history['val_accuracy']``.

    Raises:
        ValueError: If ``use_scheduler`` is true but ``schedule_func`` is None.
    """
    # Fail fast: without this check a missing schedule only surfaces once
    # training starts, as an opaque "NoneType is not callable" error.
    if use_scheduler and schedule_func is None:
        raise ValueError("schedule_func must be provided when use_scheduler=True")

    if use_scheduler:
        callbacks = [tf.keras.callbacks.LearningRateScheduler(schedule_func)]
    else:
        callbacks = []

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments. The timed span covers compile + fit.
    start_time = time.perf_counter()
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=64,
        validation_data=(x_test, y_test),
        callbacks=callbacks,
        verbose=1,
    )
    elapsed = time.perf_counter() - start_time

    # Extract per-epoch training and validation accuracy
    training_accuracy = history.history['accuracy']
    validation_accuracy = history.history['val_accuracy']
    return elapsed, training_accuracy, validation_accuracy

# Exponential Decay Scheduler
def exponential_decay_schedule(epoch):
    """Return the learning rate for ``epoch`` under smooth exponential decay.

    Computes ``0.01 * exp(-0.1 * epoch / 10)``, so the rate starts at 0.01
    for epoch 0 and shrinks continuously as ``epoch`` grows.

    Args:
        epoch: Epoch index (0 for the first epoch).

    Returns:
        The learning rate as a Python float.
    """
    initial_lr = 0.01
    decay_rate = 0.1
    decay_steps = 10
    # This is a scalar computation on Python numbers, so use math.exp instead
    # of a TensorFlow op: no tensor is created per call and the result keeps
    # full double precision.
    return initial_lr * math.exp(-decay_rate * epoch / decay_steps)

# Step Decay Scheduler
def step_decay_schedule(epoch):
    """Return the learning rate for ``epoch`` under a staircase schedule.

    The rate starts at 0.01 and is multiplied by 0.5 once for every 5 full
    epochs that have elapsed (epochs 0-4 -> 0.01, 5-9 -> 0.005, ...).

    Args:
        epoch: Epoch index (0 for the first epoch).

    Returns:
        The learning rate as a Python float.
    """
    base_rate, halving_factor, interval = 0.01, 0.5, 5
    # Number of completed drop intervals so far.
    completed_drops = epoch // interval
    return float(base_rate * halving_factor ** completed_drops)

# Shared experiment settings: one place to change the epoch count and the seed.
EPOCHS = 20
SEED = 42

# Experiment 1: SGD with Exponential Decay
# Re-seed before building each model so both experiments start from the same
# random state and differ only in the learning-rate schedule.
tf.keras.utils.set_random_seed(SEED)
model_exp_decay = create_shallow_model()
optimizer_exp_decay = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
time_exp_decay, train_acc_exp_decay, val_acc_exp_decay = train_model(
    model_exp_decay, optimizer_exp_decay, epochs=EPOCHS, use_scheduler=True, schedule_func=exponential_decay_schedule
)

# Experiment 2: SGD with Step Decay
tf.keras.utils.set_random_seed(SEED)
model_step_decay = create_shallow_model()
optimizer_step_decay = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
time_step_decay, train_acc_step_decay, val_acc_step_decay = train_model(
    model_step_decay, optimizer_step_decay, epochs=EPOCHS, use_scheduler=True, schedule_func=step_decay_schedule
)

# Print results
print(f"Training Time - Exponential Decay: {time_exp_decay:.2f}s")
print(f"Training Time - Step Decay: {time_step_decay:.2f}s")

# Evaluate models
_, accuracy_exp_decay = model_exp_decay.evaluate(x_test, y_test, verbose=0)
_, accuracy_step_decay = model_step_decay.evaluate(x_test, y_test, verbose=0)

print(f"Test Accuracy - Exponential Decay: {accuracy_exp_decay * 100:.2f}%")
print(f"Test Accuracy - Step Decay: {accuracy_step_decay * 100:.2f}%")

# Print training and validation accuracy for Exponential Decay.
# Iterate over the recorded histories themselves rather than a hard-coded
# range, so the report always matches the number of epochs actually trained.
print("\nExponential Decay:")
for epoch_number, (train_acc, val_acc) in enumerate(zip(train_acc_exp_decay, val_acc_exp_decay), start=1):
    print(f"Epoch {epoch_number}: Training Accuracy = {train_acc * 100:.2f}%, Validation Accuracy = {val_acc * 100:.2f}%")

# Print training and validation accuracy for Step Decay
print("\nStep Decay:")
for epoch_number, (train_acc, val_acc) in enumerate(zip(train_acc_step_decay, val_acc_step_decay), start=1):
    print(f"Epoch {epoch_number}: Training Accuracy = {train_acc * 100:.2f}%, Validation Accuracy = {val_acc * 100:.2f}%")


Epoch 1/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 16ms/step - accuracy: 0.2822 - loss: 3.4676 - val_accuracy: 0.4756 - val_loss: 1.7540 - learning_rate: 0.0100
Epoch 2/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 16ms/step - accuracy: 0.4591 - loss: 1.7944 - val_accuracy: 0.5059 - val_loss: 1.6213 - learning_rate: 0.0099
Epoch 3/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 19ms/step - accuracy: 0.4821 - loss: 1.6997 - val_accuracy: 0.5025 - val_loss: 1.6379 - learning_rate: 0.0098
Epoch 4/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 19ms/step - accuracy: 0.4872 - loss: 1.6843 - val_accuracy: 0.5275 - val_loss: 1.6027 - learning_rate: 0.0097
Epoch 5/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 18ms/step - accuracy: 0.4928 - loss: 1.6825 - val_accuracy: 0.5038 - val_loss: 1.6628 - learning_rate: 0.0096
Epoch 6/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37