# Home assignment

* Author: Romain Tavenard (@rtavenar)
* License: CC-BY-NC-SA

A home assignment from a course on Deep Learning at EDHEC.

## Problem statement

The dataset we are interested in here is called "CIFAR10". It is described [in this page](https://keras.io/api/datasets/cifar10/).

You should load the data, **select only 5,000 samples out of the total 50,000 ones**, and preprocess it if needed.
You should compare several candidate neural network architectures, and make a decision about which is best for the task at hand.
You should be explicit about the indicator(s) you base your decision on.

Finally, as a bonus, you could try to evaluate whether it is better to:
* train a model from scratch on this dataset alone ;
* use a large model that was pre-trained on ImageNet ;
* pre-train a model on another dataset called [CIFAR100](https://keras.io/api/datasets/cifar100/) and fine-tune it on CIFAR10.

## Deadline

Deadline for this home assignment is February 28th, 2025.
You should use the link on moodle to hand in your assignment.
A single ipynb file should be provided,
with execution traces.
This assignment is to be done **by groups of three, at most** and names of all students should be included in the file name.

## Data loading

You can use the dedicated `keras` utility to load this dataset: <https://keras.io/api/datasets/cifar10/>

## Loading the Data

In [1]:
import os
import keras
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import InputLayer, Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.utils import to_categorical
import numpy as np
from keras.utils import to_categorical


In [3]:
os.environ["KERAS_BACKEND"] = "torch"
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

#### Shaping the dataset CIFAR10

In [9]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
assert x_train.shape == (50000, 32, 32, 3)
assert x_test.shape == (10000, 32, 32, 3)
assert y_train.shape == (50000, 1)
assert y_test.shape == (10000, 1)

In [10]:
num_train_samples = 4500
num_test_samples = 500

np.random.seed(42)

train_indices = np.random.choice(x_train.shape[0], num_train_samples, replace=False)
test_indices = np.random.choice(x_test.shape[0], num_test_samples, replace=False)

x_train = x_train[train_indices]
y_train = y_train[train_indices]
x_test = x_test[test_indices]
y_test = y_test[test_indices]

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

x_train = x_train / 255
x_test = x_test / 255

In [11]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(4500, 32, 32, 3)
(500, 32, 32, 3)
(4500, 10)
(500, 10)


#### Keras CIFAR100

#### Shaping the dataset CIFAR100

In [12]:
(x_train_100, y_train_100), (x_test_100, y_test_100) = keras.datasets.cifar100.load_data()
assert x_train_100.shape == (50000, 32, 32, 3)
assert x_test_100.shape == (10000, 32, 32, 3)
assert y_train_100.shape == (50000, 1)
assert y_test_100.shape == (10000, 1)

In [13]:
y_train_100 = to_categorical(y_train_100)
y_test_100 = to_categorical(y_test_100)

x_train_100 = x_train_100 / 255
x_test_100 = x_test_100 / 255

# Models

## Simple CNN Models

### Baseline Model

In [14]:
def baseline_model():
    model = Sequential([
        InputLayer(input_shape=(32, 32, 3)),
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=2),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=2),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

### Baseline Model Including Data Augmentation

In [15]:
def augmented_baseline_model():
    data_augmentation = Sequential([
        RandomFlip("horizontal"),
        RandomRotation(0.1),
        RandomZoom(0.2),
    ])

    model = Sequential([
        InputLayer(input_shape=(32, 32, 3)),
        data_augmentation,
        Conv2D(filters=16, kernel_size=(2, 2), activation='relu'),
        MaxPooling2D(pool_size=4),
        Conv2D(filters=16, kernel_size=(2, 2), activation='relu'),
        MaxPooling2D(pool_size=2),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

### Le Net Model

In [16]:
def lenet_model():
    """LeNet architecture implementation"""
    model = Sequential([
        # First convolutional layer
        Conv2D(6, kernel_size=(5, 5), padding='same', activation='relu', input_shape=(32, 32, 3)),
        MaxPooling2D(pool_size=(2, 2)),

        # Second convolutional layer
        Conv2D(16, kernel_size=(5, 5), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),

        # Fully connected layers
        Flatten(),
        Dense(120, activation='relu'),
        Dense(84, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])

## Deeper CNNs

### Basic 3 Block Deep CNN

In [17]:
def deep_cnn_model():
    model = Sequential([
        InputLayer(input_shape=(32, 32, 3)),
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])

### Basic 3 Block Deep CNN with regularization

In [18]:
# CNN with Global Average Pooling and L2 regularization
def regularized_deep_cnn():
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu',
                               kernel_regularizer=tf.keras.regularizers.l2(1e-4),
                               input_shape=(32, 32, 3)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu',
                              kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Dropout(0.3),

        tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu',
                              kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu',
                              kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Dropout(0.4),

        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

    return model

## Transfer Learning Models

### ResNet50 Imagenet, only tuning classification layers

In [19]:
def resnet50_false_model(x_train):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=x_train[0].shape)
    for layer in base_model.layers:
        layer.trainable = False

    model = Sequential([
        base_model,
        Flatten(),
        Dense(10, activation="softmax")
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

### ResNet50 Imagenet, tuning all layers

In [20]:
def resnet50_true_model(x_train):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=x_train[0].shape)
    for layer in base_model.layers:
        layer.trainable = True

    model = Sequential([
        base_model,
        Flatten(),
        Dense(10, activation="softmax")
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

### CIFAR100 Pretraining, CIFAR10 Fine Tuning

In [21]:
def pretrain_and_finetune(x_train_100, y_train_100, x_test_100, y_test_100, x_train, y_train, x_test, y_test):
    # Early stopping callback
    cb = EarlyStopping(monitor="val_accuracy", patience=10, restore_best_weights=True)

    # Pretrain on CIFAR-100
    model = Sequential([
        InputLayer(input_shape=(32, 32, 3)),
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=2),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=2),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(100, activation='softmax')
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(x_train_100, y_train_100, epochs=100000, validation_data=(x_test_100, y_test_100), callbacks=[cb])
    model.save('cifar100_pretrained_model.h5')

    # Fine-tune on CIFAR-10
    model_cifar100 = load_model('cifar100_pretrained_model.h5')
    feature_extractor = model_cifar100.layers[-3].output
    output = Dense(10, activation='softmax')(feature_extractor)

    model_cifar10 = Model(inputs=model_cifar100.input, outputs=output)
    for layer in model_cifar100.layers:
        layer.trainable = False

    model_cifar10.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model_cifar10

### Deep NN, CIFAR100 Pretraining, CIFAR10 Fine Tuning

In [22]:
def pretrain_and_finetune_deep(x_train_100, y_train_100, x_test_100, y_test_100, x_train, y_train, x_test, y_test):
    # Early stopping callback
    cb = EarlyStopping(monitor="val_accuracy", patience=5, restore_best_weights=True)

    # Pretrain on CIFAR-100 with deeper architecture
    model = Sequential([
        InputLayer(input_shape=(32, 32, 3)),
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(100, activation='softmax')
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(x_train_100, y_train_100, epochs=100000, validation_data=(x_test_100, y_test_100), callbacks=[cb])
    model.save('cifar100_pretrained_model2.h5')

    # Fine-tune on CIFAR-10
    model_cifar100 = load_model('cifar100_pretrained_model2.h5')
    feature_extractor = model_cifar100.layers[-3].output
    output = Dense(10, activation='softmax')(feature_extractor)

    model_cifar10 = Model(inputs=model_cifar100.input, outputs=output)
    for layer in model_cifar100.layers:
        layer.trainable = False

    model_cifar10.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model_cifar10.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))

    return model_cifar10

# Model Training

## Function

In [23]:
def train_models(models, x_train, y_train, x_val, y_val):
    results = {}
    for name, model in models.items():
        print(f"\nTraining {name}...")
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        callbacks = [
            EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)
        ]

        history = model.fit(
            x_train, y_train, batch_size=64, epochs=100,
            validation_data=(x_val, y_val),
            callbacks=callbacks, verbose=1
        )
        results[name] = {'history': history}
    return results

## Training

In [24]:
# Define models
models = {
    "Baseline CNN": baseline_model(),
    "Augmented CNN": augmented_baseline_model(),
    "Le Net Model": lenet_model(),
    "Deeper CNN": deep_cnn_model(),
    "Regularized Deeper CNN": regularized_deep_cnn(),
    "Res Net 50 False": resnet50_false_model(x_train),
    "Res Net 50 True": resnet50_true_model(x_train),
}

# Run the training, plotting, and evaluation
results = train_models(models, x_train, y_train, x_val, y_val)





NameError: name 'augmented_model' is not defined

# Model Evaluation

## Plotting

#### Function

In [None]:
def plot_learning_curves(results):
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    for name, result in results.items():
        plt.plot(result['history'].history['accuracy'], label=f'{name} (Train)')
        plt.plot(result['history'].history['val_accuracy'], label=f'{name} (Val)')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend()

    plt.subplot(1, 2, 2)
    for name, result in results.items():
        plt.plot(result['history'].history['loss'], label=f'{name} (Train)')
        plt.plot(result['history'].history['val_loss'], label=f'{name} (Val)')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()

    plt.tight_layout()
    plt.show()



#### Plots

In [None]:
plot_learning_curves(results)


## Testing

#### Function

In [None]:
def evaluate_models(models, results, x_test, y_test):
    print("\nModel Evaluation:")
    best_model_name, best_acc = None, 0

    for name, model in models.items():
        test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
        results[name]['test_acc'] = test_acc
        print(f"{name} - Test Accuracy: {test_acc:.4f}")
        if test_acc > best_acc:
            best_model_name, best_acc = name, test_acc

    print(f"\nBest model: {best_model_name} with test accuracy of {best_acc:.4f}")


#### Tests

In [None]:
evaluate_models(models, results, x_test, y_test)