# Handling Overfitting with CIFAR-10 (Images)

#### I want to train a feed-forward (dense) neural network on the CIFAR-10 dataset (small color images from 10 object classes), so that I can explore how regularization techniques (dropout, early stopping, weight decay) help reduce overfitting on more complex datasets.

In [2]:

import os, json, math, random
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# ---------------------------
# Reproducibility
# ---------------------------
# One shared seed for NumPy, Python's `random`, and TensorFlow. Seeding
# happens once, here, so each later run continues from whatever RNG state
# the previous runs left behind.
SEED = 1337
for _seed_rng in (np.random.seed, random.seed, tf.random.set_seed):
    _seed_rng(SEED)

# ---------------------------
# Config
# ---------------------------
# Training hyperparameters shared by the experiments below.
BATCH_SIZE, VAL_SPLIT, LR = 256, 0.2, 1e-3
EPOCHS = 60  # long enough to see overfitting for baseline MLP

# Directory that receives the per-run plots, the CSV summary and the
# histories JSON.
OUTDIR = "mlp_cifar10_runs"
os.makedirs(OUTDIR, exist_ok=True)

In [4]:
# ---------------------------
# Data
# ---------------------------
num_classes = 10
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Scale pixels to [0, 1] as float32, then flatten every image into a single
# feature vector so it can be fed to Dense layers.
x_train = (x_train.astype("float32") / 255.0).reshape((x_train.shape[0], -1))
x_test = (x_test.astype("float32") / 255.0).reshape((x_test.shape[0], -1))

# One-hot encode the labels, as expected by the "categorical_crossentropy"
# loss used in compile_model.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

# Number of input features per flattened image.
input_dim = x_train.shape[1]


In [5]:
# ---------------------------
# Model builder
# ---------------------------
def build_mlp(
    input_dim:int,
    hidden_units=(512, 256),
    dropout_rate:float=0.0,
    l2_lambda:float=0.0,
    use_batchnorm:bool=False,
    num_classes:int=10,
):
    """Build an (uncompiled) fully-connected softmax classifier.

    Each entry of ``hidden_units`` adds one hidden stage:
    Dense (no activation) -> optional BatchNormalization -> ReLU ->
    optional Dropout. A final Dense layer with softmax produces the class
    probabilities.

    Args:
        input_dim: Number of features per (flattened) input sample.
        hidden_units: Width of each hidden Dense layer, in order.
        dropout_rate: Dropout rate after every ReLU; falsy or <= 0 disables
            dropout entirely (no Dropout layers are created).
        l2_lambda: L2 penalty factor applied to the kernels of the hidden
            Dense layers; falsy or <= 0 disables it. This is a loss penalty
            (``kernel_regularizer``), not decoupled weight decay, and it is
            not applied to the output layer.
        use_batchnorm: Insert BatchNormalization between Dense and ReLU.
        num_classes: Size of the softmax output.

    Returns:
        A ``keras.Model`` mapping ``(input_dim,)`` inputs to class
        probabilities.
    """
    kernel_reg = None
    if l2_lambda and l2_lambda > 0:
        kernel_reg = regularizers.l2(l2_lambda)
    wants_dropout = bool(dropout_rate and dropout_rate > 0)

    inputs = keras.Input(shape=(input_dim,))
    features = inputs
    for idx, width in enumerate(hidden_units):
        features = layers.Dense(
            width,
            activation=None,
            kernel_regularizer=kernel_reg,
            name=f"dense_{idx}",
        )(features)
        if use_batchnorm:
            features = layers.BatchNormalization(name=f"bn_{idx}")(features)
        features = layers.ReLU(name=f"relu_{idx}")(features)
        if wants_dropout:
            features = layers.Dropout(dropout_rate, name=f"dropout_{idx}")(features)

    # NOTE: the layer is named "logits" but, because of the softmax
    # activation, it outputs probabilities rather than raw logits.
    outputs = layers.Dense(num_classes, activation="softmax", name="logits")(features)
    return keras.Model(inputs, outputs)

def compile_model(model, lr=LR):
    """Compile ``model`` in place for one-hot multi-class classification.

    Uses the Adam optimizer with learning rate ``lr``, categorical
    cross-entropy loss and accuracy as the tracked metric.

    Args:
        model: The Keras model to compile.
        lr: Learning rate for Adam (defaults to the module-level ``LR``).

    Returns:
        The same ``model`` object, now compiled.
    """
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


In [6]:
# ---------------------------
# Training helper
# ---------------------------
def _save_curve_plot(history_dict, metric, short_label, ylabel, run_name):
    """Save a train-vs-validation curve for one metric and return the PNG path.

    Args:
        history_dict: Mapping of metric name -> per-epoch values; must contain
            ``metric`` and ``"val_" + metric``.
        metric: History key of the training metric ("accuracy" or "loss").
        short_label: Suffix used in the legend labels ("acc" or "loss").
        ylabel: Y-axis label, also appended to the plot title.
        run_name: Experiment name; used in the title and the file name.

    Returns:
        Path of the written image, ``<OUTDIR>/<run_name>_<metric>.png``.
    """
    out_path = os.path.join(OUTDIR, f"{run_name}_{metric}.png")
    # Use an explicit figure/axes instead of pyplot's implicit "current
    # figure" so the plot cannot be affected by other open figures.
    fig, ax = plt.subplots()
    try:
        ax.plot(history_dict[metric], label=f"train_{short_label}")
        ax.plot(history_dict[f"val_{metric}"], label=f"val_{short_label}")
        ax.set_xlabel("Epoch")
        ax.set_ylabel(ylabel)
        ax.legend()
        ax.set_title(run_name + " - " + ylabel)
        fig.savefig(out_path, dpi=160, bbox_inches="tight")
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)
    return out_path


def train_experiment(
    run_name:str,
    hidden_units=(512,256),
    dropout_rate=0.0,
    l2_lambda=0.0,
    use_batchnorm=False,
    use_early_stopping=False,
    patience=8,
    max_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    lr=LR,
):
    """Train one MLP configuration on CIFAR-10 and summarize the result.

    Builds and compiles a fresh model, fits it on the module-level
    ``x_train``/``y_train`` with ``VAL_SPLIT`` of the data held out for
    validation, evaluates on ``x_test``/``y_test``, and writes accuracy and
    loss curves to ``OUTDIR``.

    Args:
        run_name: Identifier for the run; used in log output, plot titles
            and plot file names.
        hidden_units: Widths of the hidden Dense layers.
        dropout_rate: Dropout rate after each hidden stage (0 disables).
        l2_lambda: L2 kernel penalty for hidden layers (0 disables).
        use_batchnorm: Whether to insert BatchNormalization layers.
        use_early_stopping: If True, stop when ``val_accuracy`` has not
            improved for ``patience`` epochs and restore the best weights.
        patience: Early-stopping patience in epochs (ignored otherwise).
        max_epochs: Upper bound on the number of training epochs.
        batch_size: Mini-batch size passed to ``fit``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        ``(row, history)`` where ``row`` is a flat dict summarizing the run
        (configuration, epochs run, best validation accuracy and its 1-based
        epoch, test accuracy, plot paths) and ``history`` maps each tracked
        metric to a list of per-epoch Python floats (JSON-serializable).

    Note:
        ``test_acc`` is measured with the weights the model holds after
        ``fit`` returns. Without early stopping these are the final-epoch
        weights, which may differ from the epoch that achieved
        ``best_val_acc``.
    """
    print(f"\n=== RUN: {run_name} ===")
    print(json.dumps({
        "hidden_units": hidden_units,
        "dropout_rate": dropout_rate,
        "l2_lambda": l2_lambda,
        "batchnorm": use_batchnorm,
        "early_stopping": use_early_stopping,
        "epochs": max_epochs,
        "lr": lr
    }, indent=2))

    model = build_mlp(
        input_dim=input_dim,
        hidden_units=hidden_units,
        dropout_rate=dropout_rate,
        l2_lambda=l2_lambda,
        use_batchnorm=use_batchnorm,
        num_classes=num_classes
    )
    compile_model(model, lr)

    callbacks = []
    if use_early_stopping:
        callbacks.append(
            keras.callbacks.EarlyStopping(
                monitor="val_accuracy",
                patience=patience,
                mode="max",
                restore_best_weights=True,
                verbose=1
            )
        )

    history = model.fit(
        x_train, y_train,
        validation_split=VAL_SPLIT,
        epochs=max_epochs,
        batch_size=batch_size,
        callbacks=callbacks,
        verbose=2
    )

    # Cast every recorded value to a plain float so the history can be dumped
    # to JSON regardless of whether Keras recorded NumPy scalars.
    history_dict = {
        key: [float(value) for value in values]
        for key, values in history.history.items()
    }

    # Evaluate on held-out test set
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)

    # Find best val accuracy and epoch
    val_acc = history_dict["val_accuracy"]
    best_epoch = int(np.argmax(val_acc)) + 1  # 1-based
    best_val_acc = float(np.max(val_acc))

    print(f"[{run_name}] Best val_acc={best_val_acc:.4f} at epoch {best_epoch}, Test acc={test_acc:.4f}")

    # Save plots
    acc_path = _save_curve_plot(history_dict, "accuracy", "acc", "Accuracy", run_name)
    loss_path = _save_curve_plot(history_dict, "loss", "loss", "Loss", run_name)

    # Return summary row and history dict
    row = {
        "run": run_name,
        "hidden_units": str(hidden_units),
        "dropout_rate": dropout_rate,
        "l2_lambda": l2_lambda,
        "batchnorm": use_batchnorm,
        "early_stopping": use_early_stopping,
        "epochs_run": len(history_dict["loss"]),
        "best_epoch": best_epoch,
        "best_val_acc": best_val_acc,
        "test_acc": float(test_acc),
        "acc_plot": acc_path,
        "loss_plot": loss_path
    }
    return row, history_dict


In [7]:
# ---------------------------
# Experiments
# ---------------------------
# Accumulators shared by all experiment cells: one summary row per run and
# the full per-epoch history keyed by run name.
results = []
histories = {}

# Runs 1 and 2 are identical except for the dropout rate:
#   1) Baseline (no regularization)
#   2) + Dropout (0.5: strong regularization for MLP on images)
for _run_name, _dropout in (("baseline_mlp", 0.0), ("mlp_dropout", 0.5)):
    row, hist = train_experiment(
        run_name=_run_name,
        hidden_units=(512, 256),
        dropout_rate=_dropout,
        l2_lambda=0.0,
        use_batchnorm=False,
        use_early_stopping=False,
        max_epochs=EPOCHS,
    )
    results.append(row)
    histories[row["run"]] = hist


=== RUN: baseline_mlp ===
{
  "hidden_units": [
    512,
    256
  ],
  "dropout_rate": 0.0,
  "l2_lambda": 0.0,
  "batchnorm": false,
  "early_stopping": false,
  "epochs": 60,
  "lr": 0.001
}
Epoch 1/60
157/157 - 4s - 22ms/step - accuracy: 0.2976 - loss: 1.9885 - val_accuracy: 0.3107 - val_loss: 1.9062
Epoch 2/60
157/157 - 3s - 16ms/step - accuracy: 0.3841 - loss: 1.7410 - val_accuracy: 0.3542 - val_loss: 1.8183
Epoch 3/60
157/157 - 3s - 16ms/step - accuracy: 0.4093 - loss: 1.6615 - val_accuracy: 0.3828 - val_loss: 1.7242
Epoch 4/60
157/157 - 3s - 18ms/step - accuracy: 0.4332 - loss: 1.5991 - val_accuracy: 0.4095 - val_loss: 1.6684
Epoch 5/60
157/157 - 3s - 19ms/step - accuracy: 0.4531 - loss: 1.5475 - val_accuracy: 0.4197 - val_loss: 1.6402
Epoch 6/60
157/157 - 3s - 16ms/step - accuracy: 0.4657 - loss: 1.5082 - val_accuracy: 0.4281 - val_loss: 1.6154
Epoch 7/60
157/157 - 4s - 23ms/step - accuracy: 0.4756 - loss: 1.4749 - val_accuracy: 0.4383 - val_loss: 1.5808
Epoch 8/60
157/157 - 

In [8]:
# 3) + Early Stopping (same as dropout config but with ES)
# NOTE(review): this run is capped at 20 epochs while "mlp_dropout" trains
# for EPOCHS, so the two runs differ in training budget as well as in the
# use of early stopping.
_dropout_es_cfg = dict(
    run_name="mlp_dropout_earlystop",
    hidden_units=(512, 256),
    dropout_rate=0.5,
    l2_lambda=0.0,
    use_batchnorm=False,
    use_early_stopping=True,
    patience=8,
    max_epochs=20,
)
row, hist = train_experiment(**_dropout_es_cfg)
results.append(row)
histories[row["run"]] = hist


=== RUN: mlp_dropout_earlystop ===
{
  "hidden_units": [
    512,
    256
  ],
  "dropout_rate": 0.5,
  "l2_lambda": 0.0,
  "batchnorm": false,
  "early_stopping": true,
  "epochs": 20,
  "lr": 0.001
}
Epoch 1/20
157/157 - 6s - 35ms/step - accuracy: 0.1995 - loss: 2.1528 - val_accuracy: 0.3011 - val_loss: 1.9429
Epoch 2/20
157/157 - 5s - 34ms/step - accuracy: 0.2454 - loss: 2.0188 - val_accuracy: 0.3142 - val_loss: 1.9302
Epoch 3/20
157/157 - 5s - 31ms/step - accuracy: 0.2612 - loss: 1.9821 - val_accuracy: 0.3333 - val_loss: 1.9076
Epoch 4/20
157/157 - 5s - 29ms/step - accuracy: 0.2723 - loss: 1.9528 - val_accuracy: 0.3446 - val_loss: 1.8874
Epoch 5/20
157/157 - 5s - 34ms/step - accuracy: 0.2784 - loss: 1.9367 - val_accuracy: 0.3401 - val_loss: 1.8972
Epoch 6/20
157/157 - 5s - 33ms/step - accuracy: 0.2835 - loss: 1.9328 - val_accuracy: 0.3397 - val_loss: 1.8945
Epoch 7/20
157/157 - 4s - 28ms/step - accuracy: 0.2938 - loss: 1.9087 - val_accuracy: 0.3379 - val_loss: 1.8768
Epoch 8/20
15

In [9]:
# 4) + L2 (weight decay) — compare to dropout
# The L2 term is a kernel penalty added to the loss; with Adam this is not
# the same as decoupled weight decay.
_l2_cfg = dict(
    run_name="mlp_l2",
    hidden_units=(512, 256),
    dropout_rate=0.0,
    l2_lambda=1e-4,   # typical small L2
    use_batchnorm=False,
    use_early_stopping=True,  # ES works well with L2 too
    patience=8,
    max_epochs=20,
)
row, hist = train_experiment(**_l2_cfg)
results.append(row)
histories[row["run"]] = hist


=== RUN: mlp_l2 ===
{
  "hidden_units": [
    512,
    256
  ],
  "dropout_rate": 0.0,
  "l2_lambda": 0.0001,
  "batchnorm": false,
  "early_stopping": true,
  "epochs": 20,
  "lr": 0.001
}
Epoch 1/20
157/157 - 10s - 62ms/step - accuracy: 0.2830 - loss: 2.1340 - val_accuracy: 0.3277 - val_loss: 1.9494
Epoch 2/20
157/157 - 6s - 35ms/step - accuracy: 0.3743 - loss: 1.8338 - val_accuracy: 0.3734 - val_loss: 1.8215
Epoch 3/20
157/157 - 7s - 44ms/step - accuracy: 0.4099 - loss: 1.7321 - val_accuracy: 0.3932 - val_loss: 1.7620
Epoch 4/20
157/157 - 6s - 36ms/step - accuracy: 0.4283 - loss: 1.6685 - val_accuracy: 0.4102 - val_loss: 1.7161
Epoch 5/20
157/157 - 7s - 43ms/step - accuracy: 0.4506 - loss: 1.6096 - val_accuracy: 0.4209 - val_loss: 1.6997
Epoch 6/20
157/157 - 7s - 46ms/step - accuracy: 0.4642 - loss: 1.5675 - val_accuracy: 0.4307 - val_loss: 1.6890
Epoch 7/20
157/157 - 6s - 38ms/step - accuracy: 0.4744 - loss: 1.5351 - val_accuracy: 0.4324 - val_loss: 1.6838
Epoch 8/20
157/157 - 6s 

In [10]:
# 5) Deeper model (more capacity -> more overfitting risk)
# Four hidden layers instead of two, with dropout and L2 both enabled.
_deeper_cfg = dict(
    run_name="mlp_deeper",
    hidden_units=(1024, 512, 256, 128),
    dropout_rate=0.5,     # keep some regularization so it trains
    l2_lambda=1e-4,       # combine L2 to study effect on depth
    use_batchnorm=False,
    use_early_stopping=True,
    patience=10,
    max_epochs=20,
)
row, hist = train_experiment(**_deeper_cfg)
results.append(row)
histories[row["run"]] = hist


=== RUN: mlp_deeper ===
{
  "hidden_units": [
    1024,
    512,
    256,
    128
  ],
  "dropout_rate": 0.5,
  "l2_lambda": 0.0001,
  "batchnorm": false,
  "early_stopping": true,
  "epochs": 20,
  "lr": 0.001
}
Epoch 1/20
157/157 - 17s - 106ms/step - accuracy: 0.1287 - loss: 2.5435 - val_accuracy: 0.1795 - val_loss: 2.3233
Epoch 2/20
157/157 - 14s - 92ms/step - accuracy: 0.1769 - loss: 2.3130 - val_accuracy: 0.2356 - val_loss: 2.2226
Epoch 3/20
157/157 - 12s - 76ms/step - accuracy: 0.2012 - loss: 2.2225 - val_accuracy: 0.2515 - val_loss: 2.1473
Epoch 4/20
157/157 - 13s - 80ms/step - accuracy: 0.2151 - loss: 2.1678 - val_accuracy: 0.2485 - val_loss: 2.1066
Epoch 5/20
157/157 - 14s - 87ms/step - accuracy: 0.2271 - loss: 2.1295 - val_accuracy: 0.2565 - val_loss: 2.1022
Epoch 6/20
157/157 - 14s - 91ms/step - accuracy: 0.2309 - loss: 2.0983 - val_accuracy: 0.2422 - val_loss: 2.0891
Epoch 7/20
157/157 - 20s - 130ms/step - accuracy: 0.2373 - loss: 2.0660 - val_accuracy: 0.2580 - val_loss: 

In [11]:
# ---------------------------
# Save summary & histories
# ---------------------------
# One row per run, in the order the experiments were executed.
df = pd.DataFrame(results)
csv_path = os.path.join(OUTDIR, "results_summary.csv")
df.to_csv(csv_path, index=False)

# Per-epoch metric histories for every run, keyed by run name.
with open(os.path.join(OUTDIR, "histories.json"), "w") as f:
    f.write(json.dumps(histories))

print("\n=== SUMMARY (Acceptance Criteria) ===")
# 1) Baseline model without regularization
base = df.loc[df["run"].eq("baseline_mlp")].iloc[0]
print(
    f"Baseline best val_acc={base['best_val_acc']:.4f} "
    f"at epoch {base['best_epoch']} "
    f"(epochs run={base['epochs_run']})"
)


=== SUMMARY (Acceptance Criteria) ===
Baseline best val_acc=0.5045 at epoch 27 (epochs run=60)


In [12]:
# 2) Add dropout and compare: best validation accuracy of the dropout-only run
drop = df.loc[df["run"].eq("mlp_dropout")].iloc[0]
print(
    f"Dropout best val_acc={drop['best_val_acc']:.4f} "
    f"at epoch {drop['best_epoch']}"
)

Dropout best val_acc=0.4008 at epoch 41


In [13]:
# 3) Early stopping best epoch recorded
drop_es = df.loc[df["run"].eq("mlp_dropout_earlystop")].iloc[0]
print(
    f"Dropout+EarlyStopping best val_acc={drop_es['best_val_acc']:.4f} "
    f"at epoch {drop_es['best_epoch']} "
    f"(epochs run={drop_es['epochs_run']})"
)

# 4) L2 weight regularization
l2r = df.loc[df["run"].eq("mlp_l2")].iloc[0]
print(
    f"L2 best val_acc={l2r['best_val_acc']:.4f} "
    f"at epoch {l2r['best_epoch']}"
)

# 5) Depth effects (deeper model)
deep = df.loc[df["run"].eq("mlp_deeper")].iloc[0]
print(
    f"Deeper model best val_acc={deep['best_val_acc']:.4f} "
    f"at epoch {deep['best_epoch']} "
    f"(epochs run={deep['epochs_run']})"
)

# Point the reader at the artifacts written by the earlier cells.
_histories_path = os.path.join(OUTDIR, 'histories.json')
print("\nSaved:")
print(f"- Per-run accuracy/loss plots: {OUTDIR}/*_accuracy.png, *_loss.png")
print(f"- CSV summary: {csv_path}")
print(f"- Full histories JSON: {_histories_path}")

Dropout+EarlyStopping best val_acc=0.3822 at epoch 20 (epochs run=20)
L2 best val_acc=0.4911 at epoch 20
Deeper model best val_acc=0.2710 at epoch 9 (epochs run=19)

Saved:
- Per-run accuracy/loss plots: mlp_cifar10_runs/*_accuracy.png, *_loss.png
- CSV summary: mlp_cifar10_runs\results_summary.csv
- Full histories JSON: mlp_cifar10_runs\histories.json
