In [1]:
# STEP 1 — Load & prepare CIFAR-10 (flatten to vectors)
import time
import numpy as np
from tensorflow import keras

# Load CIFAR-10: images (32x32x3), labels 0..9

In [2]:



(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
y_train = y_train.ravel()
y_test  = y_test.ravel()

# Flatten images to 3072-length vectors and scale to [0,1]
x_train = x_train.reshape(len(x_train), -1).astype("float32") / 255.0
x_test  = x_test.reshape(len(x_test),  -1).astype("float32") / 255.0

input_dim = x_train.shape[1]   # 32*32*3 = 3072
num_classes = 10
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 0us/step
(50000, 3072) (50000,) (10000, 3072) (10000,)


# STEP 2 — Baseline MLP (no regularization)

In [4]:

def build_baseline(input_dim, num_classes=10):
    model = keras.Sequential([
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.Dense(512, activation="relu"),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dense(num_classes, activation="softmax")
    ])
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model

baseline = build_baseline(input_dim)
t0 = time.perf_counter()
hist_base = baseline.fit(
    x_train, y_train,
    epochs=30, batch_size=128, validation_split=0.1, verbose=0
)
time_base = time.perf_counter() - t0

# Best validation accuracy and its epoch (1-based)
val_accs = hist_base.history["val_accuracy"]
best_val_acc_base = float(np.max(val_accs))
best_epoch_base = int(np.argmax(val_accs) + 1)

test_acc_base = float(baseline.evaluate(x_test, y_test, verbose=0)[1])

print(f"BASELINE  | best_val_acc={best_val_acc_base:.4f} at epoch={best_epoch_base} | "
      f"test_acc={test_acc_base:.4f} | time={time_base:.1f}s")


BASELINE  | best_val_acc=0.5214 at epoch=29 | test_acc=0.5155 | time=535.4s


# STEP 3 — MLP with Dropout

In [5]:

def build_dropout(input_dim, num_classes=10, p=0.5):
    model = keras.Sequential([
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.Dense(512, activation="relu"),
        keras.layers.Dropout(p),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(p),
        keras.layers.Dense(num_classes, activation="softmax")
    ])
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model

drop = build_dropout(input_dim, p=0.5)
t0 = time.perf_counter()
hist_drop = drop.fit(
    x_train, y_train,
    epochs=30, batch_size=128, validation_split=0.1, verbose=0
)
time_drop = time.perf_counter() - t0

val_accs = hist_drop.history["val_accuracy"]
best_val_acc_drop = float(np.max(val_accs))
best_epoch_drop = int(np.argmax(val_accs) + 1)
test_acc_drop = float(drop.evaluate(x_test, y_test, verbose=0)[1])

print(f"DROPOUT   | best_val_acc={best_val_acc_drop:.4f} at epoch={best_epoch_drop} | "
      f"test_acc={test_acc_drop:.4f} | time={time_drop:.1f}s")


DROPOUT   | best_val_acc=0.3552 at epoch=28 | test_acc=0.3471 | time=567.7s


# STEP 4 — MLP with L2 weight regularization (kernel_regularizer)

In [6]:

from tensorflow.keras import regularizers

def build_l2(input_dim, num_classes=10, l2=1e-4):
    model = keras.Sequential([
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.Dense(512, activation="relu",
                           kernel_regularizer=regularizers.l2(l2)),
        keras.layers.Dense(256, activation="relu",
                           kernel_regularizer=regularizers.l2(l2)),
        keras.layers.Dense(num_classes, activation="softmax")
    ])
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model

l2m = build_l2(input_dim, l2=1e-4)
t0 = time.perf_counter()
hist_l2 = l2m.fit(
    x_train, y_train,
    epochs=30, batch_size=128, validation_split=0.1, verbose=0
)
time_l2 = time.perf_counter() - t0

val_accs = hist_l2.history["val_accuracy"]
best_val_acc_l2 = float(np.max(val_accs))
best_epoch_l2 = int(np.argmax(val_accs) + 1)
test_acc_l2 = float(l2m.evaluate(x_test, y_test, verbose=0)[1])

print(f"L2(1e-4)  | best_val_acc={best_val_acc_l2:.4f} at epoch={best_epoch_l2} | "
      f"test_acc={test_acc_l2:.4f} | time={time_l2:.1f}s")


L2(1e-4)  | best_val_acc=0.5330 at epoch=22 | test_acc=0.5167 | time=547.5s


# STEP 5 — Combine Dropout + L2 + EarlyStopping

In [7]:

def build_drop_l2(input_dim, num_classes=10, p=0.5, l2=1e-4):
    model = keras.Sequential([
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.Dense(512, activation="relu",
                           kernel_regularizer=regularizers.l2(l2)),
        keras.layers.Dropout(p),
        keras.layers.Dense(256, activation="relu",
                           kernel_regularizer=regularizers.l2(l2)),
        keras.layers.Dropout(p),
        keras.layers.Dense(num_classes, activation="softmax")
    ])
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model

combo = build_drop_l2(input_dim, p=0.5, l2=1e-4)

early = keras.callbacks.EarlyStopping(
    monitor="val_accuracy", mode="max",
    patience=5, restore_best_weights=True
)

t0 = time.perf_counter()
hist_combo = combo.fit(
    x_train, y_train,
    epochs=50, batch_size=128, validation_split=0.1,
    callbacks=[early], verbose=0
)
time_combo = time.perf_counter() - t0

val_accs = hist_combo.history["val_accuracy"]
best_val_acc_combo = float(np.max(val_accs))
best_epoch_combo = int(np.argmax(val_accs) + 1)
test_acc_combo = float(combo.evaluate(x_test, y_test, verbose=0)[1])

print(f"DROP+L2+ES| best_val_acc={best_val_acc_combo:.4f} at epoch={best_epoch_combo} | "
      f"test_acc={test_acc_combo:.4f} | time={time_combo:.1f}s (early stopping)")


DROP+L2+ES| best_val_acc=0.3274 at epoch=6 | test_acc=0.3259 | time=206.7s (early stopping)


# STEP 6 — Final commparing tables

In [8]:

import pandas as pd

summary = pd.DataFrame([
    {"Model":"BASELINE",     "Best Val Acc":best_val_acc_base,  "Best Epoch":best_epoch_base,  "Test Acc":test_acc_base,  "Time (s)":time_base},
    {"Model":"DROPOUT",      "Best Val Acc":best_val_acc_drop,  "Best Epoch":best_epoch_drop,  "Test Acc":test_acc_drop,  "Time (s)":time_drop},
    {"Model":"L2 (1e-4)",    "Best Val Acc":best_val_acc_l2,    "Best Epoch":best_epoch_l2,    "Test Acc":test_acc_l2,    "Time (s)":time_l2},
    {"Model":"DROP+L2+ES",   "Best Val Acc":best_val_acc_combo, "Best Epoch":best_epoch_combo, "Test Acc":test_acc_combo, "Time (s)":time_combo},
])
print("\n=== CIFAR-10 DenseNN — Regularization Comparison ===")
print(summary)



=== CIFAR-10 DenseNN — Regularization Comparison ===
        Model  Best Val Acc  Best Epoch  Test Acc    Time (s)
0    BASELINE        0.5214          29    0.5155  535.355442
1     DROPOUT        0.3552          28    0.3471  567.685718
2   L2 (1e-4)        0.5330          22    0.5167  547.515501
3  DROP+L2+ES        0.3274           6    0.3259  206.710857


# q1 - How does adding dropout layers affect training vs validation accuracy?

In the results, Dropout alone lowered both validation and test accuracy compared to the baseline.

Baseline Val Acc: 0.5214, Test Acc: 0.5155

Dropout Val Acc: 0.3552, Test Acc: 0.3471

This shows that Dropout made the network underfit in this setup — the model lost capacity and could not learn strong patterns.

# q2- Does early stopping prevent wasted training time?

Yes. With Dropout + L2 + EarlyStopping, training stopped early at epoch 6 instead of running full 30 epochs.

Training time dropped to 206s compared to ~535–567s for other models.

However, accuracy was lower (Val Acc 0.3274, Test Acc 0.3259) because regularization was very strong.

So, early stopping saves time, but combined with heavy regularization it can lead to underfitting.

# q3- Can L2 weight regularization improve generalization?

Yes, slightly. the L2 model achieved the highest validation accuracy (0.5330) and a slightly better test accuracy (0.5167) than the baseline.

This suggests L2 weight decay improved generalization a little, by reducing overfitting without hurting performance.

# q4- How does model depth affect overfitting on CIFAR-10?

In the experiments, model depth was kept constant (2 hidden layers).

So the results here don’t show depth effects directly.

But in general: deeper models often overfit on CIFAR-10 with dense layers, which is why regularization techniques (Dropout, L2, EarlyStopping) are tested.

# In summary:

Dropout hurt performance in this dense model.

EarlyStopping reduced wasted time but combined with heavy regularization caused underfitting.

L2 gave the best overall balance (slight accuracy improvement, stable validation accuracy).

Model depth was not varied here, so no conclusion about depth from your run.