
# CIFAR-10 — Best-of-Both CNN (Merged & Improved)

This notebook merges the clean modular design and optimizer experiments from **CIFAR10_CNN_Assignment.ipynb** with the stronger regularization and analysis from **assignment8.ipynb**, then **improves** the model with:
- Keras **preprocessing/augmentation layers** (`RandomFlip`, `RandomRotation`, `RandomZoom`, `RandomContrast`)
- **Conv → BatchNorm → SiLU** (swish) blocks + **Dropout**
- **Label smoothing**, **AdamW** optimizer, **cosine decay with warm restarts** (`CosineDecayRestarts`) learning-rate schedule
- **EarlyStopping**, **ReduceLROnPlateau**, **ModelCheckpoint**
- Clear **training curves**, **confusion matrix**, and **misclassified samples**

> Expect ~75–80%+ test accuracy with reasonable training (varies by seed/hyperparams).


In [None]:

# ✅ Setup
import os, math, time, itertools, numpy as np, tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers.schedules import CosineDecayRestarts

print("TF version:", tf.__version__)

# Seed Python's `random`, NumPy and TensorFlow with a single call.
# Seeding only NumPy and TensorFlow would leave Python's own RNG unseeded,
# so anything drawing from it could still differ between runs.
SEED = 42
tf.keras.utils.set_random_seed(SEED)


## Load & Prepare CIFAR-10

In [None]:

# 📦 Load the CIFAR-10 train/test splits bundled with Keras
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
num_classes = 10

# 🧹 Normalize to [0,1]: cast each image array to float32 and divide by 255
x_train, x_test = (images.astype("float32") / 255.0 for images in (x_train, x_test))

# One-hot targets for training/evaluation (the loss is a categorical cross-entropy
# with label smoothing); the integer labels are kept for the analysis cells.
y_train_cat, y_test_cat = (to_categorical(labels, num_classes) for labels in (y_train, y_test))

print("Train:", x_train.shape, y_train.shape, "| Test:", x_test.shape, y_test.shape)


## Data Augmentation (Keras Preprocessing Layers)

In [None]:

# Augmentation pipeline that `build_model` applies directly to the input tensor:
# horizontal flip, then rotation, zoom and contrast jitter, in that order.
data_augmentation = tf.keras.Sequential(
    name="augment",
    layers=[
        layers.RandomFlip(mode="horizontal"),
        layers.RandomRotation(factor=0.05),
        layers.RandomZoom(height_factor=0.1),
        layers.RandomContrast(factor=0.1),
    ],
)

# Optional: mixup/cutmix (advanced) – left out for simplicity


## Model — Conv/BN/SiLU Blocks + Dropout

In [None]:

def conv_block(x, filters, dropout=0.0):
    """Apply a 3x3 Conv2D -> BatchNormalization -> swish block to `x`.

    Args:
        x: input tensor the block is applied to.
        filters: number of convolution filters.
        dropout: dropout rate appended after the activation; a Dropout layer
            is only added when this is greater than 0.

    Returns:
        The output tensor of the block.
    """
    # The convolution has no bias term; BatchNormalization follows it directly.
    features = layers.Conv2D(filters, kernel_size=3, padding="same", use_bias=False)(x)
    features = layers.BatchNormalization()(features)
    features = layers.Activation("swish")(features)  # SiLU

    if not dropout > 0:
        return features
    return layers.Dropout(dropout)(features)

def build_model(input_shape=(32,32,3), classes=10, base_width=64, dropout=0.25, dense_dropout=0.5):
    """Assemble the (uncompiled) functional CNN named "BestOfBoth_CIFAR10".

    The input is routed through the module-level `data_augmentation` pipeline,
    a stem `conv_block`, three stages of two `conv_block`s followed by max
    pooling and dropout, a 1x1 convolution, global average pooling and a
    dense softmax head.

    Args:
        input_shape: shape of one input image.
        classes: number of units in the softmax output layer.
        base_width: filter count of the stem and first stage; later stages use
            multiples of it.
        dropout: dropout rate inside every stage `conv_block`.
        dense_dropout: dropout rate before and after the 256-unit dense layer.

    Returns:
        A Keras `Model` mapping the input image tensor to class probabilities.
    """
    inputs = layers.Input(shape=input_shape)
    net = data_augmentation(inputs)

    # Stem: a single lightly-regularized conv block
    net = conv_block(net, base_width, dropout=0.05)

    # Stages: two conv blocks each, then spatial downsampling and a small dropout
    stage_widths = (base_width, base_width * 2, base_width * 2)
    for width in stage_widths:
        for _ in range(2):
            net = conv_block(net, width, dropout=dropout)
        net = layers.MaxPooling2D()(net)
        net = layers.Dropout(0.1)(net)

    # Pointwise (1x1) convolution widening the channels before pooling
    net = layers.Conv2D(base_width * 4, 1, padding="same", use_bias=False)(net)
    net = layers.BatchNormalization()(net)
    net = layers.Activation("swish")(net)

    # Classification head
    net = layers.GlobalAveragePooling2D()(net)
    net = layers.Dropout(dense_dropout)(net)
    net = layers.Dense(256, activation="swish")(net)
    net = layers.Dropout(dense_dropout)(net)
    probabilities = layers.Dense(classes, activation="softmax")(net)

    return models.Model(inputs, probabilities, name="BestOfBoth_CIFAR10")

# Build a model with the default hyperparameters and print its layer summary.
# NOTE: the training cell below builds a fresh model and rebinds `model`.
model = build_model()
model.summary()


## Optimizers & Compile Helper

In [None]:

def make_optimizer(opt_name="adamw", initial_lr=3e-3, weight_decay=1e-4, steps_per_epoch=391, t0_epochs=10):
    """
    Build a Keras optimizer driven by a CosineDecayRestarts learning-rate schedule.

    opt_name: 'adamw' | 'adam' | 'rmsprop' | 'sgd' (case-insensitive)
    initial_lr: starting learning rate of the schedule.
    weight_decay: decoupled weight decay; only used by AdamW.
    steps_per_epoch: the first cosine cycle lasts max(steps_per_epoch*5, 200) steps.
    t0_epochs: accepted for interface compatibility but currently NOT used;
        the first cycle length is fixed at 5 epochs worth of steps.

    Returns the optimizer instance. If AdamW is requested but cannot be
    constructed (attribute missing or keyword unsupported), a RuntimeWarning is
    issued and plain Adam (without weight decay) is returned instead.

    Raises ValueError for an unknown optimizer name.
    """
    import warnings

    schedule = CosineDecayRestarts(initial_lr, first_decay_steps=max(steps_per_epoch*5, 200), t_mul=2.0, m_mul=0.8, alpha=0.0)

    name = opt_name.lower()  # normalize once instead of per comparison

    if name == "adamw":
        try:
            return tf.keras.optimizers.AdamW(learning_rate=schedule, weight_decay=weight_decay)
        except (AttributeError, TypeError) as exc:
            # Fallback for older TF. Only "AdamW unavailable" style failures are
            # caught, and the downgrade is reported instead of happening silently.
            warnings.warn(
                f"AdamW is unavailable ({exc!r}); falling back to Adam without weight decay.",
                RuntimeWarning,
                stacklevel=2,
            )
            return tf.keras.optimizers.Adam(learning_rate=schedule)
    if name == "adam":
        return tf.keras.optimizers.Adam(learning_rate=schedule)
    if name == "rmsprop":
        return tf.keras.optimizers.RMSprop(learning_rate=schedule, rho=0.9)
    if name == "sgd":
        return tf.keras.optimizers.SGD(learning_rate=schedule, momentum=0.9, nesterov=True)
    raise ValueError("Unknown optimizer: " + opt_name)

def compile_model(model, opt_name="adamw", init_lr=3e-3):
    """Compile `model` in place and return it.

    Args:
        model: the Keras model to compile.
        opt_name: optimizer name forwarded to `make_optimizer`.
        init_lr: initial learning rate forwarded to `make_optimizer`.

    Returns:
        The same `model` object, compiled with a label-smoothed categorical
        cross-entropy loss and the "accuracy" metric.
    """
    optimizer = make_optimizer(opt_name=opt_name, initial_lr=init_lr)
    smoothed_loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05)
    model.compile(optimizer=optimizer, loss=smoothed_loss, metrics=["accuracy"])
    return model


## Training Utilities

In [None]:

def get_callbacks(run_name="best_of_both", use_reduce_lr=False):
    """Return the list of training callbacks for one run.

    Args:
        run_name: stem of the checkpoint file; the best model (by
            val_accuracy) is saved to "<run_name>.keras".
        use_reduce_lr: when True, also include ReduceLROnPlateau. It is off by
            default because ReduceLROnPlateau assigns a new value to the
            optimizer's learning rate, which Keras rejects when the optimizer
            was created with a LearningRateSchedule -- and `make_optimizer`
            always uses a CosineDecayRestarts schedule. Enable it only for
            optimizers built with a plain float learning rate.

    Returns:
        A list with EarlyStopping, optionally ReduceLROnPlateau, and
        ModelCheckpoint, in that order.
    """
    cbs = [
        callbacks.EarlyStopping(monitor="val_accuracy", patience=10, restore_best_weights=True, verbose=1),
    ]
    if use_reduce_lr:
        cbs.append(
            callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=4, min_lr=1e-5, verbose=1)
        )
    cbs.append(
        callbacks.ModelCheckpoint(filepath=f"{run_name}.keras", monitor="val_accuracy",
                                  save_best_only=True, verbose=1)
    )
    return cbs

def plot_history(history):
    """Plot train/validation accuracy and loss, one figure per metric.

    Args:
        history: object whose `.history` mapping holds the per-epoch lists
            "accuracy", "val_accuracy", "loss" and "val_loss".
    """
    records = history.history
    # (train key, validation key, train legend, validation legend, y-label/title)
    panels = (
        ("accuracy", "val_accuracy", "train_acc", "val_acc", "Accuracy"),
        ("loss", "val_loss", "train_loss", "val_loss", "Loss"),
    )
    for train_key, val_key, train_label, val_label, heading in panels:
        plt.figure(figsize=(6,4))
        plt.plot(records[train_key], label=train_label)
        plt.plot(records[val_key], label=val_label)
        plt.xlabel("Epoch")
        plt.ylabel(heading)
        plt.title(heading)
        plt.legend()
        plt.grid(True)
        plt.show()


## Fit a Strong Baseline (AdamW + Cosine LR)

In [None]:

BATCH_SIZE = 128
EPOCHS = 40  # upper bound; the EarlyStopping callback can end the run sooner
VAL_SPLIT = 0.1

strategy = tf.distribute.get_strategy()
with strategy.scope():
    # Build and compile under the strategy scope; dense_dropout=0.5 keeps the
    # stronger final dropout.
    model = compile_model(build_model(dense_dropout=0.5), opt_name="adamw", init_lr=3e-3)

history = model.fit(
    x=x_train,
    y=y_train_cat,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=VAL_SPLIT,
    callbacks=get_callbacks("best_of_both_adamw"),
    verbose=2,
)
plot_history(history)


## Evaluate on Test Set

In [None]:

# Score the trained model on the test split (one-hot targets, no progress output)
test_loss, test_acc = model.evaluate(x=x_test, y=y_test_cat, verbose=0)
print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.4f}")


## Confusion Matrix & Misclassified Samples

In [None]:

class_names = ["airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"]

# Predicted class = arg-max over the model's per-class outputs; labels flattened to 1-D
y_pred = np.argmax(model.predict(x_test, verbose=0), axis=1)
y_true = y_test.flatten()

cm = confusion_matrix(y_true, y_pred)
print("Classification report:\n")
print(classification_report(y_true, y_pred, target_names=class_names, digits=4))

# Confusion matrix heat-map: rows are true classes, columns are predictions
plt.figure(figsize=(7,6))
plt.imshow(cm, interpolation='nearest')
plt.title("Confusion Matrix")
plt.colorbar()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names, rotation=45)
plt.yticks(tick_marks, class_names)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.show()

# First (up to) 25 misclassified test images on a 5x5 grid, titled true vs. predicted
idxs = np.flatnonzero(y_pred != y_true)[:25]
plt.figure(figsize=(8,8))
for panel, idx in enumerate(idxs, start=1):
    plt.subplot(5, 5, panel)
    plt.imshow(x_test[idx])
    plt.title(f"T:{class_names[y_true[idx]]}\nP:{class_names[y_pred[idx]]}", fontsize=8)
    plt.axis("off")
plt.tight_layout()
plt.show()


## (Optional) Quick Optimizer Comparison (Short Runs)

In [None]:

def quick_trial(opt_name, epochs=8, run_name=None):
    """Train a fresh model briefly with one optimizer and report its best val accuracy.

    Args:
        opt_name: optimizer name passed to `compile_model`.
        epochs: number of epochs to train for.
        run_name: accepted but not used by this function.

    Returns:
        The highest "val_accuracy" value recorded across the epochs.
    """
    current_strategy = tf.distribute.get_strategy()
    with current_strategy.scope():
        trial_model = compile_model(build_model(), opt_name=opt_name, init_lr=3e-3)

    trial_history = trial_model.fit(
        x_train,
        y_train_cat,
        batch_size=128,
        epochs=epochs,
        validation_split=0.1,
        verbose=0,
    )
    return max(trial_history.history["val_accuracy"])

# Run one short (6-epoch) trial per optimizer and tabulate the best val accuracy
opts = ["adamw", "adam", "rmsprop", "sgd"]
results = {}
for opt in opts:
    results[opt] = float(quick_trial(opt, epochs=6))

print("Val accuracy (best across short runs):")
for k, v in results.items():
    print(f"{k:>7s}: {v:.4f}")


## Export Model Definition to Python Script

In [None]:

# Write the model/optimizer definitions to a standalone Python script.
# The original target directory is kept when it exists; otherwise the script is
# written to the current working directory instead of failing with
# FileNotFoundError on machines that have no /mnt/data.
export_dir = "/mnt/data" if os.path.isdir("/mnt/data") else "."
script_path = os.path.join(export_dir, "cifar10_best_of_both.py")
with open(script_path, "w", encoding="utf-8") as f:
    f.write('''
import numpy as np, tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.optimizers.schedules import CosineDecayRestarts
from tensorflow.keras.utils import to_categorical

def conv_block(x, filters, dropout=0.0):
    x = layers.Conv2D(filters, 3, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("swish")(x)
    if dropout > 0:
        x = layers.Dropout(dropout)(x)
    return x

def build_model(input_shape=(32,32,3), classes=10, base_width=64, dropout=0.25, dense_dropout=0.5):
    data_augmentation = tf.keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.05),
        layers.RandomZoom(0.1),
        layers.RandomContrast(0.1),
    ])
    inp = layers.Input(shape=input_shape)
    x = data_augmentation(inp)
    x = conv_block(x, base_width, dropout=0.05)
    for f in [base_width, base_width*2, base_width*2]:
        x = conv_block(x, f, dropout=dropout)
        x = conv_block(x, f, dropout=dropout)
        x = layers.MaxPooling2D()(x)
        x = layers.Dropout(0.1)(x)
    x = layers.Conv2D(base_width*4, 1, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("swish")(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(dense_dropout)(x)
    x = layers.Dense(256, activation="swish")(x)
    x = layers.Dropout(dense_dropout)(x)
    out = layers.Dense(classes, activation="softmax")(x)
    return models.Model(inp, out, name="BestOfBoth_CIFAR10")

def make_optimizer(opt_name="adamw", initial_lr=3e-3, weight_decay=1e-4, steps_per_epoch=391, t0_epochs=10):
    schedule = CosineDecayRestarts(initial_lr, first_decay_steps=max(steps_per_epoch*5, 200), t_mul=2.0, m_mul=0.8, alpha=0.0)
    if opt_name.lower() == "adamw":
        try:
            return tf.keras.optimizers.AdamW(learning_rate=schedule, weight_decay=weight_decay)
        except Exception:
            return tf.keras.optimizers.Adam(learning_rate=schedule)
    if opt_name.lower() == "adam":
        return tf.keras.optimizers.Adam(learning_rate=schedule)
    if opt_name.lower() == "rmsprop":
        return tf.keras.optimizers.RMSprop(learning_rate=schedule, rho=0.9)
    if opt_name.lower() == "sgd":
        return tf.keras.optimizers.SGD(learning_rate=schedule, momentum=0.9, nesterov=True)
    raise ValueError("Unknown optimizer: " + opt_name)

def compile_model(model, opt_name="adamw", init_lr=3e-3):
    opt = make_optimizer(opt_name=opt_name, initial_lr=init_lr)
    model.compile(optimizer=opt,
                  loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05),
                  metrics=["accuracy"])
    return model
''')
print("Saved script to:", script_path)
