In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import cifar10
import numpy as np
import cv2
import random

# Fetch CIFAR-10 and rescale pixel values from [0, 255] to [0, 1]
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Standardize every RGB channel with fixed per-channel statistics.
# `mean` and `std` stay module-level: augment_batch() uses them to undo and
# redo this normalization.
mean = np.array([0.4914, 0.4822, 0.4465])
std = np.array([0.2023, 0.1994, 0.2010])
x_train, x_test = (x_train - mean) / std, (x_test - mean) / std

# Hold out the last 5,000 training samples as the validation split
x_train, x_val = x_train[:-5000], x_train[-5000:]
y_train, y_val = y_train[:-5000], y_train[-5000:]

# One-hot encode the integer class ids (10 classes). No smoothing happens
# here; label smoothing is configured on the loss function.
y_train, y_val, y_test = (
    tf.keras.utils.to_categorical(class_ids, 10)
    for class_ids in (y_train, y_val, y_test)
)

# Augmentation: CutMix + light color jitter
def color_jitter(img):
    """Randomly rescale saturation and brightness of one RGB image by up to ±10%.

    Args:
        img: image array accepted by ``cv2.cvtColor`` with ``COLOR_RGB2HSV``
            (called with uint8 RGB images by ``augment_batch``).

    Returns:
        uint8 RGB image with jittered saturation and value channels.
    """
    hsv_float = cv2.cvtColor(img, cv2.COLOR_RGB2HSV).astype(np.float32)

    # Draw the two gains in a fixed order: saturation first, then brightness.
    saturation_gain = np.random.uniform(0.9, 1.1)
    brightness_gain = np.random.uniform(0.9, 1.1)
    hsv_float[..., 1] *= saturation_gain
    hsv_float[..., 2] *= brightness_gain

    # Bring scaled channels back into the 8-bit range before converting back.
    hsv_uint8 = np.clip(hsv_float, 0, 255).astype(np.uint8)
    return cv2.cvtColor(hsv_uint8, cv2.COLOR_HSV2RGB)

def cutmix(images, labels, alpha=1.0):
    """Apply CutMix to a batch using a single random box shared by all samples.

    A rectangular region of every image is replaced by the same region taken
    from another image of the batch (chosen by a random permutation), and the
    labels are mixed in proportion to the pasted area.

    Args:
        images: array-like of shape (batch, height, width, channels).
        labels: array-like of per-sample label vectors, shape (batch, classes).
        alpha: concentration of the Beta(alpha, alpha) distribution from which
            the initial mixing ratio is drawn.

    Returns:
        Tuple ``(mixed_images, mixed_labels)`` of new NumPy arrays. The inputs
        are left unmodified.
    """
    # Work on a copy so the caller's batch is never modified in place.
    images = np.array(images)
    labels = np.asarray(labels)
    batch_size, height, width = images.shape[:3]

    # Pair every sample with a randomly chosen partner from the same batch.
    # Fancy indexing returns copies, so the paste below cannot alias itself.
    partner = np.random.permutation(batch_size)
    shuffled_images = images[partner]
    shuffled_labels = labels[partner]

    # Box centre is uniform over the image; box side scales with sqrt(1 - lam)
    # so that the box area is roughly (1 - lam) of the image before clipping.
    lam = np.random.beta(alpha, alpha)
    cx, cy = np.random.randint(width), np.random.randint(height)
    cut_scale = np.sqrt(1 - lam)
    cut_w, cut_h = int(width * cut_scale), int(height * cut_scale)
    x1 = int(np.clip(cx - cut_w // 2, 0, width))
    y1 = int(np.clip(cy - cut_h // 2, 0, height))
    x2 = int(np.clip(cx + cut_w // 2, 0, width))
    y2 = int(np.clip(cy + cut_h // 2, 0, height))

    images[:, y1:y2, x1:x2, :] = shuffled_images[:, y1:y2, x1:x2, :]

    # Recompute lam from the clipped box so labels match the pasted area exactly.
    lam = 1 - ((x2 - x1) * (y2 - y1) / (height * width))
    labels = lam * labels + (1 - lam) * shuffled_labels
    return images, labels

def augment_batch(images, labels):
    """Augment one batch of normalized images, then apply CutMix.

    Each image is de-normalized to 8-bit RGB, randomly flipped left/right,
    padded by 4 pixels per side and randomly cropped back to 32x32,
    colour-jittered, and re-normalized with the module-level ``mean``/``std``.
    The pad/crop sizes are fixed, so inputs are expected to be 32x32x3.

    Args:
        images: iterable of normalized images, each of shape (32, 32, 3).
        labels: label array for the batch, forwarded unchanged to ``cutmix``.

    Returns:
        Whatever ``cutmix`` returns for the augmented batch and ``labels``.
    """
    batch = []
    for img in images:
        img = (img * std + mean) * 255.0
        # Round and clip before the uint8 cast: a plain astype() truncates, so
        # float round-off (e.g. 254.99999) would shift a pixel one level down,
        # and any out-of-range value would wrap around.
        img = np.clip(np.rint(img), 0, 255).astype(np.uint8)
        img = tf.image.random_flip_left_right(img)
        img = tf.image.pad_to_bounding_box(img, 4, 4, 40, 40)
        img = tf.image.random_crop(img, [32, 32, 3])
        img = color_jitter(img.numpy())
        img = (img.astype(np.float32) / 255.0 - mean) / std
        batch.append(img)
    # Stacking yields a fresh array; store it as float32 rather than the
    # float64 that arithmetic with `mean`/`std` produces.
    batch = np.stack(batch).astype(np.float32)
    return cutmix(batch, labels)

def data_generator(x, y, batch_size):
    """Yield augmented ``(images, labels)`` batches forever, reshuffling each pass.

    Args:
        x: array of images, indexable with an integer index array.
        y: array of labels aligned with ``x``.
        batch_size: number of samples per batch; the final batch of a pass may
            be smaller when ``len(x)`` is not a multiple of ``batch_size``.

    Yields:
        The result of ``augment_batch`` for each batch.

    Raises:
        ValueError: if ``x`` is empty (the loop would otherwise spin forever
            without ever yielding).
    """
    num_samples = len(x)
    if num_samples == 0:
        raise ValueError("data_generator received an empty dataset")

    while True:
        order = np.random.permutation(num_samples)
        for start in range(0, num_samples, batch_size):
            # Gather only this batch; materializing x[order] / y[order] would
            # copy the entire dataset on every pass.
            batch_idx = order[start:start + batch_size]
            yield augment_batch(x[batch_idx], y[batch_idx])

# Model: ResNet-20-style network (stem conv + 3 stages of 3 residual blocks
# with 16/32/64 filters + global average pooling + 10-way softmax).
def _residual_block(x, filters, downsample=False):
    """Two 3x3 conv+BN layers with a skip connection, followed by ReLU.

    Args:
        x: input feature-map tensor.
        filters: number of output channels of both convolutions.
        downsample: when True, the first convolution uses stride 2 and the
            shortcut becomes a strided 1x1 conv + BN so both branches match.

    Returns:
        Output tensor of the block.
    """
    # Layers are created in the same order as the unrolled version (shortcut
    # projection first) so automatic layer names and weight order are unchanged.
    if downsample:
        shortcut = layers.Conv2D(filters, 1, strides=2, use_bias=False)(x)
        shortcut = layers.BatchNormalization()(shortcut)
        y = layers.Conv2D(filters, 3, strides=2, padding="same", use_bias=False)(x)
    else:
        shortcut = x
        y = layers.Conv2D(filters, 3, padding="same", use_bias=False)(x)
    y = layers.BatchNormalization()(y)
    y = layers.ReLU()(y)
    y = layers.Conv2D(filters, 3, padding="same", use_bias=False)(y)
    y = layers.BatchNormalization()(y)
    merged = layers.add([shortcut, y])
    return layers.ReLU()(merged)


inputs = layers.Input(shape=(32, 32, 3))
x = layers.Conv2D(16, 3, padding="same", use_bias=False)(inputs)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)

# (filters, whether the first block of the stage halves the resolution)
for stage_filters, stage_downsamples in [(16, False), (32, True), (64, True)]:
    for block_idx in range(3):
        x = _residual_block(
            x, stage_filters, downsample=stage_downsamples and block_idx == 0
        )

x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation="softmax")(x)
model = models.Model(inputs, outputs)

# Training setup: SGD (momentum 0.9, weight decay 5e-4) with categorical
# cross-entropy that applies label smoothing of 0.1.
model.compile(
    optimizer=tf.keras.optimizers.SGD(
        learning_rate=0.1,
        momentum=0.9,
        weight_decay=5e-4,
    ),
    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
    metrics=["accuracy"],
)


def _step_lr(epoch):
    """Piecewise-constant learning rate: 0.1, then 0.01 from epoch 82, then 0.001 from epoch 123."""
    if epoch >= 123:
        return 0.001
    if epoch >= 82:
        return 0.01
    return 0.1


# Keep only the weights with the best validation accuracy seen so far
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "best_model.h5",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1,
)
lr_schedule = tf.keras.callbacks.LearningRateScheduler(_step_lr)

# The generator is infinite, so the epoch length must be given explicitly
model.fit(
    data_generator(x_train, y_train, 128),
    steps_per_epoch=len(x_train) // 128,
    epochs=164,
    validation_data=(x_val, y_val),
    callbacks=[checkpoint, lr_schedule],
    verbose=2,
)

# Restore the best checkpoint before measuring test performance
model.load_weights("best_model.h5")
test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=100, verbose=2)
print(f"\n✅ Final test accuracy: {test_acc:.4f}")

# ✅ Experiment 3 — Advanced Augmentations

#     Augmentation: Random flip + pad-and-crop + Color Jitter + CutMix (label smoothing is applied in the loss)

#     Loss: CategoricalCrossentropy with label_smoothing=0.1

#     Optimizer: SGD + momentum, weight_decay=5e-4

#     Batching: Custom generator

#     Accuracy: 91.73% (final test accuracy)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

# Load CIFAR-10 and scale pixel values to [0, 1]
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Per-channel normalization statistics. They are float32 arrays (not plain
# lists) so the image tensors are not promoted to float64, which would double
# their memory footprint.
mean = np.array([0.4914, 0.4822, 0.4465], dtype="float32")
std = np.array([0.2023, 0.1994, 0.2010], dtype="float32")
x_train = (x_train - mean) / std
x_test = (x_test - mean) / std

# Hold out the last 5,000 training samples for validation
x_val = x_train[-5000:]
y_val = y_train[-5000:]
x_train = x_train[:-5000]
y_train = y_train[:-5000]

# Standard augmentation: shifts of up to 4 pixels and horizontal flips.
# datagen.fit() is deliberately not called: it is only needed for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which is enabled, so it would just copy the whole training set.
datagen = ImageDataGenerator(
    width_shift_range=4/32,
    height_shift_range=4/32,
    horizontal_flip=True,
    fill_mode="reflect"
)

# ResNet-20-style network: stem conv + 3 stages of 3 residual blocks with
# 16/32/64 filters + global average pooling + 10-way softmax.
def _residual_block(x, filters, downsample=False):
    """Two 3x3 conv+BN layers with a skip connection, followed by ReLU.

    Args:
        x: input feature-map tensor.
        filters: number of output channels of both convolutions.
        downsample: when True, the first convolution uses stride 2 and the
            shortcut becomes a strided 1x1 conv + BN so both branches match.

    Returns:
        Output tensor of the block.
    """
    # Layers are created in the same order as the unrolled version (shortcut
    # projection first) so automatic layer names and weight order are unchanged.
    if downsample:
        shortcut = layers.Conv2D(filters, 1, strides=2, use_bias=False)(x)
        shortcut = layers.BatchNormalization()(shortcut)
        y = layers.Conv2D(filters, 3, strides=2, padding="same", use_bias=False)(x)
    else:
        shortcut = x
        y = layers.Conv2D(filters, 3, padding="same", use_bias=False)(x)
    y = layers.BatchNormalization()(y)
    y = layers.ReLU()(y)
    y = layers.Conv2D(filters, 3, padding="same", use_bias=False)(y)
    y = layers.BatchNormalization()(y)
    merged = layers.add([shortcut, y])
    return layers.ReLU()(merged)


inputs = layers.Input(shape=(32, 32, 3))
x = layers.Conv2D(16, 3, padding="same", use_bias=False)(inputs)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)

# (filters, whether the first block of the stage halves the resolution)
for stage_filters, stage_downsamples in [(16, False), (32, True), (64, True)]:
    for block_idx in range(3):
        x = _residual_block(
            x, stage_filters, downsample=stage_downsamples and block_idx == 0
        )

x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation="softmax")(x)

model = models.Model(inputs, outputs)

# Training setup: SGD (momentum 0.9, weight decay 5e-4) on integer labels
model.compile(
    optimizer=tf.keras.optimizers.SGD(
        learning_rate=0.1,
        momentum=0.9,
        weight_decay=5e-4,
    ),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


def _step_lr(epoch):
    """Piecewise-constant learning rate: 0.1, then 0.01 from epoch 82, then 0.001 from epoch 123."""
    if epoch >= 123:
        return 0.001
    if epoch >= 82:
        return 0.01
    return 0.1


# Keep only the weights with the best validation accuracy seen so far
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "best_model.h5",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1,
)
lr_schedule = tf.keras.callbacks.LearningRateScheduler(_step_lr)

# Train on augmented batches produced by the ImageDataGenerator
train_batches = datagen.flow(x_train, y_train, batch_size=128)
model.fit(
    train_batches,
    epochs=164,
    validation_data=(x_val, y_val),
    callbacks=[checkpoint, lr_schedule],
    verbose=2,
)

# Restore the best checkpoint before measuring test performance
model.load_weights("best_model.h5")

test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=100, verbose=2)
print(f"\n✅ Final test accuracy: {test_acc:.4f}")

#  Experiment 2 — Tuned Regularization

#     Augmentation: Standard flip + shift (ImageDataGenerator)

#     Loss: SparseCategoricalCrossentropy

#     Optimizer: SGD + momentum, weight_decay=5e-4

#     Checkpointing: Enabled

#     Accuracy: 90.90%

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

# Load CIFAR-10 and scale pixel values to [0, 1]
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Per-channel normalization statistics. They are float32 arrays (not plain
# lists) so the image tensors are not promoted to float64, which would double
# their memory footprint.
mean = np.array([0.4914, 0.4822, 0.4465], dtype="float32")
std = np.array([0.2023, 0.1994, 0.2010], dtype="float32")
x_train = (x_train - mean) / std
x_test = (x_test - mean) / std

# Split 5,000 samples from training set for validation
x_val = x_train[-5000:]
y_val = y_train[-5000:]
x_train = x_train[:-5000]
y_train = y_train[:-5000]

# Standard augmentation: shifts of up to 4 pixels and horizontal flips.
# datagen.fit() is deliberately not called: it is only needed for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which is enabled, so it would just copy the whole training set.
datagen = ImageDataGenerator(
    width_shift_range=4/32,
    height_shift_range=4/32,
    horizontal_flip=True,
    fill_mode="reflect"
)

# ResNet-20-style network: stem conv + 3 stages of 3 residual blocks with
# 16/32/64 filters + global average pooling + 10-way softmax.
def _residual_block(x, filters, downsample=False):
    """Two 3x3 conv+BN layers with a skip connection, followed by ReLU.

    Args:
        x: input feature-map tensor.
        filters: number of output channels of both convolutions.
        downsample: when True, the first convolution uses stride 2 and the
            shortcut becomes a strided 1x1 conv + BN so both branches match.

    Returns:
        Output tensor of the block.
    """
    # Layers are created in the same order as the unrolled version (shortcut
    # projection first) so automatic layer names and weight order are unchanged.
    if downsample:
        shortcut = layers.Conv2D(filters, 1, strides=2, use_bias=False)(x)
        shortcut = layers.BatchNormalization()(shortcut)
        y = layers.Conv2D(filters, 3, strides=2, padding="same", use_bias=False)(x)
    else:
        shortcut = x
        y = layers.Conv2D(filters, 3, padding="same", use_bias=False)(x)
    y = layers.BatchNormalization()(y)
    y = layers.ReLU()(y)
    y = layers.Conv2D(filters, 3, padding="same", use_bias=False)(y)
    y = layers.BatchNormalization()(y)
    merged = layers.add([shortcut, y])
    return layers.ReLU()(merged)


inputs = layers.Input(shape=(32, 32, 3))
x = layers.Conv2D(16, 3, padding="same", use_bias=False)(inputs)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)

# (filters, whether the first block of the stage halves the resolution)
for stage_filters, stage_downsamples in [(16, False), (32, True), (64, True)]:
    for block_idx in range(3):
        x = _residual_block(
            x, stage_filters, downsample=stage_downsamples and block_idx == 0
        )

x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation="softmax")(x)

model = models.Model(inputs, outputs)

# Baseline training setup: SGD (momentum 0.9, weight decay 1e-4) on integer labels
model.compile(
    optimizer=tf.keras.optimizers.SGD(
        learning_rate=0.1,
        momentum=0.9,
        weight_decay=1e-4,
    ),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


def _step_lr(epoch):
    """Piecewise-constant learning rate: 0.1, then 0.01 from epoch 82, then 0.001 from epoch 123."""
    if epoch >= 123:
        return 0.001
    if epoch >= 82:
        return 0.01
    return 0.1


# Train on augmented batches produced by the ImageDataGenerator
train_batches = datagen.flow(x_train, y_train, batch_size=128)
model.fit(
    train_batches,
    epochs=164,
    validation_data=(x_val, y_val),
    callbacks=[tf.keras.callbacks.LearningRateScheduler(_step_lr)],
    verbose=2,
)

# Evaluate the final-epoch weights on the held-out test set
test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=100, verbose=2)
print(f"\n✅ Final test accuracy: {test_acc:.4f}")

#  Experiment 1 — Baseline ResNet-20

#     Augmentation: Standard flip + shift (ImageDataGenerator)

#     Loss: SparseCategoricalCrossentropy

#     Optimizer: SGD + momentum, weight_decay=1e-4

#     Accuracy: 90.39%