## (b) CNN

Here we make a model 

In [23]:
# Check number of available GPUs
import tensorflow as tf
from tensorflow import keras
import os
from sklearn.preprocessing import StandardScaler
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
seed = 900

Num GPUs Available:  1


In [43]:
def create_model(num_classes=10, optimizer=keras.optimizers.Adam(learning_rate=0.0001), shape = (pixel_size, pixel_size, 1)):
    lr_scheduler = keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.1,          # halve the learning rate if there is no improvement
        patience=5,          # Wait 5 epochs with no improvement before reducing
        min_lr=1e-6          # Set a minimum learning rate at 1e-6
    )
    early_stopper = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=7,          # Wait 7 epochs for improvement before stopping
        restore_best_weights=True  # Automatically restore the weights from the best epoch
    )
    csv_logger = keras.callbacks.CSVLogger(
    filename=f"classification_log{num_classes}_classes.csv",
    separator=",",
    append=True)  #makes sure results are appended to same file if training stops and is resumed
    initializer = "glorot_uniform"# keras.initializers.Orthogonal(gain = 1.0, seed = seed)
    model= keras.models.Sequential([
        keras.Input(shape=shape),
        # Block 1
        keras.layers.Conv2D(32, (3,3), activation="relu", padding="same", kernel_initializer=initializer),
        keras.layers.BatchNormalization(),
        keras.layers.Conv2D(32, (3,3), activation="relu", padding="same", kernel_initializer=initializer),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPooling2D((2,2)),
        keras.layers.Dropout(0.1),
        # Block 2
        keras.layers.Conv2D(64, (3,3), activation="relu", padding="same", kernel_initializer=initializer),
        keras.layers.BatchNormalization(),
        keras.layers.Conv2D(64, (3,3), activation="relu", padding="same", kernel_initializer=initializer),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPooling2D(2),
        keras.layers.Dropout(0.1),
        # Block 3
        keras.layers.Conv2D(128, (3,3), activation="relu", padding="same", kernel_initializer=initializer),
        keras.layers.BatchNormalization(),
        keras.layers.Conv2D(128, (3,3), activation="relu", padding="same", kernel_initializer=initializer),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPooling2D(2),
        keras.layers.Dropout(0.1),
        # Block 4
        keras.layers.Conv2D(256, (3,3), activation="relu", padding="same", kernel_initializer=initializer),
        keras.layers.BatchNormalization(),
        keras.layers.Conv2D(256, (3,3), activation="relu", padding="same", kernel_initializer=initializer),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPooling2D(2),
        keras.layers.Dropout(0.1),


        keras.layers.Flatten(),
        keras.layers.Dense(128, activation="leaky_relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(64, activation="leaky_relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(int(num_classes), activation="softmax")
    ])
    model.compile(loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=["Accuracy"
            ],
    )
    return model, lr_scheduler, early_stopper, csv_logger


## Model 1

In [None]:
# Import FashionMNIST data
fashion_mnist= keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
print(X_train_full.shape, y_train_full.shape)

# Preprocess data
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

X_test = X_test / 255.0
print(X_train.shape, y_train.shape)
print(X_valid.shape, y_valid.shape)

pixel_size = 28
model1, lr_scheduler, early_stopper, csv_logger  = create_model(optimizer=keras.optimizers.Adam(learning_rate=0.0001))

(60000, 28, 28) (60000,)
(55000, 28, 28) (55000,)
(5000, 28, 28) (5000,)


In [38]:
model1.summary()
model1.fit(
    X_train, y_train,
    epochs=30,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_scheduler, early_stopper, csv_logger]
)

Epoch 1/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 13ms/step - Accuracy: 0.7429 - loss: 0.7413 - val_Accuracy: 0.8680 - val_loss: 0.3544 - learning_rate: 1.0000e-04
Epoch 2/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - Accuracy: 0.8489 - loss: 0.4286 - val_Accuracy: 0.8954 - val_loss: 0.2887 - learning_rate: 1.0000e-04
Epoch 3/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - Accuracy: 0.8785 - loss: 0.3453 - val_Accuracy: 0.8944 - val_loss: 0.2818 - learning_rate: 1.0000e-04
Epoch 4/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - Accuracy: 0.8947 - loss: 0.2974 - val_Accuracy: 0.9160 - val_loss: 0.2284 - learning_rate: 1.0000e-04
Epoch 5/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - Accuracy: 0.9056 - loss: 0.2642 - val_Accuracy: 0.9164 - val_loss: 0.2311 - learning_rate: 1.0000e-04
Epoch 6/30
[1m1719/1719[0m [32m

<keras.src.callbacks.history.History at 0x75d9507bde20>

In [39]:
# Evaluate the model on the test set
test_loss, test_acc = model1.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)
print('Test loss:', test_loss)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - Accuracy: 0.9235 - loss: 0.2242
Test accuracy: 0.9235000014305115
Test loss: 0.22418083250522614


### Application to CIFAR10

In [46]:
# Import CIFAR10 dataset
CIFAR10 = tf.keras.datasets.cifar10


# Preprocess data
(X_train_full, y_train_full), (X_test, y_test) = CIFAR10.load_data()
print(X_train_full.shape, y_train_full.shape)

X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

X_test = X_test / 255.0
print(X_train.shape, y_train.shape)
print(X_valid.shape, y_valid.shape)

(50000, 32, 32, 3) (50000, 1)
(45000, 32, 32, 3) (45000, 1)
(5000, 32, 32, 3) (5000, 1)


In [47]:
pixel_size = 32
modelA, lr_scheduler, early_stopper, csv_logger  = create_model(optimizer=keras.optimizers.Adam(learning_rate=0.0001),shape=(pixel_size, pixel_size, 3))

modelA.summary()
modelA.fit(
    X_train, y_train,
    epochs=30,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_scheduler, early_stopper, csv_logger]
)

# Evaluate the model on the test set
test_loss, test_acc = modelA.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)
print('Test loss:', test_loss)

Epoch 1/30


2025-11-08 14:12:27.873919: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.



[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 15ms/step - Accuracy: 0.3403 - loss: 1.8563 - val_Accuracy: 0.5112 - val_loss: 1.3628 - learning_rate: 1.0000e-04
Epoch 2/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - Accuracy: 0.4856 - loss: 1.4439 - val_Accuracy: 0.5898 - val_loss: 1.1469 - learning_rate: 1.0000e-04
Epoch 3/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - Accuracy: 0.5593 - loss: 1.2463 - val_Accuracy: 0.6384 - val_loss: 1.0286 - learning_rate: 1.0000e-04
Epoch 4/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - Accuracy: 0.6177 - loss: 1.0947 - val_Accuracy: 0.6714 - val_loss: 0.9373 - learning_rate: 1.0000e-04
Epoch 5/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - Accuracy: 0.6632 - loss: 0.9697 - val_Accuracy: 0.6962 - val_loss: 0.8923 - learning_rate: 1.0000e-04
Epoch 6/30
[1m1407/1407[0m [32m━━━━━━━━━━━

## Model 2

In [50]:
# Import FashionMNIST data
fashion_mnist= keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
print(X_train_full.shape, y_train_full.shape)

# Preprocess data
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

X_test = X_test / 255.0
print(X_train.shape, y_train.shape)
print(X_valid.shape, y_valid.shape)

pixel_size = 28
model2, lr_scheduler, early_stopper, csv_logger  = create_model(optimizer=keras.optimizers.AdamW(learning_rate=0.0001))

(60000, 28, 28) (60000,)
(55000, 28, 28) (55000,)
(5000, 28, 28) (5000,)


In [51]:
model2.summary()
model2.fit(
    X_train, y_train,
    epochs=30,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_scheduler, early_stopper, csv_logger]
)

Epoch 1/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 12ms/step - Accuracy: 0.7380 - loss: 0.7634 - val_Accuracy: 0.8702 - val_loss: 0.3589 - learning_rate: 1.0000e-04
Epoch 2/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - Accuracy: 0.8511 - loss: 0.4252 - val_Accuracy: 0.8932 - val_loss: 0.3069 - learning_rate: 1.0000e-04
Epoch 3/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - Accuracy: 0.8789 - loss: 0.3496 - val_Accuracy: 0.9070 - val_loss: 0.2700 - learning_rate: 1.0000e-04
Epoch 4/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - Accuracy: 0.8942 - loss: 0.3018 - val_Accuracy: 0.9178 - val_loss: 0.2471 - learning_rate: 1.0000e-04
Epoch 5/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - Accuracy: 0.9060 - loss: 0.2694 - val_Accuracy: 0.9138 - val_loss: 0.2461 - learning_rate: 1.0000e-04
Epoch 6/30
[1m1719/1719[0m [32m

<keras.src.callbacks.history.History at 0x75d7c8473080>

In [52]:
# Evaluate the model on the test set
test_loss, test_acc = model2.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)
print('Test loss:', test_loss)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - Accuracy: 0.9270 - loss: 0.2212
Test accuracy: 0.9269999861717224
Test loss: 0.2212357372045517


### Application to CIFAR10

In [53]:
# Import CIFAR10 dataset
CIFAR10 = tf.keras.datasets.cifar10


# Preprocess data
(X_train_full, y_train_full), (X_test, y_test) = CIFAR10.load_data()
print(X_train_full.shape, y_train_full.shape)

X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

X_test = X_test / 255.0
print(X_train.shape, y_train.shape)
print(X_valid.shape, y_valid.shape)

(50000, 32, 32, 3) (50000, 1)
(45000, 32, 32, 3) (45000, 1)
(5000, 32, 32, 3) (5000, 1)


In [None]:
pixel_size = 32
modelB, lr_scheduler, early_stopper, csv_logger  = create_model(optimizer=keras.optimizers.AdamW(learning_rate=0.0001),shape=(pixel_size, pixel_size, 3))

modelB.summary()
modelB.fit(
    X_train, y_train,
    epochs=30,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_scheduler, early_stopper, csv_logger]
)

# Evaluate the model on the test set
test_loss, test_acc = modelB.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)
print('Test loss:', test_loss)

Epoch 1/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 15ms/step - Accuracy: 0.3478 - loss: 1.8369 - val_Accuracy: 0.4796 - val_loss: 1.4420 - learning_rate: 1.0000e-04
Epoch 2/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - Accuracy: 0.4907 - loss: 1.4267 - val_Accuracy: 0.5634 - val_loss: 1.2044 - learning_rate: 1.0000e-04
Epoch 3/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - Accuracy: 0.5692 - loss: 1.2227 - val_Accuracy: 0.6196 - val_loss: 1.0700 - learning_rate: 1.0000e-04
Epoch 4/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - Accuracy: 0.6216 - loss: 1.0768 - val_Accuracy: 0.6704 - val_loss: 0.9304 - learning_rate: 1.0000e-04
Epoch 5/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - Accuracy: 0.6668 - loss: 0.9519 - val_Accuracy: 0.6966 - val_loss: 0.8544 - learning_rate: 1.0000e-04
Epoch 6/30
[1m1407/1407[0m [32m

## Model 3

In [55]:
# Import FashionMNIST data
fashion_mnist= keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
print(X_train_full.shape, y_train_full.shape)

# Preprocess data
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

X_test = X_test / 255.0
print(X_train.shape, y_train.shape)
print(X_valid.shape, y_valid.shape)

pixel_size = 28
model3, lr_scheduler, early_stopper, csv_logger  = create_model(optimizer=keras.optimizers.Nadam(learning_rate=0.0001))

(60000, 28, 28) (60000,)
(55000, 28, 28) (55000,)
(5000, 28, 28) (5000,)


In [56]:
model3.summary()
model3.fit(
    X_train, y_train,
    epochs=30,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_scheduler, early_stopper, csv_logger]
)

Epoch 1/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 14ms/step - Accuracy: 0.7425 - loss: 0.7494 - val_Accuracy: 0.8714 - val_loss: 0.3514 - learning_rate: 1.0000e-04
Epoch 2/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - Accuracy: 0.8518 - loss: 0.4275 - val_Accuracy: 0.8890 - val_loss: 0.3105 - learning_rate: 1.0000e-04
Epoch 3/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - Accuracy: 0.8796 - loss: 0.3480 - val_Accuracy: 0.8928 - val_loss: 0.2953 - learning_rate: 1.0000e-04
Epoch 4/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - Accuracy: 0.8951 - loss: 0.3009 - val_Accuracy: 0.9158 - val_loss: 0.2398 - learning_rate: 1.0000e-04
Epoch 5/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - Accuracy: 0.9044 - loss: 0.2671 - val_Accuracy: 0.9128 - val_loss: 0.2376 - learning_rate: 1.0000e-04
Epoch 6/30
[1m1719/1719[0m [32m

<keras.src.callbacks.history.History at 0x75da1d1cfd40>

In [58]:
# Evaluate the model on the test set
test_loss, test_acc = model3.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)
print('Test loss:', test_loss)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - Accuracy: 0.9274 - loss: 0.2251
Test accuracy: 0.9273999929428101
Test loss: 0.2251185029745102


#### Application to CIFAR10

In [59]:
# Import CIFAR10 dataset
CIFAR10 = tf.keras.datasets.cifar10


# Preprocess data
(X_train_full, y_train_full), (X_test, y_test) = CIFAR10.load_data()
print(X_train_full.shape, y_train_full.shape)

X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

X_test = X_test / 255.0
print(X_train.shape, y_train.shape)
print(X_valid.shape, y_valid.shape)

(50000, 32, 32, 3) (50000, 1)
(45000, 32, 32, 3) (45000, 1)
(5000, 32, 32, 3) (5000, 1)


In [60]:
pixel_size = 32
modelC, lr_scheduler, early_stopper, csv_logger  = create_model(optimizer=keras.optimizers.AdamW(learning_rate=0.0001),shape=(pixel_size, pixel_size, 3))

modelC.summary()
modelC.fit(
    X_train, y_train,
    epochs=30,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_scheduler, early_stopper, csv_logger]
)

# Evaluate the model on the test set
test_loss, test_acc = modelC.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)
print('Test loss:', test_loss)

Epoch 1/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 13ms/step - Accuracy: 0.3510 - loss: 1.8362 - val_Accuracy: 0.5086 - val_loss: 1.3552 - learning_rate: 1.0000e-04
Epoch 2/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - Accuracy: 0.4949 - loss: 1.4142 - val_Accuracy: 0.6094 - val_loss: 1.1364 - learning_rate: 1.0000e-04
Epoch 3/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - Accuracy: 0.5723 - loss: 1.2164 - val_Accuracy: 0.6406 - val_loss: 1.0368 - learning_rate: 1.0000e-04
Epoch 4/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - Accuracy: 0.6300 - loss: 1.0606 - val_Accuracy: 0.6838 - val_loss: 0.9000 - learning_rate: 1.0000e-04
Epoch 5/30
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - Accuracy: 0.6739 - loss: 0.9335 - val_Accuracy: 0.7190 - val_loss: 0.8091 - learning_rate: 1.0000e-04
Epoch 6/30
[1m1407/1407[0m [32m