In [4]:
"""
# Fashion-MNIST vs. MNIST Performance Comparison

Objective:
Compare the performance of a CNN model on the Fashion-MNIST dataset versus the MNIST digits dataset.
"""


import tensorflow as tf
from tensorflow.keras.datasets import mnist, fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
import time

# Function to load, preprocess, and train the model
def train_and_evaluate(dataset, dataset_name, epochs=10, batch_size=64):
    """Train a small CNN on one image dataset and report test accuracy and training time.

    Args:
        dataset: Object exposing ``load_data()`` that returns
            ``(X_train, y_train), (X_test, y_test)``. The images are reshaped
            to ``(-1, 28, 28, 1)`` and the labels are one-hot encoded into
            10 classes, so the data must be compatible with both.
        dataset_name: Human-readable name used in the printed messages.
        epochs: Number of training epochs passed to ``fit`` (default 10).
        batch_size: Mini-batch size passed to ``fit`` (default 64).

    Returns:
        Tuple ``(test_acc, training_seconds)``: the accuracy reported by
        ``evaluate`` on the test split, and the duration of the ``fit`` call
        only (data loading and model construction are not timed).
    """
    print(f"Training on {dataset_name} dataset...")

    # Load Dataset
    (X_train, y_train), (X_test, y_test) = dataset.load_data()

    # Normalize Images (Scale to 0-1 range).
    # Cast to float32 first: dividing integer pixel arrays by the Python float
    # 255.0 would promote them to float64 and double their memory footprint.
    # (The Keras loaders are documented to return uint8 images.)
    X_train = X_train.astype("float32") / 255.0
    X_test = X_test.astype("float32") / 255.0

    # Reshape Data for CNN Input (Adding channel dimension)
    X_train = X_train.reshape(-1, 28, 28, 1)
    X_test = X_test.reshape(-1, 28, 28, 1)

    # Convert Labels to One-Hot Encoding
    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)

    # Define CNN Model. The input shape is declared with an explicit Input
    # object rather than the `input_shape=` layer argument, which newer Keras
    # versions flag as deprecated for Sequential models.
    cnn = Sequential([
        tf.keras.Input(shape=(28, 28, 1)),
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(units=128, activation='relu'),
        Dense(units=10, activation='softmax'),
    ])

    # Compile Model
    cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train Model and Measure Time.
    # perf_counter() is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    # NOTE: the test split doubles as validation data here; it is only
    # monitored during training, no callback or selection step consumes it.
    start_time = time.perf_counter()
    cnn.fit(
        X_train,
        y_train,
        epochs=epochs,
        validation_data=(X_test, y_test),
        batch_size=batch_size,
        verbose=1,
    )
    training_seconds = time.perf_counter() - start_time

    # Evaluate Model
    test_loss, test_acc = cnn.evaluate(X_test, y_test, verbose=0)

    # Print Results
    print(f"Test Accuracy on {dataset_name}: {test_acc:.4f}")
    print(f"Training Time on {dataset_name}: {training_seconds:.2f} seconds\n")

    return test_acc, training_seconds

# Run the identical CNN experiment on each dataset: Fashion-MNIST first, then MNIST
test_acc_fashion, train_time_fashion = train_and_evaluate(fashion_mnist, "Fashion-MNIST")
test_acc_mnist, train_time_mnist = train_and_evaluate(mnist, "MNIST")

# Side-by-side comparison table (one row per dataset)
comparison_rows = (
    ("Fashion-MNIST", test_acc_fashion, train_time_fashion),
    ("MNIST", test_acc_mnist, train_time_mnist),
)
print("\nComparison of CNN Performance:")
print(f"{'Dataset':<15}{'Test Accuracy':<15}{'Training Time (s)':<15}")
print("-" * 45)
for row_label, row_accuracy, row_seconds in comparison_rows:
    print(f"{row_label:<15}{row_accuracy:<15.4f}{row_seconds:<15.2f}")


Training on Fashion-MNIST dataset...
Epoch 1/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 48ms/step - accuracy: 0.7527 - loss: 0.6869 - val_accuracy: 0.8700 - val_loss: 0.3632
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 48ms/step - accuracy: 0.8790 - loss: 0.3349 - val_accuracy: 0.8880 - val_loss: 0.3106
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 50ms/step - accuracy: 0.8950 - loss: 0.2868 - val_accuracy: 0.8989 - val_loss: 0.2822
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 47ms/step - accuracy: 0.9104 - loss: 0.2422 - val_accuracy: 0.9010 - val_loss: 0.2707
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 49ms/step - accuracy: 0.9183 - loss: 0.2222 - val_accuracy: 0.9044 - val_loss: 0.2619
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 47ms/step - accuracy: 0.9287 - loss: 0.1964 - val_accuracy: 0.909

In [6]:
## Exercise 16.4: Modifying ConvNet Layers

import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical

# Load the Fashion-MNIST dataset
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Normalize pixel values (scale between 0 and 1).
# Cast to float32 first: dividing the integer pixel arrays by the Python float
# 255.0 would promote them to float64 and double their memory footprint.
# (The Keras loader is documented to return uint8 images.)
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

# Reshape data for CNN input (28x28 images with 1 channel)
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

# Convert labels to one-hot encoding (10 classes)
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

def build_cnn_variant(hidden_dense_units=()):
    """Build and compile the two-conv-block CNN with a configurable dense head.

    Args:
        hidden_dense_units: Sizes of the ReLU Dense layers inserted, in order,
            between ``Flatten`` and the 10-way softmax output. An empty
            sequence connects ``Flatten`` directly to the output layer.

    Returns:
        A compiled ``Sequential`` model (adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential([
        # Explicit Input object instead of the `input_shape=` layer argument,
        # which newer Keras versions flag as deprecated for Sequential models.
        tf.keras.Input(shape=(28, 28, 1)),
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        *[Dense(units=units, activation='relu') for units in hidden_dense_units],
        Dense(units=10, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Define CNN model WITHOUT the first Dense layer (Flatten feeds the output layer directly)
cnn_no_dense = build_cnn_variant()

# Train; perf_counter() is monotonic, so the interval is immune to system
# clock adjustments that can distort time.time() differences.
start_time_no_dense = time.perf_counter()
cnn_no_dense.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), batch_size=64)
end_time_no_dense = time.perf_counter()

test_loss, test_acc_no_dense = cnn_no_dense.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy without First Dense Layer: {test_acc_no_dense:.4f}")
print(f"Training Time without First Dense Layer: {end_time_no_dense - start_time_no_dense:.2f} seconds")

# Define CNN model WITH additional Dense(4096) layer ahead of the usual Dense(128)
cnn_4096 = build_cnn_variant(hidden_dense_units=(4096, 128))

# Train
start_time_4096 = time.perf_counter()
cnn_4096.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), batch_size=64)
end_time_4096 = time.perf_counter()

test_loss, test_acc_4096 = cnn_4096.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy with Dense(4096): {test_acc_4096:.4f}")
print(f"Training Time with Dense(4096): {end_time_4096 - start_time_4096:.2f} seconds")

# Summary of CNN Model Modifications
# The baseline row uses the values measured by the Fashion-MNIST run in the
# earlier comparison cell (test_acc_fashion / train_time_fashion) instead of
# numbers pasted from a previous session, so all three rows come from the
# current kernel and share one format (accuracy as a fraction, time in seconds).
summary_rows = [
    ("Original CNN (Fashion-MNIST)", test_acc_fashion, train_time_fashion),
    ("CNN Without First Dense", test_acc_no_dense, end_time_no_dense - start_time_no_dense),
    ("CNN with Dense(4096)", test_acc_4096, end_time_4096 - start_time_4096),
]

print("\nSummary of CNN Model Modifications:")
print(f"{'Model':<30}{'Test Accuracy':<15}{'Training Time (s)':<15}")
print("-" * 60)
for model_label, accuracy, seconds in summary_rows:
    print(f"{model_label:<30}{accuracy:<15.4f}{seconds:<15.2f}")

# Observations:
# NOTE: these are fixed statements, not derived from the numbers above;
# re-check them against the table after every re-run.
print("\n Observations:")
print("- Taking out the first Dense layer makes training faster but lowers accuracy.")
print("- Adding a Dense(4096) layer improves accuracy a little but makes training slower.")
print("- If your computer is slow, using a smaller Dense layer (512 or 1024 neurons) is a better choice.")


Epoch 1/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 47ms/step - accuracy: 0.7370 - loss: 0.7559 - val_accuracy: 0.8418 - val_loss: 0.4322
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 45ms/step - accuracy: 0.8681 - loss: 0.3717 - val_accuracy: 0.8688 - val_loss: 0.3583
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 46ms/step - accuracy: 0.8851 - loss: 0.3211 - val_accuracy: 0.8839 - val_loss: 0.3247
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 45ms/step - accuracy: 0.8934 - loss: 0.2961 - val_accuracy: 0.8927 - val_loss: 0.3013
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 46ms/step - accuracy: 0.9030 - loss: 0.2647 - val_accuracy: 0.8891 - val_loss: 0.3047
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 46ms/step - accuracy: 0.9085 - loss: 0.2510 - val_accuracy: 0.8979 - val_loss: 0.2830
Epoch 7/10
[1m9