# Imports and Environment Setup

In [None]:
import os

# Suppress TensorFlow's C++ INFO and WARNING log output. The variable has to
# be in the environment before TensorFlow is imported, otherwise the messages
# emitted while the library loads are not filtered, so it is set ahead of the
# remaining imports.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# GPU configuration: allocate GPU memory on demand instead of reserving it all
# up front. The setting must be identical on every visible GPU, so it is
# applied to each device rather than only the first one.
physical_devices = tf.config.list_physical_devices("GPU")
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print("GPU is available and configured")
    except RuntimeError as err:
        # Raised when the GPUs were already initialized (for example when this
        # cell is re-run in a live kernel); training can still proceed.
        print(f"Could not configure GPU memory growth: {err}")
else:
    print("Using CPU for training")

# Data Loading and Preprocessing

In [None]:
# Fetch the MNIST digits; Keras returns the train and test splits as
# (images, labels) pairs.
train_split, test_split = keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
print(f"Training data shape: {x_train.shape}, Labels shape: {y_train.shape}")
print(f"Test data shape: {x_test.shape}, Labels shape: {y_test.shape}")

# Preprocessing: add a trailing channel axis and scale pixel values from
# [0, 255] down to [0, 1] as float32.
image_shape = (-1, 28, 28, 1)
pixel_max = 255.0
x_train = x_train.reshape(image_shape).astype("float32") / pixel_max
x_test = x_test.reshape(image_shape).astype("float32") / pixel_max

# One-hot encode the integer digit labels (10 classes); the integer labels
# are kept in y_train / y_test.
y_train_cat = keras.utils.to_categorical(y_train, num_classes=10)
y_test_cat = keras.utils.to_categorical(y_test, num_classes=10)

# Data Visualization

In [None]:
# Show the first ten training digits in a 2x5 grid, each titled with its label
plt.figure(figsize=(10, 5))
first_images = x_train[:10]
first_labels = y_train[:10]
for slot, (image, label) in enumerate(zip(first_images, first_labels), start=1):
    plt.subplot(2, 5, slot)
    # Drop the channel axis so imshow receives a plain 28x28 array
    plt.imshow(image.reshape(28, 28), cmap="gray")
    plt.title(f"Label: {label}")
    plt.axis("off")
plt.tight_layout()
plt.show()


# Data Augmentation Setup

In [None]:
# Random rotation, zoom and translation (each with a factor of 0.1), chained
# into a single Sequential stage to make training more robust.
augmentation_layers = [
    layers.RandomRotation(factor=0.1),
    layers.RandomZoom(height_factor=0.1),
    layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
]
data_augmentation = keras.Sequential(augmentation_layers)


# Model Definition

In [None]:
# Define a CNN model with increasing complexity
def _conv_block(filters, num_convs, pool):
    """Return the layer list for one convolutional stage.

    A stage is ``num_convs`` repetitions of a 3x3, same-padded ReLU
    convolution followed by batch normalization, then an optional 2x2 max
    pooling layer, then 25% dropout.
    """
    block = []
    for _ in range(num_convs):
        block.append(
            layers.Conv2D(filters, kernel_size=3, padding="same", activation="relu")
        )
        block.append(layers.BatchNormalization())
    if pool:
        block.append(layers.MaxPooling2D(pool_size=2))
    block.append(layers.Dropout(0.25))
    return block


def create_cnn_model(input_shape=(28, 28, 1), num_classes=10, *, use_augmentation=True):
    """Build the (uncompiled) convolutional classifier.

    Args:
        input_shape: Shape of one input image, without the batch axis.
        num_classes: Number of units in the final softmax layer.
        use_augmentation: When True, the module-level ``data_augmentation``
            stage is inserted directly after the input.

    Returns:
        A ``keras.Sequential`` model. With the default arguments the layer
        stack is: input, augmentation, two pooled conv stages (32 and 64
        filters, two convolutions each), one unpooled conv stage (128
        filters, one convolution), then Flatten, Dense(256),
        BatchNormalization, Dropout(0.5) and the softmax output.
    """
    stack = [keras.Input(shape=input_shape)]

    # Optional data augmentation (only applied during training)
    if use_augmentation:
        stack.append(data_augmentation)

    # Convolutional feature extractor with a growing filter count
    stack += _conv_block(32, num_convs=2, pool=True)
    stack += _conv_block(64, num_convs=2, pool=True)
    stack += _conv_block(128, num_convs=1, pool=False)

    # Flatten and dense classification head
    stack += [
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]

    return keras.Sequential(stack)


# Model Compilation

In [None]:
# Build the network, then configure it with the Adam optimizer, categorical
# cross-entropy (the labels are one-hot encoded) and an accuracy metric.
model = create_cnn_model()
adam = keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=adam,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer summary
model.summary()

# Callbacks for Training Optimization

In [None]:
# Learning rate scheduler for adaptive learning
def lr_scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The rate is multiplied by 0.9 on every fifth epoch (5, 10, 15, ...);
    epoch 0 and all other epochs return ``lr`` unchanged.
    """
    is_decay_epoch = epoch > 0 and epoch % 5 == 0
    return lr * 0.9 if is_decay_epoch else lr

# Hook the step-decay schedule into training
lr_callback = keras.callbacks.LearningRateScheduler(schedule=lr_scheduler)

# Stop once val_loss has not improved for 5 epochs and roll back to the
# best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)

# Model Training

In [None]:
# Fit on the training set, holding out 10% of it for validation; the
# callbacks adjust the learning rate and stop early when val_loss stalls.
fit_options = dict(
    batch_size=64,
    epochs=20,
    validation_split=0.1,
    callbacks=[lr_callback, early_stopping],
    verbose=1,
)
history = model.fit(x_train, y_train_cat, **fit_options)

# Score the trained model on the held-out test set
evaluation = model.evaluate(x_test, y_test_cat, verbose=0)
test_loss, test_acc = evaluation
print(f"Test accuracy: {test_acc:.4f}")

# Training Visualization

In [None]:
# Draw accuracy and loss side by side, each with its training and
# validation curve taken from the fit history.
panels = [("accuracy", "Accuracy"), ("loss", "Loss")]

plt.figure(figsize=(12, 4))
for position, (metric, label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f"Training {label}")
    plt.plot(history.history[f"val_{metric}"], label=f"Validation {label}")
    plt.title(f"{label} over Epochs")
    plt.xlabel("Epoch")
    plt.ylabel(label)
    plt.legend()
plt.tight_layout()
plt.show()

# Model Evaluation with Confusion Matrix

In [None]:
# Predict class probabilities for the test set, then take the most likely
# digit for each image.
y_pred = model.predict(x_test)
y_pred_classes = y_pred.argmax(axis=1)

# Confusion matrix of true digits (rows) against predicted digits (columns),
# drawn as an annotated heatmap.
digit_labels = range(10)
conf_matrix = confusion_matrix(y_test, y_pred_classes)
plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=digit_labels,
    yticklabels=digit_labels,
)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

# Per-class precision / recall / F1 summary
report = classification_report(y_test, y_pred_classes)
print("\nClassification Report:")
print(report)


# Prediction Visualization

In [None]:
# Visualize some predictions
def plot_predictions(x, y_true, y_pred, n=10):
    """Plot up to ``n`` images in one row, titled with true and predicted labels.

    Args:
        x: Images; each one is reshaped to 28x28 for display.
        y_true: Integer labels aligned with ``x``.
        y_pred: Per-class scores aligned with ``x``; the predicted label is
            the argmax of each row.
        n: Maximum number of images to draw. It is clamped to the amount of
            data supplied, so passing fewer than ``n`` samples no longer
            raises ``IndexError``.

    The title is green when the prediction matches the true label and red
    otherwise. Nothing is drawn when there are no samples to show.
    """
    # Never index past the shortest of the three inputs
    n = min(n, len(x), len(y_true), len(y_pred))
    if n <= 0:
        return

    plt.figure(figsize=(15, 4))
    for i in range(n):
        plt.subplot(1, n, i+1)
        plt.imshow(x[i].reshape(28, 28), cmap='gray')
        predicted = np.argmax(y_pred[i])
        color = 'green' if predicted == y_true[i] else 'red'
        plt.title(f"True: {y_true[i]}\nPred: {predicted}", color=color)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Preview the first ten test images with their true and predicted labels
# (correct predictions are titled in green, incorrect ones in red)
first_ten = slice(10)
plot_predictions(x_test[first_ten], y_test[first_ten], y_pred[first_ten])


# Error Analysis

In [None]:
# Locate the test samples whose predicted digit differs from the true label
misclassified_indices = np.flatnonzero(y_pred_classes != y_test)
print(f"Number of misclassified examples: {len(misclassified_indices)}")

# Plot at most ten of the misclassified samples; skipped when there are none
if misclassified_indices.size:
    selected_indices = misclassified_indices[:10]
    n_display = len(selected_indices)
    plot_predictions(
        x_test[selected_indices],
        y_test[selected_indices],
        y_pred[selected_indices],
        n=n_display,
    )
