In [None]:
# Importing necessary libraries
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from scipy.io import loadmat

# Read the MATLAB-format MNIST archive; the per-digit arrays are looked up
# under the keys 'train0'..'train9' and 'test0'..'test9'.
data = loadmat('mnist_all.mat')

# Training set: stack the per-digit arrays, cast to float32, reshape to
# (N, 28, 28, 1) and divide by 255. Each digit label is repeated once per row
# of that digit's array.
train_parts = [data[f'train{digit}'] for digit in range(10)]
train_images = np.vstack(train_parts).astype('float32').reshape(-1, 28, 28, 1) / 255.0
train_labels = np.hstack([[digit] * len(part) for digit, part in enumerate(train_parts)])

# Test set: same treatment as the training set.
test_parts = [data[f'test{digit}'] for digit in range(10)]
test_images = np.vstack(test_parts).astype('float32').reshape(-1, 28, 28, 1) / 255.0
test_labels = np.hstack([[digit] * len(part) for digit, part in enumerate(test_parts)])

# Hold out 10% of the training samples for validation (fixed seed for a
# reproducible split).
(train_images, val_images,
 train_labels, val_labels) = train_test_split(
    train_images,
    train_labels,
    test_size=0.1,
    random_state=42,
)

# Model building: three Conv -> MaxPool -> BatchNorm stages followed by a
# dense classifier head with a 10-way softmax output.
model = Sequential([
    # Declare the input shape with an explicit Input object. Passing
    # input_shape= to the first layer is deprecated in Keras 3 and emits a
    # UserWarning; the resulting architecture is the same.
    tf.keras.Input(shape=(28, 28, 1)),

    Conv2D(32, (3, 3), activation='relu'),  # More filters to capture complex features
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Conv2D(64, (3, 3), activation='relu'),  # Increased filters
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Conv2D(128, (3, 3), activation='relu'),  # Additional convolution layer with more filters
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Flatten(),
    Dense(256, activation='relu'),  # Increased the number of units in the dense layer
    Dropout(0.5),  # Increased Dropout rate for regularization
    Dense(10, activation='softmax')  # One output per digit class
])

# Compile the model with a reduced learning rate (Adam at 5e-4).
# 'sparse_categorical_crossentropy' is used because the labels are integer
# class ids rather than one-hot vectors.
model.compile(optimizer=Adam(learning_rate=0.0005), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Dynamic learning rate reduction: halve the learning rate (factor=0.5) when
# validation accuracy has not improved for 3 epochs, never going below 1e-6.
lr_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience=3, factor=0.5, min_lr=1e-6)

# Train the model for 100 epochs (experiment with fewer epochs to prevent overfitting).
# `history` keeps the per-epoch metrics that are plotted later.
history = model.fit(train_images, train_labels, epochs=100, batch_size=64,  # Increased batch size for faster convergence
                    validation_data=(val_images, val_labels), callbacks=[lr_reduction])

# Score the trained model on the held-out test set
test_loss, test_accuracy = model.evaluate(test_images, test_labels)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Predicted class for each test image = index of its largest output score
cls_true = test_labels
predictions = model.predict(test_images)
cls_pred = predictions.argmax(axis=1)

# Confusion matrix of true classes against predicted classes
conf_matrix = confusion_matrix(cls_true, cls_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Plotting functions
def plot_images(images, cls_true, cls_pred=None):
    """Show up to nine images in a 3x3 grid, labelled with their classes.

    Args:
        images: Indexable collection of image arrays; each one is reshaped
            to 28x28 before being drawn.
        cls_true: True class of each image, aligned with ``images``.
        cls_pred: Optional predicted class of each image. When given, the
            label under each image shows both the true and predicted class.

    Only the first nine images are drawn. If fewer than nine are supplied,
    the remaining grid cells are left blank instead of raising IndexError.
    """
    fig, axes = plt.subplots(3, 3, figsize=(9, 9))
    fig.subplots_adjust(hspace=0.3, wspace=0.3)
    n_images = len(images)
    for i, ax in enumerate(axes.flat):
        if i >= n_images:
            # No image for this cell (e.g. fewer than nine misclassified
            # samples): hide the empty axes and move on.
            ax.axis('off')
            continue

        ax.imshow(images[i].reshape(28, 28), cmap='binary')
        if cls_pred is None:
            xlabel = f"True: {cls_true[i]}"
        else:
            xlabel = f"True: {cls_true[i]}, Pred: {cls_pred[i]}"
        ax.set_xlabel(xlabel)
        # Tick marks carry no information for image cells.
        ax.set_xticks([])
        ax.set_yticks([])

    plt.show()

# Example errors: locate the misclassified test samples and keep the first
# nine of them for display.
incorrect = (cls_pred != cls_true)
first_errors = np.flatnonzero(incorrect)[:9]
incorrect_images = test_images[first_errors]
incorrect_cls_true = cls_true[first_errors]
incorrect_cls_pred = cls_pred[first_errors]

print("Example errors:")
plot_images(
    images=incorrect_images,
    cls_true=incorrect_cls_true,
    cls_pred=incorrect_cls_pred,
)

# Accuracy per epoch for the training and validation data, taken from the
# metrics recorded by model.fit.
for metric_key, legend_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    plt.plot(history.history[metric_key], label=legend_label)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
470/844 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.8058 - loss: 0.6274

In [2]:
pip install --upgrade tensorflow

Collecting keras>=3.5.0 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached keras-3.6.0-py3-none-any.whl.metadata (5.8 kB)
Using cached keras-3.6.0-py3-none-any.whl (1.2 MB)
Installing collected packages: keras
  Attempting uninstall: keras
    Found existing installation: keras 2.12.0
    Uninstalling keras-2.12.0:
      Successfully uninstalled keras-2.12.0
Successfully installed keras-3.6.0
Note: you may need to restart the kernel to use updated packages.
