# Python scripts for training on the MNIST dataset using various machine learning and deep learning models.

## Prerequisites and Dataset Loading

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gc
import warnings
warnings.filterwarnings('ignore')

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print("[INFO] Loading MNIST dataset...")
try:
    # as_frame=True pins the return type to pandas objects: later cells call
    # `X_train.values`, which only exists on a DataFrame.
    mnist = fetch_openml('mnist_784', version=1, as_frame=True)
    # Divide by 255.0 (presumably maps 8-bit pixel values into [0, 1]).
    X = mnist.data / 255.0
    # Cast the targets to integers before they are used as class labels.
    y = mnist.target.astype('int')
    print("[INFO] MNIST dataset loaded successfully!")
except Exception as e:
    print(f"[ERROR] Failed to load MNIST: {e}")
    # Every later cell needs X and y. Re-raise so the notebook stops here with
    # the real cause instead of failing below with a NameError on `X`.
    raise

print("[INFO] Splitting dataset...")
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

[INFO] Loading MNIST dataset...
[INFO] MNIST dataset loaded successfully!
[INFO] Splitting dataset...


## K-Nearest Neighbors (KNN)

In [2]:
from sklearn.neighbors import KNeighborsClassifier

print("\n[MODEL] K-Nearest Neighbors Training...")
# Bind the name before the try block so the `del` in `finally` cannot raise
# NameError (and hide the real error) if construction fails.
knn = None
try:
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(X_train, y_train)
    print("[INFO] KNN training complete.")
    # Evaluate on the held-out split produced by the loading cell.
    y_pred_knn = knn.predict(X_test)
    print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
    print(classification_report(y_test, y_pred_knn))
except Exception as e:
    # Best effort: report the failure and let the remaining cells run.
    print(f"[ERROR] KNN failed: {e}")
finally:
    # Drop the model reference and force a collection before the next cell.
    del knn
    gc.collect()


[MODEL] K-Nearest Neighbors Training...
[INFO] KNN training complete.
KNN Accuracy: 0.9712857142857143
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      1343
           1       0.96      0.99      0.98      1600
           2       0.97      0.97      0.97      1380
           3       0.97      0.96      0.97      1433
           4       0.97      0.96      0.97      1295
           5       0.98      0.97      0.97      1273
           6       0.98      0.99      0.99      1396
           7       0.97      0.98      0.97      1503
           8       0.99      0.94      0.96      1357
           9       0.96      0.95      0.96      1420

    accuracy                           0.97     14000
   macro avg       0.97      0.97      0.97     14000
weighted avg       0.97      0.97      0.97     14000



## Support Vector Machine (SVM)

In [3]:
from sklearn.svm import SVC

print("\n[MODEL] Support Vector Machine Training...")
# Bind the name before the try block so the `del` in `finally` cannot raise
# NameError (and hide the real error) if construction fails.
svm = None
try:
    svm = SVC(kernel='rbf')
    # Only the first 5000 training rows are used, so the score below is not
    # directly comparable with models trained on the full training split.
    svm.fit(X_train[:5000], y_train[:5000])  # reduced for speed
    print("[INFO] SVM training complete.")
    # Evaluation is likewise restricted to the first 1000 test rows.
    y_pred_svm = svm.predict(X_test[:1000])
    print("SVM Accuracy (on partial data):", accuracy_score(y_test[:1000], y_pred_svm))
    print(classification_report(y_test[:1000], y_pred_svm))
except Exception as e:
    # Best effort: report the failure and let the remaining cells run.
    print(f"[ERROR] SVM failed: {e}")
finally:
    # Drop the model reference and force a collection before the next cell.
    del svm
    gc.collect()


[MODEL] Support Vector Machine Training...
[INFO] SVM training complete.
SVM Accuracy (on partial data): 0.954
              precision    recall  f1-score   support

           0       0.96      0.99      0.97        95
           1       0.96      0.99      0.98       104
           2       0.91      0.95      0.93        95
           3       0.95      0.92      0.93       119
           4       0.92      0.98      0.95        88
           5       0.94      0.94      0.94        90
           6       0.96      0.99      0.97        97
           7       0.95      0.95      0.95       103
           8       1.00      0.90      0.95       101
           9       0.98      0.94      0.96       108

    accuracy                           0.95      1000
   macro avg       0.95      0.96      0.95      1000
weighted avg       0.95      0.95      0.95      1000



## Logistic Regression

In [4]:
from sklearn.linear_model import LogisticRegression

print("\n[MODEL] Logistic Regression Training...")
# Bind the name before the try block so the `del` in `finally` cannot raise
# NameError (and hide the real error) if construction fails.
lr = None
try:
    # NOTE(review): warnings are silenced globally in the first cell, so a
    # convergence warning from this solver would not be shown. Consider
    # raising max_iter if the score looks low.
    lr = LogisticRegression(max_iter=100)
    lr.fit(X_train, y_train)
    print("[INFO] Logistic Regression training complete.")
    y_pred_lr = lr.predict(X_test)
    print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_lr))
    print(classification_report(y_test, y_pred_lr))
except Exception as e:
    # Best effort: report the failure and let the remaining cells run.
    print(f"[ERROR] Logistic Regression failed: {e}")
finally:
    # Drop the model reference and force a collection before the next cell.
    del lr
    gc.collect()


[MODEL] Logistic Regression Training...
[INFO] Logistic Regression training complete.
Logistic Regression Accuracy: 0.9202857142857143
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      1343
           1       0.95      0.97      0.96      1600
           2       0.91      0.89      0.90      1380
           3       0.90      0.90      0.90      1433
           4       0.92      0.93      0.92      1295
           5       0.87      0.89      0.88      1273
           6       0.95      0.95      0.95      1396
           7       0.93      0.94      0.94      1503
           8       0.90      0.87      0.88      1357
           9       0.90      0.89      0.90      1420

    accuracy                           0.92     14000
   macro avg       0.92      0.92      0.92     14000
weighted avg       0.92      0.92      0.92     14000



## Decision Tree

In [5]:
from sklearn.tree import DecisionTreeClassifier

print("\n[MODEL] Decision Tree Training...")
# Bind the name before the try block so the `del` in `finally` cannot raise
# NameError (and hide the real error) if construction fails.
dt = None
try:
    # Fixed seed (same value as the train/test split) so that re-running the
    # notebook reproduces the same tree and the same score.
    dt = DecisionTreeClassifier(random_state=42)
    dt.fit(X_train, y_train)
    print("[INFO] Decision Tree training complete.")
    y_pred_dt = dt.predict(X_test)
    print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))
    print(classification_report(y_test, y_pred_dt))
except Exception as e:
    # Best effort: report the failure and let the remaining cells run.
    print(f"[ERROR] Decision Tree failed: {e}")
finally:
    # Drop the model reference and force a collection before the next cell.
    del dt
    gc.collect()


[MODEL] Decision Tree Training...
[INFO] Decision Tree training complete.
Decision Tree Accuracy: 0.8715
              precision    recall  f1-score   support

           0       0.92      0.92      0.92      1343
           1       0.94      0.95      0.95      1600
           2       0.86      0.83      0.85      1380
           3       0.83      0.84      0.83      1433
           4       0.85      0.87      0.86      1295
           5       0.84      0.82      0.83      1273
           6       0.90      0.90      0.90      1396
           7       0.92      0.90      0.91      1503
           8       0.81      0.81      0.81      1357
           9       0.84      0.85      0.84      1420

    accuracy                           0.87     14000
   macro avg       0.87      0.87      0.87     14000
weighted avg       0.87      0.87      0.87     14000



## Random Forest

In [6]:
from sklearn.ensemble import RandomForestClassifier

print("\n[MODEL] Random Forest Training...")
# Bind the name before the try block so the `del` in `finally` cannot raise
# NameError (and hide the real error) if construction fails.
rf = None
try:
    # n_jobs=-1 trains the trees on all available cores. The fixed seed (same
    # value as the train/test split) makes the forest reproducible.
    rf = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)
    rf.fit(X_train, y_train)
    print("[INFO] Random Forest training complete.")
    y_pred_rf = rf.predict(X_test)
    print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
    print(classification_report(y_test, y_pred_rf))
except Exception as e:
    # Best effort: report the failure and let the remaining cells run.
    print(f"[ERROR] Random Forest failed: {e}")
finally:
    # Drop the model reference and force a collection before the next cell.
    del rf
    gc.collect()


[MODEL] Random Forest Training...
[INFO] Random Forest training complete.
Random Forest Accuracy: 0.9673571428571428
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1343
           1       0.98      0.98      0.98      1600
           2       0.95      0.97      0.96      1380
           3       0.96      0.95      0.96      1433
           4       0.96      0.97      0.97      1295
           5       0.97      0.96      0.97      1273
           6       0.98      0.98      0.98      1396
           7       0.97      0.97      0.97      1503
           8       0.96      0.95      0.96      1357
           9       0.95      0.95      0.95      1420

    accuracy                           0.97     14000
   macro avg       0.97      0.97      0.97     14000
weighted avg       0.97      0.97      0.97     14000



## Multi-Layer Perceptron (MLP)

In [7]:
from sklearn.neural_network import MLPClassifier

print("\n[MODEL] Multi-Layer Perceptron Training...")
# Bind the name before the try block so the `del` in `finally` cannot raise
# NameError (and hide the real error) if construction fails.
mlp = None
try:
    # One hidden layer of 100 units, capped at 20 iterations. The fixed seed
    # (same value as the train/test split) makes the run reproducible.
    # NOTE(review): warnings are silenced globally in the first cell, so a
    # convergence warning caused by the low max_iter would not be shown.
    mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=20, random_state=42)
    mlp.fit(X_train, y_train)
    print("[INFO] MLP training complete.")
    y_pred_mlp = mlp.predict(X_test)
    print("MLP Accuracy:", accuracy_score(y_test, y_pred_mlp))
    print(classification_report(y_test, y_pred_mlp))
except Exception as e:
    # Best effort: report the failure and let the remaining cells run.
    print(f"[ERROR] MLP failed: {e}")
finally:
    # Drop the model reference and force a collection before the next cell.
    del mlp
    gc.collect()


[MODEL] Multi-Layer Perceptron Training...
[INFO] MLP training complete.
MLP Accuracy: 0.9734285714285714
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      1343
           1       0.99      0.98      0.99      1600
           2       0.95      0.98      0.97      1380
           3       0.96      0.97      0.97      1433
           4       0.98      0.96      0.97      1295
           5       0.98      0.97      0.98      1273
           6       0.98      0.99      0.98      1396
           7       0.98      0.97      0.97      1503
           8       0.97      0.95      0.96      1357
           9       0.96      0.97      0.96      1420

    accuracy                           0.97     14000
   macro avg       0.97      0.97      0.97     14000
weighted avg       0.97      0.97      0.97     14000



## Artificial Neural Network (ANN) with Keras

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

print("\n[MODEL] Artificial Neural Network Training...")
# Bind the name before the try block: the label encoding below runs before the
# model is built, so a failure there would otherwise make the `del` in
# `finally` raise NameError and hide the real error.
ann = None
try:
    # One-hot encode the labels into 10 classes, as required by the
    # categorical_crossentropy loss. The CNN and RNN cells reuse these arrays.
    y_train_cat = to_categorical(y_train, 10)
    y_test_cat = to_categorical(y_test, 10)

    # Fully connected network: 784 inputs -> 128 -> 64 -> 10-way softmax.
    ann = Sequential([
        Dense(128, activation='relu', input_shape=(784,)),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    ann.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    ann.fit(X_train, y_train_cat, epochs=5, batch_size=128, verbose=1)
    # evaluate() returns the loss followed by the compiled metrics.
    loss, acc = ann.evaluate(X_test, y_test_cat)
    print("ANN Accuracy:", acc)
except Exception as e:
    # Best effort: report the failure and let the remaining cells run.
    print(f"[ERROR] ANN failed: {e}")
finally:
    # Drop the model reference and force a collection before the next cell.
    del ann
    gc.collect()


[MODEL] Artificial Neural Network Training...
Epoch 1/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.8285 - loss: 0.6132
Epoch 2/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9573 - loss: 0.1474
Epoch 3/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9712 - loss: 0.0951
Epoch 4/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9778 - loss: 0.0743
Epoch 5/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9825 - loss: 0.0561
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9728 - loss: 0.0915
ANN Accuracy: 0.9717857241630554


## Convolutional Neural Network (CNN)

In [9]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Reshape
from tensorflow.keras.models import Sequential

print("\n[MODEL] Convolutional Neural Network Training...")
# Bind the name before the try block: the reshape below runs before the model
# is built, so a failure there would otherwise make the `del` in `finally`
# raise NameError and hide the real error.
cnn = None
try:
    # Turn each 784-value row into a 28x28 single-channel image.
    # np.asarray accepts both a DataFrame and a plain ndarray, whereas
    # `.values` exists only on pandas objects.
    X_train_cnn = np.asarray(X_train).reshape(-1, 28, 28, 1)
    X_test_cnn = np.asarray(X_test).reshape(-1, 28, 28, 1)

    # Two conv/pool stages, then dropout and a dense classifier head.
    cnn = Sequential([
        Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2)),
        Conv2D(64, (3,3), activation='relu'),
        MaxPooling2D(pool_size=(2,2)),
        Flatten(),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # y_train_cat / y_test_cat are the one-hot labels created in the ANN cell;
    # that cell must have run successfully before this one.
    cnn.fit(X_train_cnn, y_train_cat, epochs=5, batch_size=128, verbose=1)
    loss, acc = cnn.evaluate(X_test_cnn, y_test_cat)
    print("CNN Accuracy:", acc)
except Exception as e:
    # Best effort: report the failure and let the remaining cells run.
    print(f"[ERROR] CNN failed: {e}")
finally:
    # Drop the model reference and force a collection before the next cell.
    del cnn
    gc.collect()


[MODEL] Convolutional Neural Network Training...
Epoch 1/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 24ms/step - accuracy: 0.8197 - loss: 0.5893
Epoch 2/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 24ms/step - accuracy: 0.9710 - loss: 0.0974
Epoch 3/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 23ms/step - accuracy: 0.9793 - loss: 0.0652
Epoch 4/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 23ms/step - accuracy: 0.9839 - loss: 0.0511
Epoch 5/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 24ms/step - accuracy: 0.9859 - loss: 0.0438
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9878 - loss: 0.0335
CNN Accuracy: 0.9884285926818848


## Recurrent Neural Network (RNN) using LSTM

In [10]:
from tensorflow.keras.layers import LSTM

print("\n[MODEL] Recurrent Neural Network Training...")
# Bind the name before the try block: the reshape below runs before the model
# is built, so a failure there would otherwise make the `del` in `finally`
# raise NameError and hide the real error.
rnn = None
try:
    # Turn each 784-value row into a 28x28 array, which the LSTM consumes as
    # 28 time steps of 28 features each (matching input_shape below).
    # np.asarray accepts both a DataFrame and a plain ndarray, whereas
    # `.values` exists only on pandas objects.
    X_train_rnn = np.asarray(X_train).reshape(-1, 28, 28)
    X_test_rnn = np.asarray(X_test).reshape(-1, 28, 28)

    # Single LSTM layer followed by a dense classifier head.
    rnn = Sequential([
        LSTM(128, input_shape=(28,28)),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    rnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # y_train_cat / y_test_cat are the one-hot labels created in the ANN cell;
    # that cell must have run successfully before this one.
    rnn.fit(X_train_rnn, y_train_cat, epochs=5, batch_size=128, verbose=1)
    loss, acc = rnn.evaluate(X_test_rnn, y_test_cat)
    print("RNN Accuracy:", acc)
except Exception as e:
    # Best effort: report the failure and let the remaining cells run.
    print(f"[ERROR] RNN failed: {e}")
finally:
    # Drop the model reference and force a collection before the next cell.
    del rnn
    gc.collect()


[MODEL] Recurrent Neural Network Training...
Epoch 1/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 53ms/step - accuracy: 0.6573 - loss: 1.0215
Epoch 2/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 52ms/step - accuracy: 0.9471 - loss: 0.1721
Epoch 3/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 52ms/step - accuracy: 0.9657 - loss: 0.1129
Epoch 4/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 53ms/step - accuracy: 0.9742 - loss: 0.0840
Epoch 5/5
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 52ms/step - accuracy: 0.9808 - loss: 0.0614
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.9692 - loss: 0.0997
RNN Accuracy: 0.9698571562767029


## Voting Classifier (Ensemble)

In [11]:
from sklearn.ensemble import VotingClassifier

print("\n[MODEL] Voting Classifier Ensemble...")
# Bind the name before the try block: the imports and base estimators below
# run before the ensemble is built, so a failure there would otherwise make
# the `del` in `finally` raise NameError and hide the real error.
ensemble = None
try:
    # Simple fast models due to memory constraints
    # Re-imported here so this cell does not depend on the earlier model cells
    # having been executed.
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.neighbors import KNeighborsClassifier

    clf1 = LogisticRegression(max_iter=100)
    # Fixed seed (same value as the train/test split) so the forest, and with
    # it the ensemble's votes, are reproducible across runs.
    clf2 = RandomForestClassifier(n_estimators=50, random_state=42)
    clf3 = KNeighborsClassifier(n_neighbors=3)

    # Hard voting: the predicted class is the majority label across the three
    # base estimators.
    ensemble = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('knn', clf3)
    ], voting='hard')

    ensemble.fit(X_train, y_train)
    y_pred_ensemble = ensemble.predict(X_test)
    print("Voting Classifier Accuracy:", accuracy_score(y_test, y_pred_ensemble))
    print(classification_report(y_test, y_pred_ensemble))
except Exception as e:
    # Best effort: report the failure instead of aborting the notebook.
    print(f"[ERROR] Voting Classifier failed: {e}")
finally:
    # Drop the ensemble reference and force a collection.
    del ensemble
    gc.collect()


[MODEL] Voting Classifier Ensemble...
Voting Classifier Accuracy: 0.9672857142857143
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      1343
           1       0.97      0.99      0.98      1600
           2       0.95      0.97      0.96      1380
           3       0.95      0.96      0.95      1433
           4       0.97      0.97      0.97      1295
           5       0.97      0.96      0.96      1273
           6       0.98      0.98      0.98      1396
           7       0.97      0.97      0.97      1503
           8       0.97      0.94      0.95      1357
           9       0.97      0.95      0.96      1420

    accuracy                           0.97     14000
   macro avg       0.97      0.97      0.97     14000
weighted avg       0.97      0.97      0.97     14000



This implementation and comparison of a wide range of machine learning and deep learning models for the MNIST digit classification task shows how different models perform on image-based learning. Classical models like KNN and Random Forest showed impressive performance, with KNN achieving 97.13% accuracy and Random Forest closely following with 96.74%. Among deep learning models, the CNN clearly stood out, reaching the highest accuracy of 98.84%, which aligns with the known strength of convolutional architectures in image-related tasks.

We also implemented a Voting Classifier to combine the strengths of multiple traditional models, achieving a robust 96.73% accuracy. Although it did not surpass the CNN, this ensemble method proved effective in increasing stability and generalization.

Overall, this comprehensive approach provided insights into the strengths and trade-offs of various models. The CNN emerged as the top performer, but ensemble learning demonstrated its value, especially in scenarios where deep learning might be resource-intensive.