In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import MinMaxScaler


In [2]:

# Download the 'mnist_784' dataset from OpenML; as_frame=False asks for
# array outputs instead of a pandas DataFrame.
print("Loading MNIST dataset...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

# Features: divide by 255 so pixel values are rescaled to [0, 1].
X = mnist.data / 255.0
# Labels: cast the target column to integers.
y = mnist.target.astype(int)

# Hold out 30% of the samples for testing; the fixed seed keeps the
# partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


Loading MNIST dataset...


In [3]:

# --- Training -------------------------------------------------------------
# Both estimators are fit on the same training split. `fit` returns the
# estimator itself, so construction and fitting are chained.
print("Training Naive Bayes model...")
nb_model = GaussianNB().fit(X_train, y_train)

print("Training Logistic Regression model...")
lr_model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# --- Naive Bayes evaluation -----------------------------------------------
# Predict on the training split and on the held-out split so the two
# accuracies can be compared side by side.
nb_train_pred, nb_test_pred = [
    nb_model.predict(features) for features in (X_train, X_test)
]

nb_train_accuracy = accuracy_score(y_true=y_train, y_pred=nb_train_pred)
nb_test_accuracy = accuracy_score(y_true=y_test, y_pred=nb_test_pred)

print("\nNaive Bayes - Training Accuracy:", nb_train_accuracy)
print("Naive Bayes - Test Accuracy:", nb_test_accuracy)
print("Naive Bayes - Classification Report on Test Data:")
# Per-class precision / recall / f1 on the held-out split.
print(classification_report(y_test, nb_test_pred))


Training Naive Bayes model...
Training Logistic Regression model...

Naive Bayes - Training Accuracy: 0.5514897959183673
Naive Bayes - Test Accuracy: 0.550952380952381
Naive Bayes - Classification Report on Test Data:
              precision    recall  f1-score   support

           0       0.64      0.92      0.75      2058
           1       0.79      0.95      0.86      2364
           2       0.86      0.31      0.45      2133
           3       0.75      0.34      0.47      2176
           4       0.86      0.13      0.23      1936
           5       0.55      0.04      0.07      1915
           6       0.61      0.95      0.74      2088
           7       0.90      0.29      0.44      2248
           8       0.30      0.56      0.39      1992
           9       0.37      0.94      0.53      2090

    accuracy                           0.55     21000
   macro avg       0.66      0.54      0.49     21000
weighted avg       0.67      0.55      0.50     21000



In [4]:

# --- Logistic Regression evaluation -----------------------------------------
# Same procedure as for Naive Bayes: predict on both splits, then score.
lr_train_pred, lr_test_pred = [
    lr_model.predict(features) for features in (X_train, X_test)
]

lr_train_accuracy = accuracy_score(y_true=y_train, y_pred=lr_train_pred)
lr_test_accuracy = accuracy_score(y_true=y_test, y_pred=lr_test_pred)

print("\nLogistic Regression - Training Accuracy:", lr_train_accuracy)
print("Logistic Regression - Test Accuracy:", lr_test_accuracy)
print("Logistic Regression - Classification Report on Test Data:")
# Per-class precision / recall / f1 on the held-out split.
print(classification_report(y_test, lr_test_pred))

# --- Head-to-head summary -----------------------------------------------------
print("\nModel Comparison:")
print("Naive Bayes - Test Accuracy:", nb_test_accuracy)
print("Logistic Regression - Test Accuracy:", lr_test_accuracy)

# Announce the model with the strictly higher test accuracy; if neither is
# strictly higher, fall through to the "similar" message.
print(
    "Naive Bayes performs better on MNIST."
    if nb_test_accuracy > lr_test_accuracy
    else (
        "Logistic Regression performs better on MNIST."
        if lr_test_accuracy > nb_test_accuracy
        else "Both models perform similarly on MNIST."
    )
)


Logistic Regression - Training Accuracy: 0.9403265306122449
Logistic Regression - Test Accuracy: 0.9208095238095239
Logistic Regression - Classification Report on Test Data:
              precision    recall  f1-score   support

           0       0.95      0.96      0.96      2058
           1       0.95      0.97      0.96      2364
           2       0.92      0.90      0.91      2133
           3       0.90      0.89      0.90      2176
           4       0.92      0.93      0.92      1936
           5       0.89      0.88      0.89      1915
           6       0.94      0.95      0.95      2088
           7       0.93      0.94      0.94      2248
           8       0.89      0.87      0.88      1992
           9       0.90      0.90      0.90      2090

    accuracy                           0.92     21000
   macro avg       0.92      0.92      0.92     21000
weighted avg       0.92      0.92      0.92     21000


Model Comparison:
Naive Bayes - Test Accuracy: 0.550952380952381
