In [None]:

# Task 1 - Handwritten Digit Classification using Gaussian Naive Bayes
# Using separate MNIST training and testing datasets
# Strictly analytical version (no plots of digits)

import os
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [None]:

# Load MNIST training and test datasets with path checking

def load_csv(filename):
    """Load a CSV file into a DataFrame, asking for a path if it is missing.

    Parameters
    ----------
    filename : str
        Path of the CSV file. It is tried first exactly as given.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.read_csv`` on the resolved path.

    Raises
    ------
    FileNotFoundError
        If `filename` does not exist and nothing is entered at the prompt.
        Any error raised by ``pd.read_csv`` itself propagates unchanged.
    """
    if os.path.exists(filename):
        return pd.read_csv(filename)

    print(f"⚠️ File '{filename}' not found in current directory:", os.getcwd())
    entered = input(f"Enter full path for {filename}: ").strip()

    # Pasted paths are often wrapped in quotes (e.g. Windows "Copy as path")
    # or start with "~"; normalise both so a valid location is not rejected.
    filepath = os.path.expanduser(entered.strip('"\'').strip())
    if not filepath:
        raise FileNotFoundError(f"No path entered for {filename}")
    return pd.read_csv(filepath)

# Read the training split first, then the test split; load_csv prompts for a
# path whenever a file is not found.
train_data = load_csv('mnist_train.csv')
test_data = load_csv('mnist_test.csv')

# Report the dimensions of each loaded frame.
for caption, frame in (
    ("Training data shape:", train_data),
    ("Test data shape:", test_data),
):
    print(caption, frame.shape)

# Rich preview of the leading rows of the training frame.
print("\nFirst few rows of training data:")
display(train_data.head())


In [None]:

# Column 0 is taken as the label; every remaining column is a feature.
y_train, X_train = train_data.iloc[:, 0].values, train_data.iloc[:, 1:].values
y_test, X_test = test_data.iloc[:, 0].values, test_data.iloc[:, 1:].values

# Confirm the feature-matrix dimensions of both splits.
print("Training set:", X_train.shape, "Testing set:", X_test.shape)


In [None]:

# Normalize pixel values (0–255 → 0–1)
# Scale from the raw DataFrames instead of from X_train / X_test themselves:
# dividing the arrays in place would shrink them again on every re-run of this
# cell, whereas this form yields the same result however often it is executed.
PIXEL_MAX = 255.0
X_train = train_data.iloc[:, 1:].values / PIXEL_MAX
X_test = test_data.iloc[:, 1:].values / PIXEL_MAX


In [None]:

# Build the Gaussian Naive Bayes classifier and train it in one expression;
# fit() hands back the estimator, so the fitted model lands in `gnb`.
gnb = GaussianNB().fit(X_train, y_train)

# Predicted labels for the test features
y_pred = gnb.predict(X_test)


In [None]:

# Score the predictions against the true test labels
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Emit each metric under its heading; accuracy is rounded to 4 decimals.
for heading, metric in (
    ("Accuracy:", round(acc, 4)),
    ("\nConfusion Matrix:\n", cm),
    ("\nClassification Report:\n", report),
):
    print(heading, metric)
