
### Import necessary packages


In [4]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    precision_score, recall_score, accuracy_score, f1_score, 
    mean_absolute_error, mean_squared_error, r2_score, 
    roc_curve, roc_auc_score, confusion_matrix, classification_report
)
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import ConfusionMatrixDisplay
from warnings import filterwarnings

# Suppress warnings
filterwarnings('ignore')



### Fetch the MNIST dataset



In [None]:
# Download the 'mnist_784' dataset (version 1) from OpenML.
# as_frame=True is requested explicitly because later cells rely on pandas
# containers: `mnist.frame`, `X.iloc[...]` and `mnist['target'].value_counts()`.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)
X, y = mnist["data"], mnist["target"]



### Display dataset info



In [None]:

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
mnist.frame.info()
# List the fields available on the fetched dataset object.
print(mnist.keys())




### Visualize an example image



In [None]:
# Show a single sample as a 28x28 grayscale image, titled with its label.
some_digit = 150
some_digit_image = X.iloc[some_digit].to_numpy().reshape(28, 28)

plt.figure(figsize=(5, 5))
plt.imshow(some_digit_image, cmap=plt.cm.gray)
# Look the label up by position too, so it always belongs to the same row as
# the pixels fetched with X.iloc above (y[...] would be a label-based lookup).
plt.title(f"Label: {y.iloc[some_digit]}")
plt.show()




#### Function to visualize multiple images



In [42]:

    #, cmap='gray'

def print_image(row, df, cmap=None):
    """Draw one row of ``df`` as a 28x28 image on the current Matplotlib axes.

    Args:
        row: Positional index of the row to draw.
        df: DataFrame whose rows each hold the 784 pixel values of one image
            (the row is reshaped to 28x28).
        cmap: Optional colormap forwarded to ``plt.imshow`` (for example
            ``'gray'``). ``None`` keeps Matplotlib's default colormap, which
            is what this function used before the parameter existed.
    """
    pixels = df.iloc[row, :].to_numpy()
    # Cast to uint8 after reshaping so imshow receives integer intensities.
    pixels = pixels.reshape(28, 28).astype('uint8')
    plt.imshow(pixels, cmap=cmap)


In [None]:
# Preview the first 30 samples in a grid of 5 rows by 6 columns.
plt.figure(figsize=(10, 10))
for i in range(30):
    plt.subplot(5, 6, i + 1)  # subplot positions are 1-based
    print_image(i, X)
# Render explicitly, as the other plotting cells do, so the grid also appears
# when this code runs outside a notebook.
plt.show()



### Class distribution



In [None]:
# Bar chart of how many samples carry each label, followed by the raw counts.
digit_labels = mnist['target']

sns.countplot(x=digit_labels, palette="viridis")
plt.title("Class Distribution in MNIST")
plt.show()

print(digit_labels.value_counts())



### Split dataset into training and test sets



In [46]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)



### Use Logistic Regression



In [47]:
# Logistic-regression classifier with scikit-learn's default settings.
# NOTE(review): warnings are silenced at the top of this file, so a convergence
# warning raised while fitting would go unnoticed — confirm the solver converges.
l_r = LogisticRegression()



#### Cross-validation with 5 folds



In [None]:
# Estimate accuracy with 5-fold cross-validation on the training split only.
cv_scores = cross_val_score(
    l_r,
    x_train,
    y_train,
    cv=5,
    scoring='accuracy',
)
print(f"Cross-validation Accuracy Scores: {cv_scores}")
print(f"Mean CV Accuracy: {cv_scores.mean():.4f}")

In [None]:
# Train the classifier on the full training split.
l_r.fit(X=x_train, y=y_train)




#### Evaluate model performance




In [None]:
# Mean accuracy on the data the model was fitted on and on the held-out data;
# a large gap between the two would point to overfitting.
train_accuracy, test_accuracy = (
    l_r.score(x_train, y_train),
    l_r.score(x_test, y_test),
)
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")



### Predictions



In [None]:
# Predict on the held-out test split. These predictions are compared against
# y_test in the confusion matrix and classification report below, so they must
# come from x_test; predicting on x_train gives a length mismatch with y_test.
test_predictions = l_r.predict(x_test)





#### Confusion Matrix



In [None]:
# Count, for every true label, how often each label was predicted.
c_m = confusion_matrix(y_test, test_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=c_m, display_labels=l_r.classes_)

# Create the axes up front and hand them to plot(): without `ax`, plot() opens
# its own figure, leaving an empty 8x6 figure behind and ignoring the figsize.
fig, ax = plt.subplots(figsize=(8, 6))
disp.plot(cmap=plt.cm.Blues, values_format='d', ax=ax)
ax.set_title('Confusion Matrix')
plt.show()



#### Classification Report



In [None]:
# Per-class precision, recall and F1 for the test-set predictions, printed
# under a heading line.
print(
    "Classification Report:",
    classification_report(y_test, test_predictions),
    sep="\n",
)