## Imports 

In [13]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.metrics import accuracy_score
from IPython.display import display

## Load MNIST CSVs without header and assign column names

In [15]:
# The CSV files have no header row, so read them raw and name the columns
# ourselves: the first column becomes 'label', the rest 'pixel0', 'pixel1', ...
train_df = pd.read_csv('./mnist_train.csv', header=None)
train_df.columns = ['label', *(f'pixel{i}' for i in range(len(train_df.columns) - 1))]

test_df = pd.read_csv('./mnist_test.csv', header=None)
test_df.columns = ['label', *(f'pixel{i}' for i in range(len(test_df.columns) - 1))]

## Split into features and labels

In [16]:
# Features are every column except 'label'; targets are the 'label' column.
# Both are pulled out as plain NumPy arrays.
X_train = train_df.drop(columns='label').to_numpy()
y_train = train_df['label'].to_numpy()

X_test = test_df.drop(columns='label').to_numpy()
y_test = test_df['label'].to_numpy()

## Scale features (fit on the training set, reuse for the test set)

In [17]:
# Fit the scaler on the training features only, then apply that same fitted
# transform to both splits so the test set is scaled with training statistics.
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)


## Define, train, and evaluate models

In [None]:
# Candidate classifiers, keyed by the display name used in the results table.
models = {
    # n_jobs=-1 lets the neighbour search use all available CPU cores.
    'KNN (k=3)': KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
    # `multi_class='multinomial'` is intentionally not passed: the argument is
    # deprecated since scikit-learn 1.5 (FutureWarning, scheduled for removal),
    # and with solver='lbfgs' on a multiclass target the default already fits a
    # multinomial model.
    # NOTE(review): on scikit-learn < 0.22 the default was one-vs-rest — pin
    # the library version if this notebook must run on such old releases.
    'LogisticRegression': LogisticRegression(
        solver='lbfgs',
        max_iter=1000, random_state=42
    ),
    # SGDClassifier with the perceptron loss; random_state fixed for reproducibility.
    'Perceptron (SGD)': SGDClassifier(
        loss='perceptron', max_iter=1000,
        tol=1e-3, random_state=42
    )
}

# Train each candidate on the scaled training data and record its accuracy on
# the scaled test data, keyed by the model's display name.
results = {}
for model_name, estimator in models.items():
    estimator.fit(X_train_s, y_train)
    results[model_name] = accuracy_score(y_test, estimator.predict(X_test_s))



## Display outcomes

In [21]:
# One row per model, with a single 'Accuracy' column.
results_df = pd.Series(results, name='Accuracy').to_frame()
display(results_df)

# Highest-accuracy model; ties resolve to the first entry in `results`.
best = max(results.items(), key=lambda item: item[1])[0]
print(f"Best: {best} -> {results[best]:.4%}")

Unnamed: 0,Accuracy
KNN (k=3),0.9452
LogisticRegression,0.9216
Perceptron (SGD),0.8914


Best: KNN (k=3) -> 94.5200%
