In [4]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Load the MNIST dataset (OpenML id "mnist_784", version 1).
# as_frame=False explicitly requests NumPy arrays, so the types of X and y do
# not depend on the installed scikit-learn's default for `as_frame`, and pandas
# is not needed just to load the data.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist['data'], mnist['target']
# Cast the labels to a compact unsigned integer dtype so they are numeric.
y = y.astype(np.uint8)


In [9]:
# Hold out 20% of the samples for final evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features. The scaler's statistics are learned from the
# training split only and then re-applied unchanged to the test split.
# NOTE(review): the scaler is fitted on the whole training split *before*
# cross-validation, so every validation fold inside GridSearchCV has already
# contributed to the scaling statistics. Consider a Pipeline if unbiased CV
# scores matter.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float64))
X_test_scaled = scaler.transform(X_test.astype(np.float64))

# Hyper-parameter candidates: two neighbourhood sizes, distance-weighted
# voting, and the Minkowski metric with p=1.
param_grid = dict(
    n_neighbors=[2, 3],
    weights=['distance'],
    p=[1],
)

# Base estimator whose hyper-parameters are tuned below.
knn_clf = KNeighborsClassifier()

# 3-fold cross-validated search scored by accuracy, using all available cores.
grid_search = GridSearchCV(
    estimator=knn_clf,
    param_grid=param_grid,
    cv=3,
    scoring='accuracy',
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

# Keep the winning model (refitted on the full training split) and report
# the hyper-parameters that produced it.
best_knn_clf = grid_search.best_estimator_
print("Best parameters found: ", grid_search.best_params_)


Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best parameters found:  {'n_neighbors': 3, 'p': 1, 'weights': 'distance'}


In [10]:

# Run the tuned classifier on the held-out, already-scaled test features.
y_test_pred = best_knn_clf.predict(X_test_scaled)

# Fraction of test samples whose predicted label matches the true label.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(f"Test set accuracy: {test_accuracy}")


Test set accuracy: 0.9622857142857143
