In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import time

# Benchmark: k-nearest-neighbours classification of MNIST digits using a
# k-d tree index. Reports test accuracy plus the wall-clock time spent in
# fit() and predict().

# 1. Download MNIST from OpenML (requires network access).
print("Downloading MNIST dataset...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data
y = mnist.target.astype(int)  # cast labels to int before scoring

# 2. Split into 60,000 training and 10,000 test samples.
#    random_state pins the shuffle so the reported accuracy is reproducible
#    from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=60000, test_size=10000, random_state=42
)

# 3. Standardise features. The scaler is fitted on the training split only
#    and then applied to the test split, so no test statistics leak into
#    training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 4. Train KNN with a k-d tree index.
# NOTE(review): the scikit-learn docs describe k-d trees as losing their
# advantage over brute force as dimensionality grows; the input here is
# presumably 784-dimensional, which would explain very long predict()
# times. 'kd_tree' is kept because it is the point of this experiment;
# consider algorithm='brute' or n_jobs=-1 if only speed matters.
knn = KNeighborsClassifier(
    n_neighbors=12,
    algorithm='kd_tree',  # use a k-d tree instead of brute force
    p=2                   # Minkowski p=2, i.e. Euclidean distance
)

# perf_counter() is monotonic and high-resolution, unlike time.time(),
# which follows the system clock and may jump during a long measurement.
start_time = time.perf_counter()
knn.fit(X_train, y_train)
train_time = time.perf_counter() - start_time

# 5. Predict labels for the test split.
start_time = time.perf_counter()
y_pred = knn.predict(X_test)
predict_time = time.perf_counter() - start_time

# 6. Evaluate and report.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy*100:.2f}%")
print(f"Training time: {train_time:.4f} seconds")
print(f"Prediction time: {predict_time:.4f} seconds")


Downloading MNIST dataset...
Accuracy: 94.26%
Training time: 9.3904 seconds
Prediction time: 709.6422 seconds
