In [1]:
import numpy as np
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris

# Example data: the iris dataset shipped with scikit-learn (swap in your own X and y).
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Candidate neighbour counts: k = 1, 2, ..., 25.
k_values = range(1, 26)

# Mean 3-fold cross-validated accuracy on the training split, one entry per
# candidate k (same order as k_values).
cv_scores = []
for k in k_values:
    knn = KNeighborsClassifier(metric='euclidean', n_neighbors=k)
    scores = cross_val_score(knn, X_train, y_train, scoring='accuracy', cv=3)
    cv_scores.append(scores.mean())

# Pick the k with the highest mean score. On a tie np.argmax returns the first
# maximum, which here means the smallest tied k.
best_cv_score = max(cv_scores)
best_k = k_values[int(np.argmax(cv_scores))]

print(f"Best k: {best_k}")
print(f"Best Cross-Validation Score: {best_cv_score}")

# Optional check: fit a fresh classifier with the chosen k on the full training
# split and report its accuracy on the held-out test split.
best_knn = KNeighborsClassifier(metric='euclidean', n_neighbors=best_k).fit(X_train, y_train)
test_accuracy = best_knn.score(X_test, y_test)
print(f"Test Set Accuracy with best k: {test_accuracy}")

Best k: 1
Best Cross-Validation Score: 0.9500000000000001
Test Set Accuracy with best k: 1.0
