In [None]:
# KNN on all features, evaluated on the validation set
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Hyperparameter search for a K-nearest-neighbours classifier.
# Candidate settings: neighbourhood size, vote weighting and distance metric
# (3 x 2 x 2 = 12 combinations).
param_grid = dict(
    n_neighbors=[5, 10, 15],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Untuned base estimator handed to the search.
knn = KNeighborsClassifier()

# Exhaustive search over the grid, scored by F1 with 5-fold cross-validation
# on the training data only.
grid_search_knn = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='f1',
    cv=5,
)
grid_search_knn.fit(X_train, y_train)

# Keep the winning model (refit on all of X_train by GridSearchCV's default
# refit=True) and report which combination won.
best_knn = grid_search_knn.best_estimator_
print('Best hyperparameters:', grid_search_knn.best_params_)

# Predict on the held-out validation set (not the test set) with the tuned model.
y_pred_knn = best_knn.predict(X_val)

# Text summary of validation performance.
print(classification_report(y_val, y_pred_knn))

# Compute the confusion matrix once and reuse it for both the printed table
# and the heatmap, instead of recomputing it from the same inputs.
conf_matrix = confusion_matrix(y_val, y_pred_knn)
print(conf_matrix)

# Heatmap of the same counts; fmt='g' keeps the annotations as plain integers
# rather than scientific notation.
sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='g')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# ROC curve on the validation set.
# Score the validation samples once: KNN prediction is a neighbour search over
# the training data, so repeating predict_proba for each metric is wasted work.
# Column 1 is taken as the positive-class probability -- assumes binary labels
# with the positive class second in best_knn.classes_ (TODO confirm).
val_proba_knn = best_knn.predict_proba(X_val)[:, 1]

fpr, tpr, thresholds = roc_curve(y_val, val_proba_knn)
# NOTE(review): `auc` would shadow sklearn.metrics.auc if that function is
# imported elsewhere in the notebook; name kept for backward compatibility.
auc = roc_auc_score(y_val, val_proba_knn)

plt.plot(fpr, tpr, label=f'AUC = {auc:.3f}')
# Diagonal reference line: performance of a random classifier.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()

# Learning curve for the tuned KNN: F1 on the training folds and on the
# held-out folds (5-fold CV) at ten training-set fractions from 10% to 100%.
train_sizes, train_scores, test_scores = learning_curve(
    best_knn,
    X_train,
    y_train,
    cv=5,
    scoring='f1',
    n_jobs=-1,  # use all available cores
    train_sizes=np.linspace(0.1, 1.0, 10),
)

# Score arrays are (n_train_sizes, n_folds); aggregate across the folds.
train_mean = train_scores.mean(axis=1)
train_std = train_scores.std(axis=1)
test_mean = test_scores.mean(axis=1)
test_std = test_scores.std(axis=1)

# One line per curve with a +/- one standard deviation band around it.
for curve_mean, curve_std, curve_label in (
    (train_mean, train_std, 'Training score'),
    (test_mean, test_std, 'Cross-validation score'),
):
    plt.plot(train_sizes, curve_mean, label=curve_label)
    plt.fill_between(
        train_sizes,
        curve_mean - curve_std,
        curve_mean + curve_std,
        alpha=0.2,
    )

plt.xlabel('Number of training examples')
plt.ylabel('F1 score')
plt.title('Learning Curve')
plt.legend()
plt.show()

# Precision-recall curve on the validation set.
# Imported here because the metric is first needed by this plot.
from sklearn.metrics import average_precision_score

# Column 1 is taken as the positive-class probability -- assumes binary labels
# with the positive class second in best_knn.classes_ (TODO confirm).
val_proba_knn = best_knn.predict_proba(X_val)[:, 1]

# NOTE: `thresholds` is rebound here to the precision-recall thresholds,
# replacing any earlier value of that name (e.g. from a ROC computation).
precision, recall, thresholds = precision_recall_curve(y_val, val_proba_knn)

# Previously `average_precision` was used in the legend without being assigned
# in this cell, which raises NameError or silently shows a stale value left by
# another cell. Compute it from the same scores that produced the curve.
average_precision = average_precision_score(y_val, val_proba_knn)

plt.plot(recall, precision, label=f'AP = {average_precision:.3f}')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.show()