## Model Evaluation Using Test Set

In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np
from art import text2art
import pandas as pd

def load_data(file_path):
    """Read a header-less CSV file and split it into features and labels.

    Every column except the last is treated as a feature; the last column
    is treated as the label.

    Args:
        file_path: Path of the CSV file to read (no header row is expected).

    Returns:
        A ``(X, y)`` tuple of NumPy arrays: ``X`` holds all columns but the
        last, ``y`` holds the last column.
    """
    table = pd.read_csv(file_path, header=None)
    feature_frame = table.iloc[:, :-1]
    label_series = table.iloc[:, -1]
    return feature_frame.values, label_series.values


# Load the three pre-split datasets. The validation split is loaded here but
# is not used by the evaluation below.
X_train, y_train = load_data("train_set.txt")
X_val, y_val = load_data("validation_set.txt")
X_test, y_test = load_data("test_set.txt")

# Encode the labels as integers: "g" -> 1, every other value -> 0.
y_train = np.where(y_train == "g", 1, 0)
y_val = np.where(y_val == "g", 1, 0)
y_test = np.where(y_test == "g", 1, 0)

# Number of neighbours, hard-coded for this evaluation.
# NOTE(review): presumably selected on the validation set elsewhere - confirm.
best_k = 10

knn = KNeighborsClassifier(n_neighbors=best_k)

knn.fit(X_train, y_train)

print(text2art(f"k={best_k}", font="slant"))

# Calculate the error rate (1 - accuracy) on the test set
test_predictions = knn.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
print(f"Test error: {1 - test_accuracy:.3f}\n")

# Pin the label order explicitly so that index 0 is always "h" and index 1 is
# always "g". Without this, scikit-learn infers the labels from the data, and
# a class missing from both y_test and the predictions would shrink the
# matrix and make `target_names` mismatch the inferred labels.
class_labels = [0, 1]

# Calculate confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, test_predictions, labels=class_labels)
print("Confusion Matrix:")
print(cm)

# Classification report
print("\nClassification Report on Test Set:")
print(
    classification_report(
        y_test,
        test_predictions,
        labels=class_labels,
        target_names=["h", "g"],
    )
)

    __            ___   ____ 
   / /__  _____  <  /  / __ \
  / //_/ /____/  / /  / / / /
 / ,<   /____/  / /  / /_/ / 
/_/|_|         /_/   \____/  
                             

Test error: 0.240

Confusion Matrix:
[[711 275]
 [206 815]]

Classification Report on Test Set:
              precision    recall  f1-score   support

           h       0.78      0.72      0.75       986
           g       0.75      0.80      0.77      1021

    accuracy                           0.76      2007
   macro avg       0.76      0.76      0.76      2007
weighted avg       0.76      0.76      0.76      2007

