Fitting a k-nearest neighbours (KNN) machine learning model involves the following steps once the data has been preprocessed.

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import h5py
import joblib
import json

In [None]:
# Read the pre-split train/test arrays from the HDF5 split file
with h5py.File('../data/splits/train_test_split.h5', 'r') as f:
    # Slicing with [:] reads each dataset while the file is still open,
    # so the arrays remain usable after the `with` block closes it.
    X_train, X_test, y_train, y_test = (
        f[key][:] for key in ('X_train', 'X_test', 'y_train', 'y_test')
    )

print("Training and testing sets loaded successfully.")

In [None]:
# Model KNN, classification
# Create the scikit-learn k-nearest-neighbours classifier with no constructor
# arguments; its hyperparameters are assigned afterwards via set_params().
model = KNeighborsClassifier()

In [None]:
# Hyperparameter configuration for the KNN classifier
params = dict(
    n_neighbors=5,      # number of neighbours used by default for kneighbors queries
    weights='uniform',  # all points in each neighbourhood are weighted equally
    algorithm='auto',   # algorithm used to compute the nearest neighbours
    p=2,                # power parameter for the Minkowski metric
)

# Apply the configuration to the already-constructed estimator
model.set_params(**params)

In [None]:
# Train the Model
# Fit the classifier on the training features and labels that were read
# from the HDF5 split file.
model.fit(X_train, y_train)

In [None]:
# Evaluate the Model
# Predict labels for the held-out test features; y_pred is later compared
# against y_test to compute the accuracy and the classification report.
y_pred = model.predict(X_test)

In [None]:
# Classification metrics for the test-set predictions.
# The dict form of the report is kept so it can be serialized later;
# the text form is generated separately for display only.
classification_report_dict = classification_report(
    y_test, y_pred, output_dict=True
)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))

In [None]:
# Persist the KNN model to a .pkl file with joblib
model_save_path = '../models/knn_model.pkl'
joblib.dump(value=model, filename=model_save_path)
print(f"KNN model saved to {model_save_path}.")

In [None]:
# Destination file for the evaluation results
metrics_save_path = '../models/knn_metrics.json'

# Bundle the scores from the evaluation step; the report is already a dict
metrics = dict(
    accuracy=accuracy,
    classification_report=classification_report_dict,
)

# Serialize the bundled metrics to the JSON file
with open(metrics_save_path, 'w') as f:
    json.dump(metrics, fp=f)

print(f"Model performance metrics saved to {metrics_save_path}.")