In [1]:
# Import the necessary libraries
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle

In [2]:
# Load the dataset into a pandas dataframe.
# The path is relative, so it is resolved against the kernel's current working
# directory (the CSV is expected one directory above it).
df = pd.read_csv("../samples.csv")

In [3]:
# Separate the predictors from the target.
# Features: every column strictly between the first one (id) and the last one (appliance_id).
X = df.iloc[:, slice(1, -1)]
# Target: the final column (appliance_id), addressed by its positional index.
y = df.iloc[:, df.shape[1] - 1]

In [4]:
# Convert labels to numeric values using one-hot encoding: y becomes a frame
# with one indicator column per distinct label value.
# NOTE(review): the evaluation cell has to argmax these columns back into class
# indices; scikit-learn classifiers presumably accept the original 1-D labels
# directly, which would avoid the round trip -- confirm before changing.
y = pd.get_dummies(y)

In [5]:
# Hold out 20% of the rows as the test set. Rows are shuffled before the
# split, and the fixed seed makes that shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [6]:
# Define the k-NN classifier model.
# It is created with the library defaults and serves as the base estimator
# whose hyperparameters the grid search below overrides.
knn = KNeighborsClassifier()

In [7]:
# Search space for the grid search: neighbourhood size (odd values 1..9),
# vote weighting scheme, and the Minkowski exponent p (values 1..5).
params = dict(
    n_neighbors=list(range(1, 10, 2)),
    weights=["uniform", "distance"],
    p=list(range(1, 6)),
)

In [8]:
# Try every combination in `params` with 5-fold cross-validation on the
# training split, ranking candidates by accuracy. n_jobs=-1 runs the fits in
# parallel. fit() returns the search object itself, so it can be chained.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=params,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Show the winning combination
print("Best hyperparameters: ", grid_search.best_params_)

Best hyperparameters:  {'n_neighbors': 9, 'p': 1, 'weights': 'distance'}


In [9]:
# Use the k-NN classifier trained with the best hyperparameters found by grid search.
# The search above is created without a `refit` argument, so it keeps the
# default (refit=True): after cross-validation it has already fitted a fresh
# estimator with `best_params_` on the full training split. Reusing that model
# avoids constructing and fitting an identical classifier a second time and
# guarantees the evaluated/exported model is exactly the one the search chose.
knn = grid_search.best_estimator_

In [10]:
# Evaluate the model's performance on the testing set
y_pred = knn.predict(X_test)

# The targets are one-hot encoded, so collapse every row back to the index of
# its hot column once and reuse the result for all label-based metrics.
# NOTE(review): argmax returns 0 for a row with no hot column, so such a
# prediction would be counted as the first class below -- confirm whether the
# classifier can emit all-zero rows and whether that mapping is acceptable.
y_true_idx = y_test.values.argmax(axis=1)
y_pred_idx = y_pred.argmax(axis=1)

# Pin the label set to every one-hot column so the matrix and the report keep
# one row per class even if a class is absent from this split, and label the
# report with the original class values instead of positional indices.
class_indices = list(range(y_test.shape[1]))
class_names = [str(column) for column in y_test.columns]

# Computed on the one-hot frames, i.e. a row only counts as correct when the
# whole predicted indicator row equals the true one.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_true_idx, y_pred_idx, labels=class_indices)
report = classification_report(
    y_true_idx, y_pred_idx, labels=class_indices, target_names=class_names)
print("Accuracy:", accuracy)
print("Confusion matrix:\n", conf_matrix)
print("Classification report:\n", report)

Accuracy: 0.9306930693069307
Confusion matrix:
 [[135   0   1   0   0   4]
 [  0  73   0   0   0   0]
 [  1   0  80   0   0   0]
 [  0   0   0  63  13   0]
 [  0   0   0  13  54   0]
 [  3   0   0   0   0  65]]
Classification report:
               precision    recall  f1-score   support

           0       0.97      0.96      0.97       140
           1       1.00      1.00      1.00        73
           2       0.99      0.99      0.99        81
           3       0.83      0.83      0.83        76
           4       0.81      0.81      0.81        67
           5       0.94      0.96      0.95        68

    accuracy                           0.93       505
   macro avg       0.92      0.92      0.92       505
weighted avg       0.93      0.93      0.93       505



In [11]:
# Serialize the fitted classifier to knn_model.pkl in the current working
# directory so it can be reloaded later without retraining.
# Only load this file back from a trusted location: unpickling runs code.
with open("knn_model.pkl", mode="wb") as f:
    pickle.dump(knn, file=f)