In [1]:
# Import the necessary libraries
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelBinarizer
import pickle

In [2]:
# Load the dataset into a pandas dataframe
df = pd.read_csv("../samples.csv")

In [3]:
# Split the dataset into features and labels
X = df.iloc[:, 1:-1]  # select all columns except the first and last ones (id and appliance_id)
y = df.iloc[:, -1]  # select the last column as the target variable (appliance_id)

In [4]:
# Convert labels to numeric values using one-hot encoding
y = pd.get_dummies(y)

In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True)

In [6]:
# Convert one-hot encoded labels to numeric values
y_train = y_train.values.argmax(axis=1)
y_test = y_test.values.argmax(axis=1)

In [7]:
# Define the SVM model
svm = SVC(random_state=42)

In [8]:
# Define the hyperparameters to tune using grid search
params = {
    "C": [0.1, 1, 10, 100, 500],
    "kernel": ["linear", "poly", "rbf", "sigmoid"],
    "gamma": ["scale", "auto"]
}

In [9]:
# Perform grid search to find the best hyperparameters
grid_search = GridSearchCV(svm, params, cv=5, n_jobs=-1, scoring="accuracy")
grid_search.fit(X_train, y_train)

# Print the best hyperparameters found by grid search
print("Best hyperparameters: ", grid_search.best_params_)

Best hyperparameters:  {'C': 500, 'gamma': 'auto', 'kernel': 'rbf'}


In [10]:
# Train the SVM model with the best hyperparameters found by grid search
svm = SVC(**grid_search.best_params_, random_state=42)
svm.fit(X_train, y_train)

In [11]:
# Evaluate the model's performance on the testing set
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

lb = LabelBinarizer()
y_test_onehot = lb.fit_transform(y_test)
y_pred_onehot = lb.transform(y_pred)
report = classification_report(y_test_onehot, y_pred_onehot)

print("Accuracy:", accuracy)
print("Confusion matrix:\n", conf_matrix)
print("Classification report:\n", report)


Accuracy: 0.9346534653465347
Confusion matrix:
 [[135   0   1   0   0   4]
 [  0  73   0   0   0   0]
 [  1   0  80   0   0   0]
 [  0   0   0  63  13   0]
 [  0   0   0  10  57   0]
 [  4   0   0   0   0  64]]
Classification report:
               precision    recall  f1-score   support

           0       0.96      0.96      0.96       140
           1       1.00      1.00      1.00        73
           2       0.99      0.99      0.99        81
           3       0.86      0.83      0.85        76
           4       0.81      0.85      0.83        67
           5       0.94      0.94      0.94        68

   micro avg       0.93      0.93      0.93       505
   macro avg       0.93      0.93      0.93       505
weighted avg       0.93      0.93      0.93       505
 samples avg       0.93      0.93      0.93       505



In [12]:
# Export the trained model to a file using pickle
with open("svm_model.pkl", "wb") as f:
    pickle.dump(svm, f)