In [1]:
# Import the necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle

In [2]:
# Load the dataset into a pandas dataframe
df = pd.read_csv("../samples.csv")

In [3]:
# Split the dataset into features and labels
X = df.iloc[:, 1:-1]  # select all columns except the first and last ones (id and appliance_id)
y = df.iloc[:, -1]  # select the last column as the target variable (appliance_id)

In [4]:
# Convert labels to numeric values using one-hot encoding
y = pd.get_dummies(y)

In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True)

In [6]:
# Define the random forest classifier model
rf = RandomForestClassifier(random_state=42)

In [7]:
# Define the hyperparameters to tune using grid search
params = {
    "n_estimators": [100, 200, 300],
    "max_depth": [None, 5, 10],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4]
}

In [8]:
# Perform grid search to find the best hyperparameters
grid_search = GridSearchCV(rf, params, cv=5, n_jobs=-1, scoring="accuracy")
grid_search.fit(X_train, y_train)

# Print the best hyperparameters found by grid search
print("Best hyperparameters: ", grid_search.best_params_)

Best hyperparameters:  {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}


In [9]:
# Train the random forest model with the best hyperparameters found by grid search
rf = RandomForestClassifier(**grid_search.best_params_, random_state=42)
rf.fit(X_train, y_train)

In [10]:
# Evaluate the model's performance on the testing set
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(
    y_test.values.argmax(axis=1), y_pred.argmax(axis=1))
report = classification_report(
    y_test.values.argmax(axis=1), y_pred.argmax(axis=1))
print("Accuracy:", accuracy)
print("Confusion matrix:\n", conf_matrix)
print("Classification report:\n", report)

Accuracy: 0.9188118811881189
Confusion matrix:
 [[140   0   0   0   0   0]
 [  0  73   0   0   0   0]
 [  1   0  80   0   0   0]
 [  8   0   0  61   7   0]
 [  6   0   0   9  52   0]
 [  7   0   0   0   0  61]]
Classification report:
               precision    recall  f1-score   support

           0       0.86      1.00      0.93       140
           1       1.00      1.00      1.00        73
           2       1.00      0.99      0.99        81
           3       0.87      0.80      0.84        76
           4       0.88      0.78      0.83        67
           5       1.00      0.90      0.95        68

    accuracy                           0.92       505
   macro avg       0.94      0.91      0.92       505
weighted avg       0.93      0.92      0.92       505



In [11]:
# Export the trained model to a file using pickle
with open("rf_model.pkl", "wb") as f:
    pickle.dump(rf, f)