In [21]:
# Import the necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
import pickle

In [22]:
# Load the dataset into a pandas dataframe
df = pd.read_csv("../samples.csv")

In [23]:
# Split the dataset into features and labels
X = df.iloc[:, 1:-1]  # select all columns except the first and last ones (id and appliance_id)
y = df.iloc[:, -1]  # select the last column as the target variable (appliance_id)

In [24]:
# Convert labels to numeric values using one-hot encoding
y = pd.get_dummies(y)

In [25]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True)

In [26]:
# Define the random forest classifier model
rf = RandomForestClassifier(random_state=42)

In [27]:
# Define the hyperparameters to tune using grid search
params = {
    "n_estimators": [100, 200, 300],
    "max_depth": [None, 5, 10, 15],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4]
}

In [28]:
# Perform grid search to find the best hyperparameters
grid_search = GridSearchCV(rf, params, cv=5, n_jobs=-1, scoring="accuracy")
grid_search.fit(X_train, y_train)

# Print the best hyperparameters found by grid search
print("Best hyperparameters: ", grid_search.best_params_)

Best hyperparameters:  {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}


In [32]:
# Train the random forest model with the best hyperparameters found by grid search
rf = RandomForestClassifier(**grid_search.best_params_, random_state=42)
rf.fit(X_train, y_train)

In [33]:
# Evaluate the model's performance on the testing set
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(
    y_test.values.argmax(axis=1), y_pred.argmax(axis=1))
print("Accuracy:", accuracy)
print("Confusion matrix:\n", conf_matrix)


Accuracy: 0.9114583333333334
Confusion matrix:
 [[151   0   0   0   0   0   1]
 [  0  68   0   0   0   0   0]
 [  2   0  63   0   1   0   0]
 [  3   0   0  58   0   8   0]
 [  0   0   0   0  74   0   0]
 [  6   0   0  17   0  51   0]
 [ 12   0   0   0   0   0  61]]


In [34]:
# Export the trained model to a file using pickle
with open("rf_model.pkl", "wb") as f:
    pickle.dump(rf, f)