In [76]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from itertools import permutations



# List of possible algorithms
algorithms = ["RandomForest", "DecisionTree", "SVM", "XGBoost", "AdaBoost", "LogisticRegression", 
              "NaiveBayes", "kNN", "K-Means", "GradientBoosting", "GBM", "LightGBM", "1D-CNN", "Simpler_Sequential"]

algorithm_mapping = {combo: i for i, combo in enumerate(algorithms)}


In [77]:
algorithm_mapping

{'RandomForest': 0,
 'DecisionTree': 1,
 'SVM': 2,
 'XGBoost': 3,
 'AdaBoost': 4,
 'LogisticRegression': 5,
 'NaiveBayes': 6,
 'kNN': 7,
 'K-Means': 8,
 'GradientBoosting': 9,
 'GBM': 10,
 'LightGBM': 11,
 '1D-CNN': 12,
 'Simpler_Sequential': 13}

In [78]:
df = pd.read_csv("database_with_metaFeatures_and_resourceSpecifications.csv")

In [79]:
df.head()

Unnamed: 0,n_samples,n_features,pca_expl_ratio_1,pca_expl_ratio_2,pca_expl_ratio_3,attr_conc.mean,attr_conc.sd,attr_ent.mean,attr_ent.sd,cor.mean,...,var.sd,Filename,Device,CPU,GPU,RAM,Memory,Power Consumption,Energy Level,Best_Model
0,8238,63,0.225916,0.735604,0.276468,0.095292,155.487712,4.981057,0.742487,0.446204,...,0.0,bank-additional-full_normalised_1.csv,CCTVCamera1,ARM Cortex-M4,,256MB,512MB,3W,Low,1D-CNN
1,8238,63,0.225916,0.735604,0.276468,0.095292,155.487712,4.981057,0.742487,0.446204,...,0.0,bank-additional-full_normalised_1.csv,CCTVCamera2,ARM Cortex-A7,,512MB,1GB,5W,Low,1D-CNN
2,8238,63,0.225916,0.735604,0.276468,0.095292,155.487712,4.981057,0.742487,0.446204,...,0.0,bank-additional-full_normalised_1.csv,SmartSensor1,ARM Cortex-M0,,128KB,256KB,1W,Very Low,1D-CNN
3,8238,63,0.225916,0.735604,0.276468,0.095292,155.487712,4.981057,0.742487,0.446204,...,0.0,bank-additional-full_normalised_1.csv,SmartSensor2,ARM Cortex-A5,,256KB,512KB,2W,Very Low,1D-CNN
4,8238,63,0.225916,0.735604,0.276468,0.095292,155.487712,4.981057,0.742487,0.446204,...,0.0,bank-additional-full_normalised_1.csv,SmartPlug1,ARM Cortex-M3,,64KB,128KB,0.5W,Very Low,1D-CNN


In [80]:
# Encode the target variable to represent the ranking
label_encoder = LabelEncoder()
encoder = label_encoder.fit_transform(algorithms)

In [95]:
# Get the mapping from original labels to encoded labels
label_mapping = {label: encoded_label for label, encoded_label in zip(algorithms, encoder)}

# Display the mapping
print(label_mapping)

{'RandomForest': 9, 'DecisionTree': 2, 'SVM': 10, 'XGBoost': 12, 'AdaBoost': 1, 'LogisticRegression': 7, 'NaiveBayes': 8, 'kNN': 13, 'K-Means': 5, 'GradientBoosting': 4, 'GBM': 3, 'LightGBM': 6, '1D-CNN': 0, 'Simpler_Sequential': 11}


In [81]:
# Separate features and target variables
X = df.drop(columns=['Best_Model','Filename'])
y = df['Best_Model']


In [82]:
y = label_encoder.transform(y)

In [83]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import joblib


# Separate numeric and categorical features
numeric_features = X.select_dtypes(include=[np.number])
categorical_features = X.select_dtypes(exclude=[np.number])

# Perform preprocessing on numeric features
scaler = MinMaxScaler()
X_numeric_scaled = pd.DataFrame(scaler.fit_transform(numeric_features), columns=numeric_features.columns)

# Perform one-hot encoding on categorical features
X_categorical_encoded = pd.get_dummies(categorical_features)

# Combine numeric and categorical features
X = pd.concat([X_numeric_scaled, X_categorical_encoded], axis=1)




In [92]:


# Assuming numeric_features is your DataFrame containing numeric features
min_max_values = numeric_features.agg(['min', 'max'])

# Save the min_max_values DataFrame to a joblib file
joblib.dump(min_max_values, 'min_max_values.joblib')


['min_max_values.joblib']

In [91]:
X

Unnamed: 0,n_samples,n_features,pca_expl_ratio_1,pca_expl_ratio_2,pca_expl_ratio_3,attr_conc.mean,attr_conc.sd,attr_ent.mean,attr_ent.sd,cor.mean,...,Power Consumption_20W,Power Consumption_2W,Power Consumption_3W,Power Consumption_50W,Power Consumption_5W,Power Consumption_80W,Energy Level_High,Energy Level_Low,Energy Level_Moderate,Energy Level_Very Low
0,0.018644,0.465517,5.684094e-19,0.572653,3.390122e-19,7.882969e-37,0.007780,0.124340,0.970527,0.170996,...,0,0,1,0,0,0,0,1,0,0
1,0.018644,0.465517,5.684094e-19,0.572653,3.390122e-19,7.882969e-37,0.007780,0.124340,0.970527,0.170996,...,0,0,0,0,1,0,0,1,0,0
2,0.018644,0.465517,5.684094e-19,0.572653,3.390122e-19,7.882969e-37,0.007780,0.124340,0.970527,0.170996,...,0,0,0,0,0,0,0,0,0,1
3,0.018644,0.465517,5.684094e-19,0.572653,3.390122e-19,7.882969e-37,0.007780,0.124340,0.970527,0.170996,...,0,1,0,0,0,0,0,0,0,1
4,0.018644,0.465517,5.684094e-19,0.572653,3.390122e-19,7.882969e-37,0.007780,0.124340,0.970527,0.170996,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4495,0.056822,0.293103,1.021645e-07,0.532122,2.914487e-08,3.943231e-16,0.052403,0.280712,0.929105,0.231789,...,0,0,0,0,0,0,0,0,1,0
4496,0.056822,0.293103,1.021645e-07,0.532122,2.914487e-08,3.943231e-16,0.052403,0.280712,0.929105,0.231789,...,0,0,0,0,0,0,0,0,0,1
4497,0.056822,0.293103,1.021645e-07,0.532122,2.914487e-08,3.943231e-16,0.052403,0.280712,0.929105,0.231789,...,0,0,0,0,0,0,0,1,0,0
4498,0.056822,0.293103,1.021645e-07,0.532122,2.914487e-08,3.943231e-16,0.052403,0.280712,0.929105,0.231789,...,0,0,0,0,0,0,0,1,0,0


In [84]:



# Save the one-hot encoding mapping
joblib.dump(X_categorical_encoded.columns, 'one_hot_encoding_mapping.pkl')


['one_hot_encoding_mapping.pkl']

In [85]:
# Check for NaN values
nan_indices = np.isnan(X)

# Print the indices of NaN values
print("Indices of NaN values:", np.where(nan_indices)[0])

# Count the number of NaN values
num_nans = np.sum(nan_indices)
print("Number of NaN values:", num_nans)

# Check if there are any NaN values in the array
if np.any(nan_indices):
    print("There are NaN values in the array.")
else:
    print("There are no NaN values in the array.")

Indices of NaN values: []
Number of NaN values: n_samples                0
n_features               0
pca_expl_ratio_1         0
pca_expl_ratio_2         0
pca_expl_ratio_3         0
                        ..
Power Consumption_80W    0
Energy Level_High        0
Energy Level_Low         0
Energy Level_Moderate    0
Energy Level_Very Low    0
Length: 157, dtype: int64
There are no NaN values in the array.


In [86]:

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [87]:
from sklearn.svm import SVC
# Initialize and train the SVM classifier
svm_classifier = SVC(kernel='linear', decision_function_shape='ovr')
svm_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = svm_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"SVM Accuracy: {accuracy:.4f}")



SVM Accuracy: 0.7400


In [88]:
from sklearn.ensemble import RandomForestClassifier


# Initialize Random Forest classifier
rf_classifier = RandomForestClassifier()

# Train the Random Forest classifier
rf_classifier.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = rf_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
#meta-learner

Accuracy: 0.8811111111111111


In [89]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers


num_classes = len(y)
# One-hot encode the target variable
y_train_encoded = to_categorical(y_train, num_classes=num_classes)
y_test_encoded = to_categorical(y_test, num_classes=num_classes)



model = Sequential([
    Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    BatchNormalization(),
    Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train_encoded, epochs=10, batch_size=8, verbose=1, validation_data=(X_test, y_test_encoded))

# Evaluate the model on the testing set
loss, accuracy = model.evaluate(X_test, y_test_encoded, verbose=0)
print(f'Test Accuracy: {accuracy:.4f}')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.7156


In [90]:

# Evaluate models
rf_accuracy = rf_classifier.score(X_test, y_test)
svm_accuracy = svm_classifier.score(X_test, y_test)
nn_loss, nn_accuracy = model.evaluate(X_test, y_test_encoded, verbose=0)

# Store the best performing model
best_model_name = None
best_accuracy = 0
if rf_accuracy > best_accuracy:
    best_model_name = "RandomForest"
    best_accuracy = rf_accuracy
if svm_accuracy > best_accuracy:
    best_model_name = "SVM"
    best_accuracy = svm_accuracy
if nn_accuracy > best_accuracy:
    best_model_name = "Simple_Sequential"
    best_accuracy = nn_accuracy

# Convert best_model_name to encoded model name
encoded_best_model = algorithms.index(best_model_name)

# Save the best performing model
if best_model_name == "RandomForest":
    joblib.dump(rf_classifier, 'best_model.pkl')
elif best_model_name == "SVM":
    joblib.dump(svm_classifier, 'best_model.pkl')
else:
    model.save('best_model.h5')




['label_encoder.pkl']

In [96]:
# Save the label encoder
joblib.dump(label_mapping, 'label_encoder.pkl')

['label_encoder.pkl']