In [3]:
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
import pandas as pd
import math
from bayes_opt import BayesianOptimization
import random
import sklearn
from sklearn.metrics import mean_squared_error
import tensorflow as tf

In [4]:

# Step 1: Load your dataset. Make sure you have a CSV file with relevant features and effort (target) column.
# Replace 'your_dataset.csv' with your actual dataset file name and adjust the column names accordingly.
maxwell=pd.read_csv("C:\\Users\\Asus\\Desktop\\Tehran university\\Seminar\\Datasets\\maxwell_dataset.csv",header=None)
columns_maxwell=['Syear','App','Har','Dba','Ifc','Source','Telonuse','Nlan','T01','T02','T03','T04','T05','T06','T07','T08','T09','T10','T11','T12','T13','T14','T15','Duration','Size','Time','Effort']
maxwell.set_axis(columns_maxwell,axis='columns',inplace=True)
maxwell.set_axis(range(1,63),axis=0 ,inplace=True)
maxwell.rename_axis("Features", axis=1,inplace=True)
maxwell.rename_axis("Projects", axis=0,inplace=True)
dataset = maxwell

# Step 2: Prepare the data.
X = dataset.drop(columns=['Effort'])  # Features
y = dataset['Effort']  # Target (effort)

seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

# Step 2: Preprocess Data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 3: Feature Selection using Harmony Search
def objective_function(features):
    num_folds = 5
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed_value)
    X_selected = X_scaled[:,features]
    error_list=[]
    
    for train_index, val_index in kf.split(X_selected):
        X_train, X_test = X_selected[train_index], X_selected[val_index]
        y_train, y_test = y[train_index], y[val_index]
        svr = SVR(kernel='linear')
        svr.fit(X_train, y_train)
        y_pred = svr.predict(X_test)
        mae = mean_absolute_error(y_test, y_pred)
        error_list.append(mae)
    return 1 / (1 + np.mean(error_list))
#====================================================================================================================

# Harmony Search Parameters
num_features = X.shape[1]
hms = 20  # Harmony memory size
iterations = 100  # Number of iterations

# Initialize harmony memory
harmony_memory = np.random.randint(0, 2, size=(hms, num_features))

# Harmony Search Algorithm
for _ in range(iterations):
    rnd_choice=np.random.choice(hms)
    new_harmony = np.copy(harmony_memory[rnd_choice])
    for i in range(num_features):
        if np.random.rand() < 0.5:  # Adjust this probability based on your problem
            new_harmony[i] = 1 - new_harmony[i]
    current_obj = objective_function(harmony_memory[rnd_choice])
    new_obj = objective_function(new_harmony)
    if new_obj < current_obj:
        harmony_memory[rnd_choice] = new_harmony

# Get selected features based on final harmony memory
selected_features = harmony_memory[np.argmax([objective_function(i) for i in harmony_memory])]
print(selected_features)
    
#======================================================================================================================

        
pbounds = {'C': (0.1, 10),
           'epsilon': (0.01, 1),
           'gamma': (0.01, 100),
           'kernel': (0, 3)  # Use indices for categorical parameter
}
kernel_mapping = {0: 'linear', 1: 'rbf', 2: 'poly', 3: 'sigmoid'}

def svr_model(kernel,C, epsilon, gamma):
    model = SVR( kernel = kernel_mapping[int(kernel)], C=C, epsilon=epsilon, gamma=gamma)

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    return -rmse  # Minimize the negative RMSE for Bayesian optimization.
    
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed_value)
X_selected = X[:, selected_features]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_selected)
mean_MAE=[]
mean_MMRE=[]
mean_RMSE=[]
    
for train_index, val_index in kf.split(X_scaled):
    X_train, X_test = X_scaled[train_index], X_scaled[val_index]
    y_train, y_test = y[train_index], y[val_index]    
    # Perform Bayesian optimization.
    optimizer = BayesianOptimization(
        f=svr_model,
        pbounds=pbounds,
        random_state=42,
        verbose=2,
    )


    optimizer.maximize(init_points=10, n_iter=30)  # Adjust the number of initial points and iterations.

    # Print the best hyperparameters found.
    best_params = optimizer.max['params']
    print("Best Hyperparameters:")
    print(best_params)

    best_C = best_params['C']
    best_epsilon = best_params['epsilon']
    best_gamma = best_params['gamma']
    best_kernel= kernel_mapping[int(best_params['kernel'])]

    # Train the final SVR model with the best parameters
    final_model = SVR(C=best_C, epsilon=best_epsilon, gamma=best_gamma,kernel= best_kernel)
    final_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred = final_model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    mean_MAE.append(mae)

    # Calculate Mean Magnitude of Relative Error (MMRE)
    mmre = np.mean(np.abs((y_test - y_pred) / y_test))
    mean_MMRE.append(mmre)

    # Calculate the Root Mean Squared Error (RMSE) to assess the model's performance.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mean_RMSE.append(rmse)
    
    
print(f"Mean Absolute Error: {np.mean(mean_MAE)}") 
print(f"Mean Magnitude of Relative Error (MMRE): {np.mean(mean_MMRE):.2f}")
print(f"Root Mean Squared Error (RMSE): {np.mean(mean_RMSE)}")


|   iter    |  target   | harmon... |    hms    | iterat... |
-------------------------------------------------------------
|   iter    |  target   |     C     |  epsilon  |   gamma   |  kernel   |
-------------------------------------------------------------------------
| [0m1        [0m | [0m-591.6   [0m | [0m3.808    [0m | [0m0.9512   [0m | [0m73.2     [0m | [0m1.796    [0m |
| [95m2        [0m | [95m-515.2   [0m | [95m1.645    [0m | [95m0.1644   [0m | [95m5.818    [0m | [95m2.599    [0m |
| [95m3        [0m | [95m-513.6   [0m | [95m6.051    [0m | [95m0.711    [0m | [95m2.068    [0m | [95m2.91     [0m |
| [0m4        [0m | [0m-533.6   [0m | [0m8.341    [0m | [0m0.2202   [0m | [0m18.19    [0m | [0m0.5502   [0m |
| [0m5        [0m | [0m-550.4   [0m | [0m3.112    [0m | [0m0.5295   [0m | [0m43.2     [0m | [0m0.8737   [0m |
| [0m6        [0m | [0m-590.3   [0m | [0m6.157    [0m | [0m0.1481   [0m | [0m29.22    [0m | [0m

| [0m23       [0m | [0m-3.248e+0[0m | [0m4.539    [0m | [0m0.09703  [0m | [0m5.181    [0m | [0m2.511    [0m |
| [0m24       [0m | [0m-3.248e+0[0m | [0m2.469    [0m | [0m0.6832   [0m | [0m5.818    [0m | [0m2.217    [0m |
| [0m25       [0m | [0m-3.57e+03[0m | [0m3.213    [0m | [0m0.01     [0m | [0m5.814    [0m | [0m3.0      [0m |
| [0m26       [0m | [0m-3.593e+0[0m | [0m1.893    [0m | [0m0.07946  [0m | [0m5.808    [0m | [0m1.744    [0m |
| [0m27       [0m | [0m-3.59e+03[0m | [0m7.168    [0m | [0m0.7501   [0m | [0m4.341    [0m | [0m1.208    [0m |
| [0m28       [0m | [0m-3.59e+03[0m | [0m8.469    [0m | [0m0.7207   [0m | [0m29.55    [0m | [0m1.622    [0m |
| [0m29       [0m | [0m-3.247e+0[0m | [0m1.772    [0m | [0m0.7097   [0m | [0m6.069    [0m | [0m2.55     [0m |
| [0m30       [0m | [0m-3.247e+0[0m | [0m1.399    [0m | [0m0.5763   [0m | [0m5.417    [0m | [0m2.437    [0m |
| [0m31       [0m | [

| [0m9        [0m | [0m-574.7   [0m | [0m0.744    [0m | [0m0.9494   [0m | [0m96.56    [0m | [0m2.425    [0m |
| [0m10       [0m | [0m-601.9   [0m | [0m3.116    [0m | [0m0.1067   [0m | [0m68.43    [0m | [0m1.32     [0m |
| [95m11       [0m | [95m-514.4   [0m | [95m10.0     [0m | [95m1.0      [0m | [95m51.05    [0m | [95m3.0      [0m |
| [0m12       [0m | [0m-598.3   [0m | [0m0.1      [0m | [0m1.0      [0m | [0m53.13    [0m | [0m3.0      [0m |
| [0m13       [0m | [0m-541.8   [0m | [0m10.0     [0m | [0m0.01     [0m | [0m88.09    [0m | [0m0.0      [0m |
| [0m14       [0m | [0m-541.8   [0m | [0m10.0     [0m | [0m0.01     [0m | [0m100.0    [0m | [0m0.0      [0m |
| [0m15       [0m | [0m-541.8   [0m | [0m10.0     [0m | [0m0.01     [0m | [0m42.38    [0m | [0m0.0      [0m |
| [0m16       [0m | [0m-541.6   [0m | [0m10.0     [0m | [0m1.0      [0m | [0m56.23    [0m | [0m0.0      [0m |
| [0m17       [0

| [0m35       [0m | [0m-1.941e+0[0m | [0m0.708    [0m | [0m0.5208   [0m | [0m42.71    [0m | [0m2.859    [0m |
| [0m36       [0m | [0m-1.955e+0[0m | [0m3.538    [0m | [0m0.2252   [0m | [0m66.69    [0m | [0m0.4801   [0m |
| [0m37       [0m | [0m-2.017e+0[0m | [0m1.675    [0m | [0m0.5449   [0m | [0m31.52    [0m | [0m1.832    [0m |
| [0m38       [0m | [0m-1.919e+0[0m | [0m1.021    [0m | [0m0.7537   [0m | [0m65.63    [0m | [0m2.533    [0m |
| [0m39       [0m | [0m-1.952e+0[0m | [0m7.191    [0m | [0m0.06195  [0m | [0m47.45    [0m | [0m0.5273   [0m |
| [0m40       [0m | [0m-1.94e+03[0m | [0m9.01     [0m | [0m0.218    [0m | [0m46.14    [0m | [0m0.4312   [0m |
Best Hyperparameters:
{'C': 0.9223550589073464, 'epsilon': 0.800428932803595, 'gamma': 92.20482744230344, 'kernel': 2.914866488703156}
Mean Absolute Error: 639.9463559454293
Mean Magnitude of Relative Error (MMRE): 5.05
Root Mean Squared Error (RMSE): 1227.92270777793