In [1]:
import numpy as np
import random
import tensorflow as tf  # Import TensorFlow if you're using it
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from bayes_opt import BayesianOptimization

# Load your dataset and preprocess it
# Make sure you've properly loaded the dataset and assigned it to the 'dataset' variable

# Step 1: Load your dataset. Make sure you have a CSV file with relevant features and effort (target) column.
# Replace 'your_dataset.csv' with your actual dataset file name and adjust the column names accordingly.
desharnais=pd.read_csv("C:\\Users\\Asus\\Desktop\\Tehran university\\Seminar\\Datasets\\desharnais_dataset.csv",header=None,index_col=0)
columns_desharnais=['TeamExp','ManagerExp','YearEnd','Length','Effort','Transactions','Entities','PointsAdjust','Envergure','PointsNonAjust','Language']
desharnais.set_axis(columns_desharnais,axis='columns',inplace=True)
desharnais.rename_axis("Features", axis=1,inplace=True)
desharnais.rename_axis("Projects", axis=0,inplace=True)
dataset = desharnais

# Step 2: Prepare the data.
X = dataset.drop(columns=['Effort'])  # Features
y = dataset['Effort']  # Target (effort)

seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)


# Standardize the features
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

pbounds = {'C': (0.1, 10),
           'epsilon': (0.01, 1),
           'gamma': (0.01, 100),
           'kernel': (0, 3)  # Use indices for categorical parameter
}
kernel_mapping = {0: 'linear', 1: 'rbf', 2: 'poly', 3: 'sigmoid'}


def svr_model(C, epsilon, gamma, kernel):
    model = SVR(kernel=kernel_mapping[int(kernel)], C=C, epsilon=epsilon, gamma=gamma)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    return -rmse  # Minimize the negative RMSE for Bayesian optimization.

# Perform Bayesian optimization with k-fold cross-validation.
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed_value)
mean_RMSE=[]
mean_MMRE=[]
mean_MAE=[]

for train_index, val_index in kf.split(X_scaled):
    X_train, X_test = X_scaled[train_index], X_scaled[val_index]
    y_train, y_test = y.iloc[train_index], y.iloc[val_index]
    
    optimizer = BayesianOptimization(
        f=svr_model,
        pbounds=pbounds,
        random_state=42,
        verbose=0,
    )
    
    optimizer.maximize(init_points=10, n_iter=30)  # Adjust the number of initial points and iterations.
    
    best_params = optimizer.max['params']
    best_C = best_params['C']
    best_epsilon = best_params['epsilon']
    best_gamma = best_params['gamma']
    best_kernel = kernel_mapping[int(best_params['kernel'])]
    
    final_model = SVR(C=best_C, epsilon=best_epsilon, gamma=best_gamma, kernel=best_kernel)
    final_model.fit(X_train, y_train)
    
    y_pred = final_model.predict(X_test)
    
    mae = mean_absolute_error(y_test, y_pred)
    mean_MAE.append(mae)

    # Calculate Mean Magnitude of Relative Error (MMRE)
    mmre = np.mean(np.abs((y_test - y_pred) / y_test))
    mean_MMRE.append(mmre)

    # Calculate the Root Mean Squared Error (RMSE) to assess the model's performance.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mean_RMSE.append(rmse)
    
print(f"Mean Absolute Error mean: {np.mean(mean_MAE)}")   
print(f"Mean Magnitude of Relative Error mean (MMRE): {np.mean(mean_MMRE):.2f}")
print(f"Root Mean Squared Error (RMSE) mean: {np.mean(mean_RMSE)}")


Mean Absolute Error mean: 2140.109933748764
Mean Magnitude of Relative Error mean (MMRE): 0.60
Root Mean Squared Error (RMSE) mean: 3510.038849012163
