In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, KFold
from sklearn.svm import SVR
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
import math
from sklearn.preprocessing import StandardScaler
import random
import sklearn
from bayes_opt import BayesianOptimization
from sklearn.metrics import mean_squared_error
import tensorflow as tf

In [2]:
# Step 1: Load the Maxwell dataset. The file is read with header=None, so no row
# is consumed as a header and the column labels are assigned explicitly below.
# NOTE(review): the path is an absolute, machine-specific location; consider
# moving it to a configurable data directory so the notebook runs elsewhere.
maxwell=pd.read_csv("C:\\Users\\Asus\\Desktop\\Tehran university\\Seminar\\Datasets\\maxwell_dataset.csv",header=None)
columns_maxwell=['Syear','App','Har','Dba','Ifc','Source','Telonuse','Nlan','T01','T02','T03','T04','T05','T06','T07','T08','T09','T10','T11','T12','T13','T14','T15','Duration','Size','Time','Effort']

# Assign the labels directly: `set_axis(..., inplace=True)` is rejected by
# pandas >= 2.0, whereas plain attribute assignment works on every version and
# still raises if the number of labels does not match the number of columns.
maxwell.columns = columns_maxwell
# Number the projects 1..n from the actual row count instead of a hard-coded 62.
maxwell.index = pd.RangeIndex(1, len(maxwell) + 1)
# Name both axes ("Projects" for rows, "Features" for columns) without mutating in place.
maxwell = maxwell.rename_axis(index="Projects", columns="Features")
dataset = maxwell

# Step 2: Split the frame into the feature matrix and the target vector.
# Both are converted to plain NumPy arrays, so column names are not carried along.
X = dataset.drop(columns=['Effort']).values  # Features: every column except 'Effort'
y = dataset['Effort'].values  # Target (effort)

# Seed every random number generator used by the notebook (Python's `random`,
# NumPy and TensorFlow) with the same value so that reruns are repeatable.
seed_value = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed_value)

# Base regressor (linear-kernel support vector regression) used to score
# candidate feature subsets during selection.
estimator = SVR(kernel="linear")

# Forward sequential feature selection, scored by 5-fold cross-validated
# negative MAE. With n_features_to_select='auto' and tol set, the number of
# features kept is governed by `tol` rather than fixed in advance.
sfs = SequentialFeatureSelector(
    estimator,
    n_features_to_select='auto',
    direction='forward',
    cv=5,
    scoring='neg_mean_absolute_error',
    tol=0.1,
)

# Fit the selector on the full feature matrix and keep only the chosen columns.
X_selected = sfs.fit(X, y).transform(X)

# X is a plain NumPy array, so the reported names are positional ("x<i>")
# rather than the dataset's column labels.
print(sfs.get_feature_names_out())

print(X_selected)
#=====================================================================================================================

# Evaluate a linear-kernel SVR on the selected features with shuffled 5-fold
# cross-validation and report MAE, MMRE and RMSE averaged over the folds.

# Standardised copy of the whole selected matrix. It is NOT used for the
# cross-validation below (fitting the scaler on every row would leak validation
# statistics into training); it is only kept so that `scaler_X` and `X_scaled`
# stay defined for any later cell that relies on them.
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X_selected)

num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed_value)

# Per-fold metric values; their means are printed after the loop.
mean_RMSE=[]
mean_MMRE=[]
mean_MAE=[]

# NOTE(review): X_selected was chosen by a selector fitted on all rows, so the
# scores below are still optimistic with respect to feature selection.
for train_index, val_index in kf.split(X_selected):
    y_train, y_test = y[train_index], y[val_index]

    # Fit the scaler on the training fold only, then apply the same
    # transformation to the held-out fold, so no validation data influences
    # the preprocessing.
    fold_scaler = StandardScaler().fit(X_selected[train_index])
    X_train = fold_scaler.transform(X_selected[train_index])
    X_test = fold_scaler.transform(X_selected[val_index])

    final_model = SVR(kernel='linear')
    final_model.fit(X_train, y_train)

    y_pred = final_model.predict(X_test)

    # Mean Absolute Error (MAE) on the held-out fold.
    mae = mean_absolute_error(y_test, y_pred)
    mean_MAE.append(mae)

    # Mean Magnitude of Relative Error (MMRE): mean of |actual - predicted| / actual.
    mmre = np.mean(np.abs((y_test - y_pred) / y_test))
    mean_MMRE.append(mmre)

    # Root Mean Squared Error (RMSE) on the held-out fold.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mean_RMSE.append(rmse)

print(f"Mean Absolute Error mean: {np.mean(mean_MAE)}")
print(f"Mean Magnitude of Relative Error mean (MMRE): {np.mean(mean_MMRE):.2f}")
print(f"Root Mean Squared Error (RMSE) mean: {np.mean(mean_RMSE)}")

['x1' 'x2' 'x5' 'x11' 'x14' 'x16' 'x17' 'x18' 'x20' 'x23' 'x24']
[[   2    2    2    3    4    4    5    4    4   16  647]
 [   2    2    2    3    2    4    3    4    4    5  130]
 [   1    2    2    3    2    4    5    4    2    8  254]
 [   3    2    2    2    3    4    4    5    3   16 1056]
 [   2    2    2    4    4    4    4    3    5   12  383]
 [   2    3    2    3    2    4    4    4    4   12  345]
 [   2    2    2    4    3    5    5    3    4   27  209]
 [   2    2    2    2    4    4    3    3    3   24  366]
 [   2    4    1    2    4    3    2    2    3   54 1181]
 [   1    2    2    3    2    3    4    2    2   13  181]
 [   2    5    1    2    3    3    2    2    1   21  739]
 [   3    1    2    4    1    4    3    4    1    7  108]
 [   3    5    2    4    2    4    3    5    3   10   48]
 [   1    2    2    4    3    4    3    3    2   19  249]
 [   2    2    2    3    3    5    5    2    3   11  371]
 [   2    2    2    4    4    4    3    2    3   13  211]
 [   2 