In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import random
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from bayes_opt import BayesianOptimization
from sklearn.metrics import mean_absolute_error


# Step 1: Load your dataset. Make sure you have a CSV file with relevant features and effort (target) column.
# Replace 'your_dataset.csv' with your actual dataset file name and adjust the column names accordingly.
Nasa=pd.read_csv("C:\\Users\\Asus\\Desktop\\Tehran university\\Seminar\\Datasets\\cocomonasa_2_dataset.csv",header=None,index_col=0)
columns_Nasa=['ProjName','CatofApp','ForG','Center','YearofDev','DevMode','rely','data','cplx','time','stor','virt','turn','acap','aexp','pcap','vexp','lexp','modp','tool','sced','loc','Effort']
Nasa.set_axis(columns_Nasa,axis='columns',inplace=True)
Nasa.rename_axis("Features", axis=1,inplace=True)
Nasa.rename_axis("Projects ID", axis=0,inplace=True)
Nasa.reset_index(inplace=True)
Nasa.drop(columns=['Projects ID'], inplace=True)

#Nasa dataset preprocessing
def NASA_quantifier(x):
    if x=='vl':
        return 0
    elif x=='l':
        return 1
    elif x=='n':
        return 2
    elif x=='h':
        return 3
    elif x=='vh':
        return 4
    elif x=='xh':
        return 5
    else:
        return x

Nasa=Nasa.applymap(NASA_quantifier)
Nasa=pd.get_dummies(Nasa)
dataset = Nasa


# Step 2: Prepare the data.
X = dataset.drop(columns=['Effort'])  # Features
y = dataset['Effort']  # Target (effort)


seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)


# Standardize the features to have zero mean and unit variance.
# Standardize the features
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

# Define the ANN model to be optimized.
def ann_model(neurons_input, neurons_hidden, num_hidden_layers, learning_rate):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(units=int(neurons_input), activation='relu', input_shape=(X_train.shape[1],)))

    for _ in range(int(num_hidden_layers)):
        model.add(tf.keras.layers.Dense(units=int(neurons_hidden), activation='relu'))

    model.add(tf.keras.layers.Dense(units=1))  # Output layer with a single unit for regression.

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    return model

# Define the search space for Bayesian optimization.
pbounds = {
    'neurons_input': (10, 50),
    'neurons_hidden': (10, 50),
    'num_hidden_layers': (1, 5),
    'learning_rate': (1e-5, 1e-2),
    'batch_size': (8, 32),
    'epochs': (5, 20)
}

# Define the function to optimize (minimize RMSE).
def optimize_effort_estimation(neurons_input, neurons_hidden, num_hidden_layers, learning_rate, batch_size, epochs):
    model = ann_model(neurons_input, neurons_hidden, num_hidden_layers, learning_rate)

    model.fit(X_train, y_train, batch_size=int(batch_size), epochs=int(epochs), verbose=0)

    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    return -rmse  # Minimize the negative RMSE for Bayesian optimization.

num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed_value)
mean_MAE=[]
mean_MMRE=[]
mean_RMSE=[]


for train_index, val_index in kf.split(X_scaled):
    X_train, X_test = X_scaled[train_index], X_scaled[val_index]
    y_train, y_test = y.iloc[train_index], y.iloc[val_index]
    
    # Perform Bayesian optimization.
    optimizer = BayesianOptimization(
        f=optimize_effort_estimation,
        pbounds=pbounds,
        random_state=42,
        verbose=2,
    )
    optimizer.maximize(init_points=10, n_iter=30)  # Adjust the number of initial points and iterations.
    
    best_params = optimizer.max['params']
    #build the model
    model = ann_model(best_params['neurons_input'],best_params['neurons_hidden'],best_params['num_hidden_layers'],best_params['learning_rate'])
    # Train the model.
    model.fit(X_train, y_train, epochs=int(best_params['epochs']), batch_size=int(best_params['batch_size']), verbose=0)


    # You can now use the trained model for prediction or further evaluation
    y_pred = model.predict(X_test)
    
    mae = mean_absolute_error(y_test, y_pred)
    mean_MAE.append(mae)

    # Calculate the Root Mean Squared Error (RMSE) to assess the model's performance.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mean_RMSE.append(rmse)
    
    # Convert y_pred and y_test to NumPy arrays for MMRE calculation
    y_pred = np.array(y_pred).flatten()
    y_test = np.array(y_test)
    # Calculate Mean Magnitude of Relative Error (MMRE)
    mmre = np.mean(np.abs((y_test - y_pred) / y_test))
    mean_MMRE.append(mmre)
    
print(f"Mean Absolute Error mean: {np.mean(mean_MAE)}")   
print(f"Mean Magnitude of Relative Error mean (MMRE): {np.mean(mean_MMRE):.2f}")
print(f"Root Mean Squared Error (RMSE) mean: {np.mean(mean_RMSE)}")

|   iter    |  target   | batch_... |  epochs   | learni... | neuron... | neuron... | num_hi... |
-------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m-424.9   [0m | [0m16.99    [0m | [0m19.26    [0m | [0m0.007323 [0m | [0m33.95    [0m | [0m16.24    [0m | [0m1.624    [0m |
| [95m2        [0m | [95m-188.5   [0m | [95m9.394    [0m | [95m17.99    [0m | [95m0.006015 [0m | [95m38.32    [0m | [95m10.82    [0m | [95m4.88     [0m |
| [0m3        [0m | [0m-460.4   [0m | [0m27.98    [0m | [0m8.185    [0m | [0m0.001826 [0m | [0m17.34    [0m | [0m22.17    [0m | [0m3.099    [0m |
| [0m4        [0m | [0m-429.4   [0m | [0m18.37    [0m | [0m9.368    [0m | [0m0.006122 [0m | [0m15.58    [0m | [0m21.69    [0m | [0m2.465    [0m |
| [0m5        [0m | [0m-451.5   [0m | [0m18.95    [0m | [0m16.78    [0m | [0m0.002005 [0m | [0m30.57    [0m | [0m33.7     [0m | [0m1.

| [0m32       [0m | [0m-254.5   [0m | [0m9.867    [0m | [0m15.77    [0m | [0m0.004208 [0m | [0m37.24    [0m | [0m13.58    [0m | [0m4.228    [0m |
| [0m33       [0m | [0m-386.8   [0m | [0m9.62     [0m | [0m15.13    [0m | [0m0.003579 [0m | [0m40.26    [0m | [0m11.66    [0m | [0m3.805    [0m |
| [0m34       [0m | [0m-277.0   [0m | [0m8.124    [0m | [0m19.07    [0m | [0m0.006999 [0m | [0m37.46    [0m | [0m12.83    [0m | [0m4.032    [0m |
| [0m35       [0m | [0m-270.7   [0m | [0m9.576    [0m | [0m16.7     [0m | [0m0.003991 [0m | [0m38.02    [0m | [0m13.08    [0m | [0m4.877    [0m |
| [0m36       [0m | [0m-267.0   [0m | [0m8.685    [0m | [0m16.12    [0m | [0m0.005912 [0m | [0m37.47    [0m | [0m12.86    [0m | [0m4.2      [0m |
| [0m37       [0m | [0m-272.9   [0m | [0m8.078    [0m | [0m18.13    [0m | [0m0.009336 [0m | [0m36.52    [0m | [0m12.14    [0m | [0m4.75     [0m |
| [0m38       [0m | [0m-3

| [0m29       [0m | [0m-567.2   [0m | [0m12.92    [0m | [0m14.96    [0m | [0m0.002982 [0m | [0m17.08    [0m | [0m34.0     [0m | [0m4.599    [0m |
| [0m30       [0m | [0m-594.3   [0m | [0m9.128    [0m | [0m18.03    [0m | [0m0.001733 [0m | [0m38.56    [0m | [0m11.6     [0m | [0m4.9      [0m |
| [0m31       [0m | [0m-586.2   [0m | [0m9.249    [0m | [0m18.9     [0m | [0m0.008678 [0m | [0m39.86    [0m | [0m11.55    [0m | [0m2.224    [0m |
| [0m32       [0m | [0m-581.2   [0m | [0m11.08    [0m | [0m14.49    [0m | [0m0.004566 [0m | [0m16.36    [0m | [0m37.16    [0m | [0m3.875    [0m |
| [0m33       [0m | [0m-472.7   [0m | [0m20.58    [0m | [0m8.984    [0m | [0m0.007567 [0m | [0m41.28    [0m | [0m46.56    [0m | [0m4.179    [0m |
| [0m34       [0m | [0m-525.3   [0m | [0m12.24    [0m | [0m14.93    [0m | [0m0.006543 [0m | [0m16.87    [0m | [0m35.56    [0m | [0m3.713    [0m |
| [0m35       [0m | [0m-4

| [0m26       [0m | [0m-1.348e+0[0m | [0m8.0      [0m | [0m20.0     [0m | [0m0.01     [0m | [0m17.75    [0m | [0m40.25    [0m | [0m5.0      [0m |
| [0m27       [0m | [0m-1.411e+0[0m | [0m8.0      [0m | [0m20.0     [0m | [0m0.01     [0m | [0m10.62    [0m | [0m32.93    [0m | [0m1.0      [0m |
| [0m28       [0m | [0m-1.94e+03[0m | [0m8.0      [0m | [0m5.584    [0m | [0m0.01     [0m | [0m18.42    [0m | [0m45.87    [0m | [0m1.0      [0m |
| [0m29       [0m | [0m-1.398e+0[0m | [0m8.171    [0m | [0m15.48    [0m | [0m0.009074 [0m | [0m15.47    [0m | [0m38.76    [0m | [0m2.05     [0m |
| [0m30       [0m | [0m-1.461e+0[0m | [0m8.478    [0m | [0m17.99    [0m | [0m0.002921 [0m | [0m27.6     [0m | [0m46.59    [0m | [0m4.288    [0m |
| [0m31       [0m | [0m-1.456e+0[0m | [0m8.0      [0m | [0m17.43    [0m | [0m0.01     [0m | [0m46.98    [0m | [0m10.0     [0m | [0m1.0      [0m |
| [0m32       [0m | [0m-1

| [0m23       [0m | [0m-1.01e+03[0m | [0m8.0      [0m | [0m13.14    [0m | [0m0.01     [0m | [0m10.0     [0m | [0m34.87    [0m | [0m5.0      [0m |
| [0m24       [0m | [0m-1.15e+03[0m | [0m13.67    [0m | [0m12.41    [0m | [0m0.01     [0m | [0m12.72    [0m | [0m32.47    [0m | [0m5.0      [0m |
| [0m25       [0m | [0m-1.186e+0[0m | [0m17.86    [0m | [0m18.73    [0m | [0m0.006072 [0m | [0m10.22    [0m | [0m34.03    [0m | [0m4.903    [0m |
| [0m26       [0m | [0m-1.192e+0[0m | [0m9.569    [0m | [0m18.7     [0m | [0m0.003495 [0m | [0m18.77    [0m | [0m31.5     [0m | [0m4.376    [0m |
| [0m27       [0m | [0m-1.356e+0[0m | [0m8.277    [0m | [0m13.73    [0m | [0m0.009698 [0m | [0m35.55    [0m | [0m10.18    [0m | [0m1.492    [0m |
| [95m28       [0m | [95m-739.9   [0m | [95m8.0      [0m | [95m11.05    [0m | [95m0.01     [0m | [95m17.49    [0m | [95m33.18    [0m | [95m5.0      [0m |
| [0m29       [0m 

| [0m20       [0m | [0m-664.9   [0m | [0m29.01    [0m | [0m10.02    [0m | [0m0.003366 [0m | [0m47.63    [0m | [0m31.9     [0m | [0m1.897    [0m |
| [0m21       [0m | [0m-522.4   [0m | [0m18.53    [0m | [0m19.72    [0m | [0m0.002655 [0m | [0m30.99    [0m | [0m21.28    [0m | [0m3.285    [0m |
| [0m22       [0m | [0m-610.2   [0m | [0m10.85    [0m | [0m10.28    [0m | [0m0.007454 [0m | [0m37.26    [0m | [0m32.57    [0m | [0m2.429    [0m |
| [0m23       [0m | [0m-669.0   [0m | [0m16.34    [0m | [0m15.16    [0m | [0m0.004878 [0m | [0m13.67    [0m | [0m10.61    [0m | [0m1.883    [0m |
| [0m24       [0m | [0m-576.8   [0m | [0m23.59    [0m | [0m18.05    [0m | [0m0.005597 [0m | [0m23.23    [0m | [0m48.02    [0m | [0m3.229    [0m |
| [0m25       [0m | [0m-662.3   [0m | [0m20.57    [0m | [0m7.095    [0m | [0m0.004777 [0m | [0m31.34    [0m | [0m14.44    [0m | [0m2.192    [0m |
| [0m26       [0m | [0m-6