In [1]:
import pandas as pd
import numpy as np
import math
from sklearn.preprocessing import StandardScaler
import random
from sklearn.model_selection import train_test_split, KFold
import sklearn
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
import tensorflow as tf
reduce_retracing=True

In [2]:


Nasa=pd.read_csv("C:\\Users\\Asus\\Desktop\\Tehran university\\Seminar\\Datasets\\cocomonasa_2_dataset.csv",header=None,index_col=0)
columns_Nasa=['ProjName','CatofApp','ForG','Center','YearofDev','DevMode','rely','data','cplx','time','stor','virt','turn','acap','aexp','pcap','vexp','lexp','modp','tool','sced','loc','Effort']
Nasa.set_axis(columns_Nasa,axis='columns',inplace=True)
Nasa.rename_axis("Features", axis=1,inplace=True)
Nasa.rename_axis("Projects ID", axis=0,inplace=True)
Nasa.reset_index(inplace=True)
Nasa.drop(columns=['Projects ID'], inplace=True)

#Nasa dataset preprocessing
def NASA_quantifier(x):
    if x=='vl':
        return 0
    elif x=='l':
        return 1
    elif x=='n':
        return 2
    elif x=='h':
        return 3
    elif x=='vh':
        return 4
    elif x=='xh':
        return 5
    else:
        return x

Nasa=Nasa.applymap(NASA_quantifier)
dataset = Nasa

# Step 2: Prepare the data.
X = dataset.drop(columns=['Effort'])  # Features
y = dataset['Effort'].values  # Target (effort)

seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)
tf.function(reduce_retracing=True)

# Genetic Algorithm Parameters
population_size = 10
num_generations = 100
crossover_rate = 0.8
mutation_rate = 0.1

# Create an initial population of feature subsets
population = []
for _ in range(population_size):
    subset = [random.randint(0, 1) for _ in range(X.shape[1])]
    population.append(subset)

# Define the fitness function
def fitness_function(subset):
    selected_features = [feature for feature, is_selected in zip(range(X.shape[1]), subset) if is_selected]
    if len(selected_features) == 0:
        return float('-inf')  # Penalize subsets with no selected features
    
    
    num_folds = 5
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed_value)
    cross_val_rmse = []
    
    X_selected = X.iloc[:, selected_features]
    X_selected = pd.get_dummies(X_selected)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_selected)
    error_list=[]
    
    for train_index, val_index in kf.split(X_scaled):
        X_train, X_test = X_scaled[train_index], X_scaled[val_index]
        y_train, y_test = y[train_index], y[val_index]
    
        # Step 3: Build the ANN model.
        model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(units=32, activation='relu', input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(units=16, activation='relu'),
        tf.keras.layers.Dense(units=1)  # Output layer with a single unit for regression.
    ])

        # Step 4: Compile the model.
        model.compile(optimizer='adam', loss='mean_squared_error')

        # Step 5: Train the model.
        model.fit(X_train, y_train, epochs=5, batch_size=8, verbose=0)

        # Step 6: Evaluate the model.
        y_pred = model.predict(X_test)
    
        error = np.mean(np.abs(y_pred - y_test))
        error_list.append(error)
        
    return 1 / (1 + np.mean(error_list))  # Fitness is the inverse of the error (higher is better)

#====================================================================================================================

max_fitness_score=0
max_score_features=0

# Genetic Algorithm
for generation in range(num_generations):
    # Evaluate fitness for each feature subset in the population
   # fitness_scores = [fitness_function(subset) for subset in population]
   # if max(fitness_scores) > max_fitness_score:
    #    max_fitness_score = max(fitness_scores)
     #   max_score_features =population[fitness_scores.index(max(fitness_scores))]
    
    # Selection
    selected_population = sorted(population, key=lambda x: fitness_function(x), reverse=True)
    
    # Crossover
    offspring_population = selected_population[:2] # Elitism, preserving the top two individuals
    for i in range(0, population_size, 2):
        parent1 = selected_population[i]
        parent2 = selected_population[i + 1]
        
        if random.random() < crossover_rate: # single point crossover
            crossover_point = random.randint(1, len(parent1) - 1)
            child1 = parent1[:crossover_point] + parent2[crossover_point:]
            child2 = parent2[:crossover_point] + parent1[crossover_point:]
        else:
            child1 = parent1
            child2 = parent2
        
        offspring_population.append(child1)
        offspring_population.append(child2)
    
    # Mutation
    for i in range(2,population_size):
        if random.random() < mutation_rate:
            mutated_feature = random.randint(0, X.shape[1] - 1)
            offspring_population[i][mutated_feature] = 1 - offspring_population[i][mutated_feature]
    
    # Replace the old population with the new offspring population
    population = offspring_population

# Select the best feature subset from the final population
best_subset = max(population, key=fitness_function)
selected_features = [feature for feature, is_selected in zip(range(X.shape[1]), best_subset) if is_selected]
print(selected_features)

#======================================================================================================================

# Train the final model using the selected features
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed_value)
X_selected = X.iloc[:, selected_features]
X_selected = pd.get_dummies(X_selected)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_selected)
mean_MAE=[]
mean_MMRE=[]
mean_RMSE=[]
    
for train_index, val_index in kf.split(X_scaled):
    X_train, X_test = X_scaled[train_index], X_scaled[val_index]
    y_train, y_test = y[train_index], y[val_index]
    
    # Step 3: Build the ANN model.
    model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=32, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dense(units=1)  # Output layer with a single unit for regression.
])

    # Step 4: Compile the model.
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Step 5: Train the model.
    model.fit(X_train, y_train, epochs=5, batch_size=8, verbose=1)

    # Step 6: Evaluate the model.
    y_pred = model.predict(X_test)
    
    mae = mean_absolute_error(y_test, y_pred)
    mean_MAE.append(mae)

    # Calculate the Root Mean Squared Error (RMSE) for this fold.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mean_RMSE.append(rmse)

     # Convert y_pred and y_test to NumPy arrays for MMRE calculation
    y_pred = np.array(y_pred).flatten()
    y_test = np.array(y_test)
    
    # Calculate the Mean Magnitude of Relative Error (MMRE) for this fold.
    mmre = np.mean(np.abs((y_test - y_pred) / y_test))
    mean_MMRE.append(mmre)

print(f"Mean Absolute Error mean: {np.mean(mean_MAE)}")   
print(f"Mean Magnitude of Relative Error mean (MMRE): {np.mean(mean_MMRE):.2f}")
print(f"Root Mean Squared Error (RMSE) mean: {np.mean(mean_RMSE)}")

#===================================================================================================================

# train the model using max_features

# X_selected = X[:, max_score_features]
# X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# model3 = tf.keras.models.Sequential([
#     tf.keras.layers.Dense(units=32, activation='relu', input_shape=(X_train.shape[1],)),
#     tf.keras.layers.Dense(units=16, activation='relu'),
#     tf.keras.layers.Dense(units=1)  # Output layer with a single unit for regression.
# ])

#  # Step 4: Compile the model.
# model3.compile(optimizer='adam', loss='mean_squared_error')

#  # Step 5: Train the model.
# model3.fit(X_train_scaled, y_train, epochs=10, batch_size=8, verbose=1)

# # You can now use the trained model for prediction or further evaluation
# y_pred = model3.predict(X_test_scaled)

# # Calculate the Root Mean Squared Error (RMSE) to assess the model's performance.
# rmse3 = np.sqrt(mean_squared_error(y_test, y_pred))
# print(f"Root Mean Squared Error (RMSE): {rmse3}")

# # Convert y_pred and y_test to NumPy arrays
# y_pred = np.array(y_pred).flatten()
# y_test = np.array(y_test)

# # Calculate Mean Magnitude of Relative Error (MMRE)
# mmre = np.mean(np.abs((y_test - y_pred) / y_test))
# print(f"Mean Magnitude of Relative Error (MMRE): {mmre:.2f}")




# print(best_subset," \n ", max_score_features)
















































































[0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 17, 18, 20, 21]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Mean Absolute Error mean: 626.2836019653936
Mean Magnitude of Relative Error mean (MMRE): 0.99
Root Mean Squared Error (RMSE) mean: 1124.789970232996
