In [1]:
import numpy as np
import pandas as pd
from deap import base, creator, tools, algorithms
import xgboost as xgb
import random


In [2]:
# Load the new dataset
file_path = 'new_dataset/output.csv'
data = pd.read_csv(file_path)

In [3]:
# Extract the first 12 rows to determine the range of each parameter
parameter_ranges = data.iloc[:12][['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']]

In [4]:
# Determine the min and max values for each parameter
min_values = parameter_ranges.min().values
max_values = parameter_ranges.max().values

In [5]:
# Correct the range for the 'F' column
F_min, F_max = 1.30e-3, 1.50e-3  # Correct range for F
min_values[5] = F_min
max_values[5] = F_max

In [6]:
# The remaining rows are the actual data for model training
data_cleaned = data.iloc[12:].reset_index(drop=True)
data_cleaned = data_cleaned.drop(columns=['Unnamed: 0', 'sim_status', 'final_neck_diameter'])
data_cleaned = data_cleaned.dropna(subset=['MAPE'])


In [7]:
# Define features and target
X = data_cleaned[['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']]
y = data_cleaned['MAPE']

In [8]:
# Ensure there are no missing values in the features
X = X.dropna()

In [9]:
# Train an XGBoost model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
xgb_model.fit(X, y)

In [10]:
# Define the fitness function
def fitness_function(individual):
    # Create a DataFrame with a single row containing the individual's parameters
    params = pd.DataFrame([individual], columns=['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M'])
    params['Q3'] = params['Q1'] ** 2  # Maintain the relationship Q3 = Q1^2
    predicted_mape = xgb_model.predict(params[['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']])
    return (predicted_mape[0],)


In [11]:
# Set up the DEAP framework
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))  # We want to minimize MAPE
creator.create("Individual", list, fitness=creator.FitnessMin)

In [12]:
toolbox = base.Toolbox()


In [13]:
# Attribute generator: define how each parameter is generated
for i in range(len(min_values)):
    toolbox.register(f"attr_{i}", random.uniform, min_values[i], max_values[i])

In [14]:
# Structure initializers: define how individuals and population are created
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_0, toolbox.attr_1, toolbox.attr_2,
                  toolbox.attr_3, toolbox.attr_4, toolbox.attr_5, toolbox.attr_6), n=1)


In [15]:
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [16]:
# Genetic operators with constraints
def mate_and_clip(ind1, ind2):
    tools.cxBlend(ind1, ind2, alpha=0.5)
    # Enforce constraints by clipping
    for i in range(len(ind1)):
        ind1[i] = max(min(ind1[i], max_values[i]), min_values[i])
        ind2[i] = max(min(ind2[i], max_values[i]), min_values[i])
    return ind1, ind2

In [17]:
def mutate_and_clip(ind):
    tools.mutGaussian(ind, mu=0, sigma=0.1, indpb=0.2)
    # Enforce constraints by clipping
    for i in range(len(ind)):
        ind[i] = max(min(ind[i], max_values[i]), min_values[i])
    return ind,

In [18]:
toolbox.register("mate", mate_and_clip)
toolbox.register("mutate", mutate_and_clip)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", fitness_function)

In [19]:
# Parameters for the genetic algorithm
population_size = 100
num_generations = 50
cxpb = 0.5  # Crossover probability
mutpb = 0.2  # Mutation probability


In [20]:
# Create an initial population
population = toolbox.population(n=population_size)


In [21]:
# Run the genetic algorithm
algorithms.eaSimple(population, toolbox, cxpb=cxpb, mutpb=mutpb, ngen=num_generations, verbose=True)


gen	nevals
0  	100   
1  	68    
2  	57    
3  	58    
4  	66    
5  	56    
6  	42    
7  	51    
8  	59    
9  	61    
10 	64    
11 	60    
12 	66    
13 	63    
14 	47    
15 	68    
16 	56    
17 	59    
18 	52    
19 	59    
20 	68    
21 	71    
22 	59    
23 	55    
24 	61    
25 	52    
26 	60    
27 	55    
28 	61    
29 	56    
30 	64    
31 	66    
32 	61    
33 	58    
34 	69    
35 	50    
36 	59    
37 	53    
38 	76    
39 	59    
40 	61    
41 	63    
42 	64    
43 	39    
44 	71    
45 	61    
46 	55    
47 	55    
48 	62    
49 	64    
50 	60    


([[1.102488196161121,
   0.9044994597546034,
   0.3692643315161235,
   0.1708494973129411,
   0.05335093043579857,
   0.001328069026438597,
   1059.508276965571],
  [1.1746216753450982,
   0.9036949586596205,
   0.3680773139867605,
   0.170364652268974,
   0.053239383484226306,
   0.001327991374344381,
   1056.9720882297233],
  [1.1748432534835542,
   1.041482623660166,
   0.3696513921629154,
   0.17057960619690882,
   0.0536653615895744,
   0.001328746720166618,
   1056.3006676159807],
  [1.1750417963248847,
   0.9037061402035464,
   0.3636042651419519,
   0.17075241520679219,
   0.05314305086244331,
   0.0013279747776746155,
   1056.3396088009818],
  [1.174553184219886,
   0.9035653362726128,
   0.3681915984523518,
   0.170744694186792,
   0.05286089461104531,
   0.0013282671330503864,
   1055.753891645257],
  [1.1746433216564665,
   0.9036883011349572,
   0.3681405985483599,
   0.17055079918528004,
   0.05333528764041343,
   0.0013273850684675991,
   1055.3496716148952],
  [1.174677

In [22]:
# Extracting the best individuals
top_individuals = tools.selBest(population, k=10)


In [23]:
# Convert the top individuals to a DataFrame for easier analysis
top_individuals_df = pd.DataFrame(top_individuals, columns=['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M'])
top_individuals_df['Q3'] = top_individuals_df['Q1'] ** 2  # Maintain the relationship Q3 = Q1^2


In [24]:
# Predict their MAPE values
top_individuals_df['Predicted_MAPE'] = xgb_model.predict(top_individuals_df[['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']])


In [26]:
# Sort by MAPE and display the top 10 results
top_individuals_df = top_individuals_df.sort_values(by='Predicted_MAPE')
print(top_individuals_df.head(10))


         Q1        Q2        EN        SN        FN         F            M  \
0  1.174622  0.903695  0.368077  0.170365  0.053239  0.001328  1056.972088   
1  1.175042  0.903706  0.363604  0.170752  0.053143  0.001328  1056.339609   
2  1.174553  0.903565  0.368192  0.170745  0.052861  0.001328  1055.753892   
3  1.174643  0.903688  0.368141  0.170551  0.053335  0.001327  1055.349672   
4  1.174677  0.904663  0.368330  0.170723  0.053006  0.001328  1056.566752   
5  1.174848  0.904267  0.368698  0.170606  0.053106  0.001328  1056.919933   
6  1.174792  0.904553  0.368293  0.170350  0.053250  0.001328  1056.544627   
7  1.174687  0.903891  0.368400  0.170148  0.053426  0.001328  1055.931112   
8  1.174852  0.904508  0.368381  0.170101  0.053268  0.001328  1056.497019   
9  1.174542  0.903474  0.368177  0.170138  0.053275  0.001328  1056.160237   

         Q3  Predicted_MAPE  
0  1.379736        1.875581  
1  1.380723        1.875581  
2  1.379575        1.875581  
3  1.379787        1.

In [27]:
# Optionally save to a CSV for further analysis
top_individuals_df.to_csv('top_genetic_algorithm_results_constrained.csv', index=False)