In [22]:
import numpy as np
import pandas as pd
from deap import base, creator, tools, algorithms
import xgboost as xgb
import random


In [23]:
# Load the new dataset
file_path = 'new_dataset/output.csv'
data = pd.read_csv(file_path)

In [24]:
# Extract the first 12 rows to determine the range of each parameter
parameter_ranges = data.iloc[:12][['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']]

In [25]:
# Determine the min and max values for each parameter
min_values = parameter_ranges.min().values
max_values = parameter_ranges.max().values

In [26]:
# Correct the range for the 'F' column
F_min, F_max = 1.30e-3, 1.50e-3  # Correct range for F
min_values[5] = F_min
max_values[5] = F_max

In [27]:
# The remaining rows are the actual data for model training
data_cleaned = data.iloc[12:].reset_index(drop=True)
data_cleaned = data_cleaned.drop(columns=['Unnamed: 0', 'sim_status', 'final_neck_diameter'])
data_cleaned = data_cleaned.dropna(subset=['MAPE'])


In [28]:
# Define features and target
X = data_cleaned[['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']]
y = data_cleaned['MAPE']

In [29]:
# Ensure there are no missing values in the features
X = X.dropna()



In [30]:
# Train an XGBoost model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
xgb_model.fit(X, y)

In [31]:
# Define the fitness function
def fitness_function(individual):
    # Create a DataFrame with a single row containing the individual's parameters
    params = pd.DataFrame([individual], columns=['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M'])
    params['Q3'] = params['Q1'] ** 2  # Maintain the relationship Q3 = Q1^2
    predicted_mape = xgb_model.predict(params[['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']])
    return (predicted_mape[0],)


In [32]:
# Set up the DEAP framework
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))  # We want to minimize MAPE
creator.create("Individual", list, fitness=creator.FitnessMin)



In [33]:
toolbox = base.Toolbox()


In [34]:
# Attribute generator: define how each parameter is generated
for i in range(len(min_values)):
    toolbox.register(f"attr_{i}", random.uniform, min_values[i], max_values[i])


In [35]:
# Structure initializers: define how individuals and population are created
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_0, toolbox.attr_1, toolbox.attr_2,
                  toolbox.attr_3, toolbox.attr_4, toolbox.attr_5, toolbox.attr_6), n=1)


In [36]:
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [37]:
# Genetic operators with constraints
def mate_and_clip(ind1, ind2):
    tools.cxBlend(ind1, ind2, alpha=0.5)
    # Enforce constraints by clipping
    for i in range(len(ind1)):
        ind1[i] = max(min(ind1[i], max_values[i]), min_values[i])
        ind2[i] = max(min(ind2[i], max_values[i]), min_values[i])
    return ind1, ind2

In [38]:
def mutate_and_clip(ind):
    tools.mutGaussian(ind, mu=0, sigma=0.1, indpb=0.2)
    # Enforce constraints by clipping
    for i in range(len(ind)):
        ind[i] = max(min(ind[i], max_values[i]), min_values[i])
    return ind,

In [39]:

toolbox.register("mate", mate_and_clip)
toolbox.register("mutate", mutate_and_clip)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", fitness_function)

In [40]:
# Increase population size and number of generations
population_size = 300  # Increase population size
num_generations = 150  # Increase the number of generations
cxpb = 0.6  # Crossover probability
mutpb = 0.3  # Mutation probability


In [41]:
# Create an initial population
population = toolbox.population(n=population_size)


In [42]:
# Run the genetic algorithm
algorithms.eaSimple(population, toolbox, cxpb=cxpb, mutpb=mutpb, ngen=num_generations, verbose=True)


gen	nevals
0  	300   
1  	224   
2  	228   
3  	210   
4  	208   
5  	207   
6  	226   
7  	229   
8  	220   
9  	209   
10 	218   
11 	223   
12 	229   
13 	195   
14 	208   
15 	223   
16 	205   
17 	218   
18 	230   
19 	212   
20 	223   
21 	215   
22 	224   
23 	217   
24 	211   
25 	206   
26 	214   
27 	206   
28 	217   
29 	236   
30 	228   
31 	225   
32 	228   
33 	231   
34 	226   
35 	223   
36 	201   
37 	231   
38 	226   
39 	211   
40 	207   
41 	223   
42 	227   
43 	216   
44 	223   
45 	194   
46 	228   
47 	201   
48 	228   
49 	198   
50 	210   
51 	215   
52 	207   
53 	223   
54 	214   
55 	209   
56 	207   
57 	222   
58 	207   
59 	202   
60 	227   
61 	224   
62 	225   
63 	213   
64 	231   
65 	204   
66 	198   
67 	228   
68 	220   
69 	206   
70 	206   
71 	196   
72 	221   
73 	200   
74 	209   
75 	225   
76 	224   
77 	211   
78 	223   
79 	217   
80 	227   
81 	222   
82 	214   
83 	213   
84 	209   
85 	228   
86 	200   
87 	216   
88 	215   
89 	211   

([[1.1805467120383497,
   0.9111312686254077,
   0.36557268727397524,
   0.1702749203077263,
   0.034357348639508074,
   0.0013281500266324507,
   1052.9652444418475],
  [1.1805549455172173,
   0.9108295508802343,
   0.3658147196450895,
   0.17029393943652904,
   0.04835239352001023,
   0.0013282792760992799,
   1058.268930246126],
  [1.1806448265355627,
   0.9108591202448098,
   0.3644964781904314,
   0.17042371591308053,
   0.04768535847351282,
   0.001328032630487603,
   1049.7789428199458],
  [1.1811685481394354,
   0.9110107978501156,
   0.36567058847472644,
   0.17042916333191607,
   0.047349810346331137,
   0.0013280442283743346,
   1059.2959172551446],
  [1.1807477688021097,
   0.9106708194110197,
   0.36580956108237805,
   0.17029569222174878,
   0.04694408175540584,
   0.0013279377264302315,
   1055.3438579066485],
  [1.1804231610912446,
   0.9105612545209759,
   0.3657636856013742,
   0.17037985824406457,
   0.04702853464684064,
   0.0013278638743560956,
   1058.265152480919

In [43]:
# Extracting the best individuals
top_individuals = tools.selBest(population, k=10)


In [44]:
# Convert the top individuals to a DataFrame for easier analysis
top_individuals_df = pd.DataFrame(top_individuals, columns=['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M'])
top_individuals_df['Q3'] = top_individuals_df['Q1'] ** 2  # Maintain the relationship Q3 = Q1^2


In [45]:
# Predict their MAPE values
top_individuals_df['Predicted_MAPE'] = xgb_model.predict(top_individuals_df[['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']])


In [46]:

# Sort by MAPE and display the top 10 results
top_individuals_df = top_individuals_df.sort_values(by='Predicted_MAPE')
print(top_individuals_df.head(10))


         Q1        Q2        EN        SN        FN         F            M  \
0  1.180555  0.910830  0.365815  0.170294  0.048352  0.001328  1058.268930   
1  1.180645  0.910859  0.364496  0.170424  0.047685  0.001328  1049.778943   
2  1.181169  0.911011  0.365671  0.170429  0.047350  0.001328  1059.295917   
3  1.180748  0.910671  0.365810  0.170296  0.046944  0.001328  1055.343858   
4  1.180423  0.910561  0.365764  0.170380  0.047029  0.001328  1058.265152   
5  1.180493  0.910977  0.365749  0.170332  0.047257  0.001328  1054.815615   
6  1.181027  0.910840  0.366917  0.170282  0.047336  0.001328  1053.988856   
7  1.180476  0.910960  0.365626  0.170278  0.047272  0.001328  1050.842497   
8  1.180673  0.910995  0.366359  0.170508  0.047273  0.001328  1054.353925   
9  1.180574  0.910809  0.365754  0.170584  0.047027  0.001328  1052.235196   

         Q3  Predicted_MAPE  
0  1.393710        1.543544  
1  1.393922        1.543544  
2  1.395159        1.543544  
3  1.394165        1.

In [47]:
# Optionally save to a CSV for further analysis
top_individuals_df.to_csv('top_genetic_algorithm_results_constrained_2.csv', index=False)