In [56]:
!pip install pandas numpy xgboost scikit-learn joblib




[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [57]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV
import joblib

In [58]:
# Read the CSV at `file_path` into a DataFrame.
file_path = 'new_dataset/output.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [59]:
# The first 12 rows are used to establish the range of each parameter:
# select the seven parameter columns, then keep rows 0-11.
parameter_ranges = data.loc[:, ['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']].iloc[:12]
# Per-parameter lower and upper bounds (as NumPy arrays, in column order).
min_values = parameter_ranges.min().to_numpy()
max_values = parameter_ranges.max().to_numpy()

In [60]:
# Display the rows used to derive the parameter bounds.
parameter_ranges

Unnamed: 0,Q1,Q2,EN,SN,FN,F,M
0,0.9,0.9,0.25,0.1,0.03,0.0013,0.0
1,0.9,0.9,0.25,0.1,0.09,0.0015,1100.0
2,0.9,0.9,0.4,0.2,0.03,0.0013,0.0
3,0.9,1.1,0.4,0.2,0.03,0.0015,1100.0
4,0.9,1.1,0.25,0.2,0.09,0.0013,1100.0
5,0.9,1.1,0.4,0.1,0.09,0.0015,0.0
6,1.6,0.9,0.4,0.1,0.03,0.0015,1100.0
7,1.6,0.9,0.25,0.2,0.09,0.0015,0.0
8,1.6,0.9,0.4,0.2,0.09,0.0013,1100.0
9,1.6,1.1,0.25,0.1,0.03,0.0013,1100.0


In [61]:
# Display the per-parameter lower bounds (column order: Q1, Q2, EN, SN, FN, F, M).
min_values

array([0.9   , 0.9   , 0.25  , 0.1   , 0.03  , 0.0013, 0.    ])

In [62]:
# Display the per-parameter upper bounds (column order: Q1, Q2, EN, SN, FN, F, M).
max_values

array([1.6e+00, 1.1e+00, 4.0e-01, 2.0e-01, 9.0e-02, 1.5e-03, 1.1e+03])

In [63]:
# Training portion: everything after the first 12 rows, re-indexed from 0,
# with the listed bookkeeping/result columns removed.
data_after_12_rows = (
    data.iloc[12:]
    .reset_index(drop=True)
    .drop(columns=['Unnamed: 0', 'sim_status', 'final_neck_diameter'])
)

In [64]:
# Features are the seven parameter columns; the regression target is the MAPE column.
X = data_after_12_rows.loc[:, ['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']]
y = data_after_12_rows.loc[:, 'MAPE']

In [65]:
# Clean data by dropping every row that has a NaN in any feature OR in the target.
# The mask is built jointly so X and y keep exactly the same rows; calling
# dropna() on each independently can leave them with different lengths/indices,
# which either breaks fitting or silently pairs features with the wrong target.
rows_complete = X.notna().all(axis=1) & y.notna()
X = X.loc[rows_complete]
y = y.loc[rows_complete]

In [66]:
# Threshold for candidate selection: the smallest MAPE present in the cleaned target `y`.
# Generated candidates are later kept only if their predicted MAPE is below this value.
min_mape_threshold = y.min()

In [67]:
# Base XGBoost regressor (squared-error objective, fixed seed) whose
# hyper-parameters are tuned by the randomized search.
xgboost_model = xgb.XGBRegressor(
    random_state=42,
    objective='reg:squarederror',
)

In [68]:
# Candidate values for each XGBoost hyper-parameter; the randomized search
# samples combinations from these lists.
param_dist = dict(
    n_estimators=[100, 200, 300, 400, 500],
    learning_rate=[0.01, 0.05, 0.1, 0.2],
    max_depth=[3, 5, 7, 10],
    subsample=[0.6, 0.8, 1.0],
    colsample_bytree=[0.6, 0.8, 1.0],
    gamma=[0, 0.1, 0.2, 0.3],
)

In [69]:
# Randomized hyper-parameter search: 50 sampled combinations, 3-fold
# cross-validation, scored by negative MAPE, run on all available cores.
random_search = RandomizedSearchCV(
    xgboost_model,
    param_dist,
    n_iter=50,
    cv=3,
    scoring='neg_mean_absolute_percentage_error',
    random_state=42,
    n_jobs=-1,
    verbose=2,
)

In [70]:
# Run the randomized search on the cleaned features and target
# (50 candidates x 3 folds = 150 fits, as reported in the output below).
random_search.fit(X, y)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


In [71]:
# Keep the estimator that the randomized search selected as best.
best_xgb_model = random_search.best_estimator_

In [72]:
# Persist the tuned model to disk with joblib so it can be reloaded later.
model_file_path = 'best_xgb_model.joblib'
joblib.dump(value=best_xgb_model, filename=model_file_path)

['best_xgb_model.joblib']

In [73]:
# Function to generate new parameters
def generate_parameters(n_samples, min_values, max_values, random_state=None):
    """Draw uniformly distributed random parameter sets within per-parameter bounds.

    Parameters
    ----------
    n_samples : int
        Number of parameter sets (rows) to generate.
    min_values : array-like
        Lower bounds, one per parameter, in the order Q1, Q2, EN, SN, FN, F, M.
    max_values : array-like
        Upper bounds, in the same order as ``min_values``.
    random_state : int, optional
        Seed for a private ``numpy.random.RandomState``. When ``None`` (the
        default) NumPy's global random state is used, exactly as before, so
        ``np.random.seed(...)`` still controls the output.

    Returns
    -------
    pandas.DataFrame
        ``n_samples`` rows with columns Q1, Q2, EN, SN, FN, F, M and a derived
        column Q3 equal to ``Q1 ** 2``.

    Raises
    ------
    ValueError
        If ``min_values`` does not contain exactly one bound per parameter.
    """
    column_names = ['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']
    min_values = np.array(min_values)
    max_values = np.array(max_values)
    if len(min_values) != len(column_names):
        raise ValueError(
            f"expected {len(column_names)} bounds (one per parameter), got {len(min_values)}"
        )

    # The np.random module and RandomState expose the same `rand` API, so the
    # default path keeps drawing from the legacy global generator.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    unit_samples = rng.rand(n_samples, len(min_values))

    # Rescale the [0, 1) draws column-wise onto [min, max).
    random_parameters = unit_samples * (max_values - min_values) + min_values
    parameter_df = pd.DataFrame(random_parameters, columns=column_names)
    # Q3 is not sampled independently; it is tied to Q1 by Q3 = Q1 ** 2.
    parameter_df['Q3'] = parameter_df['Q1'] ** 2  # Enforcing the new relationship
    return parameter_df

In [84]:
# Number of random candidate parameter sets to generate within the specified ranges.
n_samples = 1000  # Generate a larger set to increase chances of finding lower MAPE

In [90]:
# Rebuild parameter_ranges from every row of `data` (the earlier cell used only the first 12 rows).
parameter_ranges = data[['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']]

In [91]:
# Determine the min and max values for each parameter from the current
# `parameter_ranges`, which at this point covers all rows of `data`
# (not just the first 12 rows).
min_values = parameter_ranges.min().values
max_values = parameter_ranges.max().values

In [92]:
# Seed NumPy's global RNG (same seed as the model/search above) so the sampled
# candidates, and everything derived from them, are reproducible across runs.
np.random.seed(42)
new_parameters = generate_parameters(n_samples, min_values, max_values)

In [93]:
# Predict MAPE for every generated candidate with the tuned XGBoost model.
# Only the seven feature columns the model was fitted on are passed (Q3 is not a model input).
predicted_mape_new_xgb = best_xgb_model.predict(new_parameters[['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']])

In [94]:
# Filter the new parameter sets to only those with predicted MAPE lower than the minimum threshold.
# The mask is computed once, and .loc + .assign builds a new frame, so no column is
# assigned onto a slice of `new_parameters` (which raises SettingWithCopyWarning).
# NOTE(review): the recorded outputs of this notebook show an empty result here. Tree
# ensembles generally cannot predict far outside the range of the training targets, so
# "prediction < min(y)" may rarely be satisfied — consider ranking candidates by
# predicted MAPE instead; confirm the intended selection rule.
below_threshold = predicted_mape_new_xgb < min_mape_threshold
lower_mape_parameters_xgb = new_parameters.loc[below_threshold].assign(
    Predicted_MAPE=predicted_mape_new_xgb[below_threshold]
)


In [95]:
# Sort the selected parameter sets by predicted MAPE (ascending, best first)
# and renumber the rows from 0; the result is displayed in the next cell.
lower_mape_parameters_sorted_xgb = lower_mape_parameters_xgb.sort_values(by='Predicted_MAPE').reset_index(drop=True)


In [96]:
# Display the selected candidates, sorted by predicted MAPE.
lower_mape_parameters_sorted_xgb

Unnamed: 0,Q1,Q2,EN,SN,FN,F,M,Q3,Predicted_MAPE


In [80]:
# Save the lower MAPE parameters to a CSV file for further analysis
# (index=False: the row index is not written as a column).
lower_mape_parameters_sorted_xgb.to_csv('lower_mape_parameters_xgb.csv', index=False)


In [81]:

# Print the first 10 rows of the sorted result, i.e. the 10 lowest predicted MAPE values
# (prints an empty frame when no candidate passed the threshold).
print(lower_mape_parameters_sorted_xgb.head(10))

Empty DataFrame
Columns: [Q1, Q2, EN, SN, FN, F, M, Q3, Predicted_MAPE]
Index: []


In [83]:
# Increase the number of generated samples
n_samples = 1000  # or higher

# Optionally widen the search space by scaling the observed bounds by 10%.
# NOTE(review): multiplicative scaling leaves a bound that is exactly 0 unchanged
# (the M lower bound displayed earlier is 0) and would shrink a negative minimum —
# confirm this is the intended way to expand the ranges.
min_values = parameter_ranges.min().values * 0.9
max_values = parameter_ranges.max().values * 1.1

# Seed NumPy's global RNG so this cell yields the same candidates on every run.
np.random.seed(42)

# Generate new parameters again with these adjusted ranges
new_parameters = generate_parameters(n_samples, min_values, max_values)

# Predict MAPE using the trained XGBoost model (seven feature columns only; Q3 is not a model input)
predicted_mape_new_xgb = best_xgb_model.predict(new_parameters[['Q1', 'Q2', 'EN', 'SN', 'FN', 'F', 'M']])

# Filter the new parameter sets to only those with predicted MAPE lower than the minimum threshold.
# The mask is computed once and .loc + .assign returns a new frame, avoiding the
# SettingWithCopyWarning caused by assigning a column onto a filtered slice.
below_threshold = predicted_mape_new_xgb < min_mape_threshold
lower_mape_parameters_xgb = new_parameters.loc[below_threshold].assign(
    Predicted_MAPE=predicted_mape_new_xgb[below_threshold]
)

# Sort the selected parameter sets by predicted MAPE (best first) and renumber the rows
lower_mape_parameters_sorted_xgb = lower_mape_parameters_xgb.sort_values(by='Predicted_MAPE').reset_index(drop=True)

# Save to CSV
lower_mape_parameters_sorted_xgb.to_csv('lower_mape_parameters_xgb_1.csv', index=False)

# Print the top 10 results
print(lower_mape_parameters_sorted_xgb.head(10))


Empty DataFrame
Columns: [Q1, Q2, EN, SN, FN, F, M, Q3, Predicted_MAPE]
Index: []
