<a href="https://colab.research.google.com/github/rana14ce/Dataset-of-the-Multinational-Pile/blob/main/GitHub_XGBoost_ChatGPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install necessary packages
!pip install xgboost
!pip install pyswarm

Collecting pyswarm
  Downloading pyswarm-0.6.tar.gz (4.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyswarm
  Building wheel for pyswarm (setup.py) ... [?25l[?25hdone
  Created wheel for pyswarm: filename=pyswarm-0.6-py3-none-any.whl size=4463 sha256=9e6486cc897a674ebf5f42fb117535b113c9e17141e814603082bf31e8e5162c
  Stored in directory: /root/.cache/pip/wheels/bb/4f/ec/8970b83323e16aa95034da175454843947376614d6d5e9627f
Successfully built pyswarm
Installing collected packages: pyswarm
Successfully installed pyswarm-0.6


In [3]:
# Import libraries
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error
from pyswarm import pso

In [4]:
# Load the pile dataset straight from the GitHub repository.
url = 'https://raw.githubusercontent.com/rana14ce/Dataset-of-the-Multinational-Pile/main/Dataset%20of%20the%20Multinational%20Pile.csv'
data = pd.read_csv(url)
df = pd.DataFrame(data)

# Separate the target column from the predictors.
target_col = 'Qm_KN'
y = df[target_col]
X = df.drop(columns=target_col)

# Normalize input features to [0, 1].
# Note: the scaler is fitted on every row of X (this happens before any split).
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)


In [5]:
# Train-Test Split (80-20 split, random state fixed for reproducibility).
# The raw features are split first and the scaler is then fitted on the training
# rows only, so the min/max of the held-out rows never influence preprocessing
# (fitting the scaler on all rows before splitting leaks test-set statistics).
# The same random_state and row count select the same rows as splitting a
# pre-scaled copy of X would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Rebind `scaler` to the train-only fit so any later transform of new inputs
# matches the scaling the model is trained on.
scaler = MinMaxScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [6]:
# Define the PSO objective function for XGB
def pso_xgb(params):
    """Objective for the swarm: mean 5-fold cross-validated RMSE of an XGBoost regressor.

    Parameters
    ----------
    params : sequence of two numbers
        params[0] is truncated with ``int()`` and used as ``max_depth``;
        params[1] is passed through unchanged as ``gamma``.

    Returns
    -------
    float
        Mean RMSE over the folds (lower is better).

    Reads ``X_train`` and ``y_train`` from the notebook's global namespace.
    """
    tree_depth, split_gamma = int(params[0]), params[1]

    candidate = xgb.XGBRegressor(
        random_state=42,
        n_estimators=100,
        learning_rate=0.1,
        gamma=split_gamma,
        max_depth=tree_depth,
        objective='reg:squarederror',
    )

    # The scorer yields negated RMSE values, so flip the sign of the mean
    # to obtain a quantity the optimiser can minimise.
    fold_scores = cross_val_score(
        candidate, X_train, y_train, scoring='neg_root_mean_squared_error', cv=5
    )
    mean_rmse = -fold_scores.mean()
    return mean_rmse

# PSO bounds for max_depth and gamma
lb = [3, 0.0]     # Lower bounds
ub = [10, 1.0]    # Upper bounds

# pyswarm draws its particle positions and velocities from NumPy's global
# random generator; seed it so a fresh "Run All" reproduces the same search.
np.random.seed(42)

# Perform PSO (second return value, the best objective value, is not used)
best_params, _ = pso(pso_xgb, lb, ub, swarmsize=20, maxiter=30)

# Extract best parameters, using the same int() truncation as pso_xgb so the
# reported depth is the one that was actually evaluated.
best_max_depth = int(best_params[0])
best_gamma = best_params[1]

print(f"Best max_depth: {best_max_depth}, Best gamma: {best_gamma}")

Stopping search: maximum iterations reached --> 30
Best max_depth: 7, Best gamma: 0.7337608339913682


In [7]:
# Train XGB model with best parameters found by the PSO search
final_model = xgb.XGBRegressor(
    objective='reg:squarederror',
    max_depth=best_max_depth,
    gamma=best_gamma,
    learning_rate=0.1,
    n_estimators=100,
    random_state=42
)

final_model.fit(X_train, y_train)

# Evaluate on the test set
y_pred = final_model.predict(X_test)

r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"Test R²: {r2:.4f}")
print(f"Test RMSE: {rmse:.4f}")

# Predictions for the per-split metrics. The test predictions were already
# computed above, so reuse them instead of running predict on X_test again.
y_train_pred = final_model.predict(X_train)
y_test_pred = y_pred

# Metrics
def calculate_metrics(y_true, y_pred):
    """Compute R², RMSE and VAF (variance accounted for, in percent).

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, in the same row order as ``y_true``.

    Returns
    -------
    tuple
        ``(r2, rmse, vaf)`` where ``vaf = (1 - var(y_true - y_pred) / var(y_true)) * 100``.
    """
    # Convert to plain arrays first: subtracting two pandas objects aligns on
    # index labels rather than position, which silently corrupts the residuals
    # when the indices differ (e.g. a shuffled split vs. a fresh RangeIndex).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    residuals = y_true - y_pred

    r2 = r2_score(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    vaf = (1 - np.var(residuals) / np.var(y_true)) * 100
    return r2, rmse, vaf

# Score both splits with the shared helper, then unpack into named values.
train_scores = calculate_metrics(y_train, y_train_pred)
test_scores = calculate_metrics(y_test, y_test_pred)
(r2_train, rmse_train, vaf_train), (r2_test, rmse_test, vaf_test) = train_scores, test_scores

# Report training and testing performance side by side.
print(f"Training R²: {r2_train:.4f}, RMSE: {rmse_train:.4f}, VAF: {vaf_train:.4f}")
print(f"Testing R²: {r2_test:.4f}, RMSE: {rmse_test:.4f}, VAF: {vaf_test:.4f}")

Test R²: 0.3711
Test RMSE: 1091.9985
Training R²: 0.9999, RMSE: 10.5668, VAF: 99.9915
Testing R²: 0.3711, RMSE: 1091.9985, VAF: 39.4594
