## Illustration of the XGBoost method

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
import multiprocessing
from sklearn.model_selection import GridSearchCV
import os
import zipfile

In [2]:
# Unpack every member of the archive into the current working directory
# (extractall is called without a target path).
zip_path = 'dataLSTM_SR.txt.zip'
with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
    zip_ref.extractall()

In [3]:
ls

EWRI_2025_workshop.pdf      dataLSTM_SR.txt
README.md                   dataLSTM_SR.txt.zip
Xgboost_illustration.ipynb  requirements.txt
Xgboost_tutorial.ipynb


In [5]:
def XgboostParamTuning(X, y, param_grid=None, cv=3, search_n_jobs=2, model_n_jobs=10):
    """Grid-search XGBoost regressor hyperparameters with cross-validation.

    Builds an ``xgb.XGBRegressor`` that uses the ``hist`` tree method and the
    ``reg:quantileerror`` objective with ``quantile_alpha=0.5``, then fits a
    ``GridSearchCV`` over ``param_grid`` on ``(X, y)``.

    Parameters
    ----------
    X, y
        Training inputs and targets; passed unchanged to ``GridSearchCV.fit``.
    param_grid : dict, optional
        Mapping of parameter name to the list of values to try. When ``None``
        the built-in grid over ``max_depth``, ``n_estimators``, ``booster``,
        ``eta`` and ``subsample`` is used (7 * 3 * 2 * 5 * 3 = 630 candidates).
    cv : int, default 3
        Forwarded to ``GridSearchCV`` as ``cv``.
    search_n_jobs : int, default 2
        Forwarded to ``GridSearchCV`` as ``n_jobs``.
    model_n_jobs : int, default 10
        Forwarded to ``xgb.XGBRegressor`` as ``n_jobs``.

    Returns
    -------
    The ``GridSearchCV`` object after ``fit`` has been called on it.
    """
    # NOTE: the search used to be wrapped in `if __name__ == "__main__":`.
    # When this function was called from an imported module the guard was
    # false, `clf` was never assigned, and `return clf` raised
    # UnboundLocalError. The search now always runs.
    if param_grid is None:
        # Built fresh on each call so callers can never mutate a shared default.
        param_grid = {
            "max_depth": [2, 4, 6, 8, 10, 15, 20],
            "n_estimators": [50, 100, 200],
            'booster': ['gbtree', 'gblinear'],
            'eta': [0.10, 0.30, 0.50, 0.80, 1.00],
            'subsample': [0.5, 0.75, 1.00],
        }

    print("Parallel Parameter optimization")
    xgb_model = xgb.XGBRegressor(
        n_jobs=model_n_jobs,
        tree_method="hist",
        objective='reg:quantileerror',
        quantile_alpha=0.5,
        verbosity=0,
    )
    clf = GridSearchCV(
        xgb_model,
        param_grid,
        verbose=1,
        n_jobs=search_n_jobs,
        cv=cv,
    )
    clf.fit(X, y)

    return clf

In [6]:
# Toy problem used to show how XGBoost works: a parabola sampled every 0.5
# on [0, 100), stored as a single-column array.
xtrain = np.arange(0, 100, 0.5).reshape(-1, 1)
ytrain = (xtrain - 50) ** 2 + 10

# Cross-validated grid search for the best hyperparameters.
clf = XgboostParamTuning(xtrain, ytrain)
param = clf.best_params_

# Fit a regressor with the selected settings, using one thread per CPU
# reported by multiprocessing.cpu_count().
xgb_model = xgb.XGBRegressor(
    n_jobs=multiprocessing.cpu_count(),
    tree_method="hist",
    objective='reg:quantileerror',
    quantile_alpha=0.5,
    **{
        key: param[key]
        for key in ('booster', 'max_depth', 'n_estimators', 'eta', 'subsample')
    },
)
xgb_model.fit(xtrain, ytrain)

Parallel Parameter optimization
Fitting 3 folds for each of 630 candidates, totalling 1890 fits


In [8]:
# Save one figure per boosting round: frame i shows the training data and
# the prediction obtained from the first i rounds only. Frames are written
# to the current working directory as frame_001.png, frame_002.png, ...

# rcParams is global matplotlib state, so set the font once instead of on
# every iteration.
plt.rcParams['font.family'] = 'Arial'

for i in range(1, xgb_model.n_estimators + 1):
    # iteration_range=(0, i) restricts prediction to rounds [0, i).
    y_pred_stage = xgb_model.predict(xtrain, iteration_range=(0, i))

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(xtrain, ytrain, color='black')
    ax.scatter(xtrain, y_pred_stage, s=6)
    ax.legend(['Observed', 'Predicted'], frameon=False)
    # The round number was previously passed as a scatter `label`, but the
    # explicit legend labels above override it, so it never appeared in the
    # frame. Put it in the title so each frame identifies its round.
    ax.set_title(f"Boost {i}")

    # Save frame
    frame_path = f"frame_{i:03d}.png"
    fig.tight_layout()
    fig.savefig(frame_path)
    # Close this figure explicitly so figures do not accumulate in memory.
    plt.close(fig)