In [1]:
import os
import glob
import torch
import random
import numpy as np

from cuml.preprocessing.LabelEncoder import LabelEncoder

import cupy as cp
import cudf as cd

import ray
from ray import tune, train as raytrain
from ray.train                    import Checkpoint
from ray.tune.schedulers          import ASHAScheduler
from ray.tune.integration.xgboost import TuneReportCheckpointCallback
from ray.tune.search.hyperopt     import HyperOptSearch

import xgboost as xgb

# Set Seed For Reproducibility

In [2]:
def set_reproducibillity(seed=None):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed to apply. When ``None``, the fallback value
            3407 is used instead.

    Returns:
        The seed that was actually applied, so callers can reuse it
        (e.g. in model or search configuration).
    """
    applied_seed = 3407 if seed is None else seed
    # One seeding entry point per library: NumPy, Python's `random`,
    # PyTorch (CPU), CuPy, and PyTorch (all CUDA devices).
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        cp.random.seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(applied_seed)
    return applied_seed


# Notebook-wide seed, reused below by the XGBoost and Ray Tune configuration.
seed = set_reproducibillity(8120)

# Preprocessing data

In [3]:
# Where Ray Tune writes its experiment results (a folder next to the notebook's
# working directory).
LOG_PATH = os.path.join(os.path.abspath(os.getcwd()), "ray_results")
train_url = "../data/titanic/train.csv"
test_url  = "../data/titanic/test_augmented.csv"
# Columns fed to the model; "Embarked_l" is the label-encoded port created below.
features = ["Pclass", "Sex", "SibSp", "Parch", "Age", "Embarked_l", "Fare"]


train = cd.read_csv(train_url)
test  = cd.read_csv(test_url)

# Impute missing values. The result is assigned back to the column instead of
# using `inplace=True` on a column selection, so the update does not depend on
# the selection being a view of the parent frame.
# NOTE(review): the test set is imputed with its own statistics, as in the
# original code; consider reusing the training statistics instead.
train['Age'] = train['Age'].fillna(train['Age'].median())
test['Age'] = test['Age'].fillna(test['Age'].median())
train['Embarked'] = train['Embarked'].fillna(train['Embarked'].mode()[0])
test['Embarked'] = test['Embarked'].fillna(test['Embarked'].mode()[0])
train['Fare'] = train['Fare'].fillna(train['Fare'].mean())
test['Fare'] = test['Fare'].fillna(test['Fare'].mean())

# Fit the encoder on the training data only and reuse that mapping for the
# test data. Re-fitting on the test set could assign different integer codes
# to the same port, silently corrupting the feature.
le = LabelEncoder()
train["Embarked_l"] = le.fit_transform(train.Embarked)
test["Embarked_l"]  = le.transform(test.Embarked)

# Split targets from features; get_dummies one-hot encodes the remaining
# string column(s) and everything is cast to float32.
train_y = train['Survived']
train_x = cd.get_dummies(train[features]).astype(cp.float32)
test_y  = test['Survived']
test_x  = cd.get_dummies(test[features]).astype(cp.float32)
model = None

# XGBoost settings

In [4]:
# Wrap the feature frames and their labels in XGBoost's DMatrix container.
dtrain = xgb.DMatrix(data=train_x, label=train_y)
dtest  = xgb.DMatrix(data=test_x, label=test_y)

# Default XGBoost

In [5]:
# Baseline (hand-picked) XGBoost parameters, used as a reference point before
# hyperparameter tuning.
config = {
    'verbosity': 2,
    'objective':  'binary:logistic',
    'num_class': 1,
    'grow_policy': 'lossguide',
    'learning_rate': 0.001,
    'max_leaves': 64,
    'max_depth': 6,
    'eval_metric':  'auc',
    'tree_method': 'hist',
    'device': "cuda",
    'min_child_weight': 7,
    'colsample_bytree': 0.5,
    'lambda': 0.5,
    'alpha': 1,
    'seed': seed
}

# The watchlist entry is the held-out set, so it is labelled 'test'
# (it was previously mislabelled 'train').
model = xgb.train(config, dtrain, num_boost_round=100, evals=[(dtest, 'test')], verbose_eval=False)

# Accuracy on the held-out set: threshold the predicted probabilities at 0.5
# and compare against the true labels.
# NOTE(review): with learning_rate=0.001 and only 100 rounds the predictions
# may barely move from the initial score — confirm the baseline is meaningful.
predict = model.predict(dtest)
predicted_labels = cp.array(predict > 0.5).astype(cp.float32)
is_correct = predicted_labels == cp.array(test_y)
print(cp.mean(is_correct).item())
del model

[13:52:34] INFO: /home/conda/feedstock_root/build_artifacts/xgboost-split_1732221635527/work/src/data/simple_dmatrix.cc:139: Generating new Ellpack page.
0.6220095693779905


In [6]:
def train_price(config):
    """Ray Tune trainable: fit one XGBoost model and report its test accuracy.

    Trains for 100 boosting rounds on the notebook-level ``train_x``/``train_y``
    and evaluates on ``test_x``/``test_y``. A single result is reported per
    trial, after training has finished.

    Args:
        config: XGBoost parameter dict, as sampled by Ray Tune from the
            search space.

    Returns:
        None. The metric ``mean_accuracy`` (fraction of test rows whose
        thresholded prediction matches the label) is sent to Ray via
        ``raytrain.report``.
    """
    # Build the DMatrix objects inside the trial from the already-prepared
    # notebook-level frames (no further train/test splitting happens here).
    dtest = xgb.DMatrix(test_x, test_y)
    dtrain = xgb.DMatrix(train_x, train_y)

    # The watchlist entry is the held-out set, so it is labelled 'test'
    # (it was previously mislabelled 'train').
    model = xgb.train(config, dtrain, num_boost_round=100, evals=[(dtest, 'test')], verbose_eval=False)

    # Threshold predicted probabilities at 0.5 and compare with the labels.
    predict = model.predict(dtest)
    is_correct = cp.array(predict > 0.5).astype(cp.float32) == cp.array(test_y)
    mean = cp.asnumpy(cp.mean(is_correct))
    raytrain.report(metrics={"mean_accuracy": float(mean)})
    
    
def tune_xgboost():
    """Run the Ray Tune hyperparameter search for the XGBoost classifier.

    Launches 80 trials of ``train_price``, with hyperparameters proposed by
    HyperOpt and trials scheduled by ASHA, maximising ``mean_accuracy``.
    Results are written under ``LOG_PATH``.

    Returns:
        The analysis object returned by ``tune.run``.
    """
    # HyperOpt-based searcher, seeded so the proposed configurations are repeatable.
    searcher = HyperOptSearch(metric="mean_accuracy", mode="max", random_state_seed=seed)

    # ASHA stops poorly performing trials early, based on the results they report.
    asha = ASHAScheduler(max_t=100, grace_period=4, reduction_factor=2)

    # Fixed XGBoost settings are mixed with sampled hyperparameter ranges.
    param_space = {
        "objective"       : 'binary:logistic',
        "eval_metric"     : 'auc',
        'learning_rate'   : tune.loguniform(1e-5, 1e-2),
        'max_leaves'      : tune.lograndint(16,128),
        "max_depth"       : tune.lograndint(1,10),
        "min_child_weight": tune.uniform(1,10),
        'colsample_bytree': tune.uniform(0.5, 1.0),
        'lambda'          : tune.uniform(0, 10),
        'alpha'           : tune.uniform(0, 1),
        'grow_policy': 'lossguide',
        'tree_method': 'hist',
        'device':"cuda",
        'num_class': 1,
        'seed' : seed
    }

    return tune.run(
        train_price,
        config=param_space,
        search_alg=searcher,
        scheduler=asha,
        metric="mean_accuracy",
        mode="max",
        num_samples=80,
        # Each trial reserves 2 CPUs and a tenth of a GPU.
        resources_per_trial={"cpu": 2, "gpu": 0.1},
        storage_path=LOG_PATH)

# Initialize Ray and run the search

In [7]:
# Tear down any existing Ray session first so that ray.init() below always
# starts a fresh local instance, even when this cell is re-run.
ray.shutdown()  # Restart Ray defensively in case the ray connection is lost. 
# Start Ray with the dashboard enabled; log_to_driver=False keeps worker-process
# output out of the notebook (per Ray's `log_to_driver` option).
ray.init(include_dashboard=True, log_to_driver=False)

2025-04-30 13:52:36,288	INFO worker.py:1812 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.10.16
Ray version:,2.40.0
Dashboard:,http://127.0.0.1:8265


In [8]:
# Builds a Checkpoint for the current directory and displays its repr.
# NOTE(review): the result is not assigned or used by any visible cell; this
# looks like a leftover scratch check — confirm whether it can be removed.
Checkpoint.from_directory('.')

Checkpoint(filesystem=local, path=.)

In [9]:
# Run the full hyperparameter search and keep the returned analysis object.
analysis = tune_xgboost()

2025-04-30 13:52:37,024	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2025-04-30 13:55:54
Running for:,00:03:16.96
Memory:,14.8/31.3 GiB

Trial name,status,loc,alpha,colsample_bytree,device,eval_metric,grow_policy,lambda,learning_rate,max_depth,max_leaves,min_child_weight,num_class,objective,seed,tree_method,acc,iter,total time (s)
train_price_4cfd2d8f,TERMINATED,192.168.0.69:791126,0.901835,0.593276,cuda,auc,lossguide,6.47138,9.50451e-05,1,19,4.05069,1,binary:logistic,8120,hist,0.62201,1,0.340755
train_price_554242d1,TERMINATED,192.168.0.69:791251,0.950537,0.748898,cuda,auc,lossguide,4.5018,2.90063e-05,3,33,4.18323,1,binary:logistic,8120,hist,0.62201,1,0.380777
train_price_ec2e3656,TERMINATED,192.168.0.69:791399,0.739256,0.939403,cuda,auc,lossguide,2.73415,0.00239453,4,79,9.73079,1,binary:logistic,8120,hist,0.62201,1,0.49566
train_price_f2c56475,TERMINATED,192.168.0.69:791514,0.599823,0.994651,cuda,auc,lossguide,5.13491,0.000126682,8,87,5.86094,1,binary:logistic,8120,hist,0.62201,1,0.779449
train_price_7401178c,TERMINATED,192.168.0.69:791642,0.188708,0.71984,cuda,auc,lossguide,1.05253,9.01036e-05,1,19,3.88174,1,binary:logistic,8120,hist,0.62201,1,0.317026
train_price_c4dacec9,TERMINATED,192.168.0.69:791762,0.484099,0.687463,cuda,auc,lossguide,0.527978,0.00174359,4,45,2.55206,1,binary:logistic,8120,hist,0.62201,1,0.413423
train_price_e7468b8c,TERMINATED,192.168.0.69:791926,0.744542,0.999895,cuda,auc,lossguide,3.48253,1.31302e-05,4,17,4.02788,1,binary:logistic,8120,hist,0.62201,1,0.396399
train_price_c0dfe65a,TERMINATED,192.168.0.69:792045,0.907324,0.923972,cuda,auc,lossguide,5.08229,5.00143e-05,2,19,1.49117,1,binary:logistic,8120,hist,0.62201,1,0.325876
train_price_5e38ed6c,TERMINATED,192.168.0.69:792160,0.387956,0.930464,cuda,auc,lossguide,6.57548,0.000429847,2,27,9.82948,1,binary:logistic,8120,hist,0.62201,1,0.319091
train_price_d2127390,TERMINATED,192.168.0.69:792277,0.80713,0.649782,cuda,auc,lossguide,1.29515,0.000168432,6,86,7.59371,1,binary:logistic,8120,hist,0.62201,1,0.407666


Trial name,mean_accuracy
train_price_02dac4bf,0.62201
train_price_038c84a0,0.770335
train_price_07b95e33,0.62201
train_price_0d0fb185,0.62201
train_price_13c04194,0.779904
train_price_14d6688d,0.667464
train_price_158ef4db,0.62201
train_price_15c24c1e,0.62201
train_price_1a6e2715,0.77512
train_price_1d80d0fb,0.77512


2025-04-30 13:55:54,011	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/media/HDD/JeYoung/Fundamentals_of_GPU_DataScience_with_RAPIDS/Chapter07/ray_results/train_price_2025-04-30_13-52-37' in 0.0397s.
2025-04-30 13:55:54,039	INFO tune.py:1041 -- Total run time: 197.02 seconds (196.92 seconds for the tuning loop).


In [10]:
from ray.tune import ExperimentAnalysis

# Reload the most recent experiment from disk. glob returns paths in arbitrary
# order, so taking the last element does not reliably pick the newest run;
# select the most recently modified experiment directory explicitly instead.
experiment_dirs = [
    path for path in glob.glob(os.path.join(LOG_PATH, "*")) if os.path.isdir(path)
]
latest_experiment_dir = max(experiment_dirs, key=os.path.getmtime)

analysis = ExperimentAnalysis(latest_experiment_dir, default_metric="mean_accuracy", default_mode="max")
# Show the best trial's final result (by mean_accuracy) as a one-row table.
analysis.best_result_df

Unnamed: 0_level_0,mean_accuracy,timestamp,checkpoint_dir_name,done,training_iteration,date,time_this_iter_s,time_total_s,pid,hostname,...,config/max_depth,config/min_child_weight,config/colsample_bytree,config/lambda,config/alpha,config/grow_policy,config/tree_method,config/device,config/num_class,config/seed
trial_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7fa207eb,0.779904,1745988933,,True,1,2025-04-30_13-55-33,0.471396,0.471396,799604,cvmi-jeyoung-System-Product-Name,...,5,3.629607,0.945509,3.380119,0.649328,lossguide,hist,cuda,1,8120
