In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import time
import xgboost as xgb
from xgboost import XGBRegressor

#  Random generated dataset
np.random.seed(42)
X = np.random.rand(1000000, 100)
y = np.random.rand(1000000)

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing the data: StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [2]:
# GPU version hyperparameters
params_gpu = {
    'objective': 'binary:logistic',
    'tree_method': 'hist',
    'device': 'cuda',
    'eval_metric': 'logloss',
    'verbosity': 1,
    'random_state': 42
}


dtrain_gpu = xgb.DMatrix(X_train_scaled, label=y_train)
dtest_gpu = xgb.DMatrix(X_test_scaled, label=y_test)

# Training time on GPU
start_time = time.time()
model_gpu = xgb.train(params_gpu, dtrain_gpu, num_boost_round=20000)
gpu_train_time = time.time() - start_time

# Prediction time on GPU
start_time = time.time()
train_pred_gpu = model_gpu.predict(dtrain_gpu)
test_pred_gpu = model_gpu.predict(dtest_gpu)
gpu_pred_time = time.time() - start_time

# Evaluation
train_mae_gpu = mean_absolute_error(y_train, train_pred_gpu)
test_mae_gpu = mean_absolute_error(y_test, test_pred_gpu)


print(f"{'Training Time':<20}: {gpu_train_time:.4f} seconds")
print(f"{'Prediction Time':<20}: {gpu_pred_time:.4f} seconds")
print(f"Train MAE: {train_mae_gpu}, Test MAE: {test_mae_gpu}")


Training Time       : 257.7513 seconds
Prediction Time     : 5.9108 seconds
Train MAE: 0.013324174326933148, Test MAE: 0.26988500367555446


In [None]:
model_cpu = XGBRegressor(n_estimators=20000)

# Training time on CPU
start_time = time.time()
model_cpu.fit(X_train_scaled, y_train)
cpu_train_time = time.time() - start_time

# Prediction time on CPU
start_time = time.time()
train_pred_cpu = model_cpu.predict(X_train_scaled)
test_pred_cpu = model_cpu.predict(X_test_scaled)
cpu_pred_time = time.time() - start_time

# Evaluation
train_mae_cpu = mean_absolute_error(y_train, train_pred_cpu)
test_mae_cpu = mean_absolute_error(y_test, test_pred_cpu)


print(f"{'Training Time':<20}: {cpu_train_time:.4f} seconds")
print(f"{'Prediction Time':<20}: {cpu_pred_time:.4f} seconds")
print(f"Train MAE: {train_mae_cpu}, Test MAE: {test_mae_cpu}")


Training Time       : 965.4000 seconds
Prediction Time     : 22.9494 seconds
Train MAE: 0.007832585042845792, Test MAE: 0.2671084348938496
