In [13]:
import time
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score,  roc_auc_score
import xgboost as xgb



In [17]:
from sklearn.datasets import make_classification


X, y = make_classification(
    n_samples=5000000,           # liczba próbek
    n_features=50,            # liczba cech (dopasowana do breast cancer)
    n_informative=15,         # liczba cech informacyjnych
    n_redundant=5,            # liczba cech redundantnych
    n_repeated=0,             # liczba cech powtórzonych
    n_classes=2,              # klasy: 0 i 1
    weights=[0.6, 0.4],       # proporcje klas
    flip_y=0.01,              # szum etykiet
    class_sep=1.0,            # separacja klas
    random_state=42           # replikowalność
)


# 🔀 Podział na zbiór treningowy i testowy
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

# ⚙️ Model CPU
params_cpu ={
  "colsample_bytree": 0.7,
  'booster':'gbtree',
  "learning_rate": 0.1,
  "n_estimators":1000,
  "max_depth": 5,
  'objective': 'binary:logistic',
  "subsample": 0.8,
  "tree_method": "hist"
}

start_cpu = time.time()
model_cpu = xgb.train(params=params_cpu, dtrain=dtrain, num_boost_round=50)
cpu_time = time.time() - start_cpu

# 🚀 Model GPU
params_gpu ={
  "colsample_bytree": 0.7,
  'booster':'gbtree',
  "learning_rate": 0.1,
  "n_estimators":1000,
  "max_depth": 5,
  'objective': 'binary:logistic',
  "subsample": 0.8,
  "tree_method": "hist"
}
start_gpu = time.time()
model_gpu = xgb.train(params=params_gpu, dtrain=dtrain, num_boost_round=50)
gpu_time = time.time() - start_gpu


# 🔍 Predykcja i ewaluacja
y_pred_cpu_proba = model_cpu.predict(dtest)
y_pred_gpu_proba = model_gpu.predict(dtest)

# Konwersja na klasy
y_pred_cpu = (y_pred_cpu_proba > 0.5).astype(int)
y_pred_gpu = (y_pred_gpu_proba > 0.5).astype(int)

# Metryki
acc_cpu = accuracy_score(y_test, y_pred_cpu)
acc_gpu = accuracy_score(y_test, y_pred_gpu)

f1_cpu = f1_score(y_test, y_pred_cpu)
f1_gpu = f1_score(y_test, y_pred_gpu)

roc_cpu = roc_auc_score(y_test, y_pred_cpu_proba)
roc_gpu = roc_auc_score(y_test, y_pred_gpu_proba)


# 📊 Wyniki
print(f"🔧 CPU: time={cpu_time:.3f}s | acc={acc_cpu:.4f} | f1={f1_cpu:.4f} | roc_auc={roc_cpu:.4f}")
print(f"🚀 GPU: time={gpu_time:.3f}s | acc={acc_gpu:.4f} | f1={f1_gpu:.4f} | roc_auc={roc_gpu:.4f}")


Parameters: { "n_estimators" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "n_estimators" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🔧 CPU: time=23.026s | acc=0.9485 | f1=0.9345 | roc_auc=0.9856
🚀 GPU: time=20.999s | acc=0.9485 | f1=0.9345 | roc_auc=0.9856


In [9]:
import xgboost

build_info = xgboost.build_info()
for name in sorted(build_info.keys()):
    print(f'{name}: {build_info[name]}')

BUILTIN_PREFETCH_PRESENT: False
CUDA_VERSION: [12, 5]
DEBUG: False
MM_PREFETCH_PRESENT: True
THRUST_VERSION: [2, 6, 1]
USE_CUDA: True
USE_DLOPEN_NCCL: False
USE_FEDERATED: False
USE_NCCL: False
USE_OPENMP: True
USE_RMM: False
libxgboost: c:\Users\lkacz\myenv\.venv\Lib\site-packages\xgboost\lib\xgboost.dll


In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import time
import xgboost as xgb

# Generate an even larger dataset
np.random.seed(42)
X = np.random.rand(1000000, 100)
y = np.random.rand(1000000)

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess the data: standard scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
# GPU version with xgb.train using tree_method='hist' and device='cuda'
params_gpu = {
    'objective': 'binary:logistic',
    'tree_method': 'hist',
    'device': 'cuda',
    'eval_metric': 'logloss',
    'verbosity': 1,
    'random_state': 42
}


dtrain_gpu = xgb.DMatrix(X_train_scaled, label=y_train)
dtest_gpu = xgb.DMatrix(X_test_scaled, label=y_test)

# Training time on GPU
start_time = time.time()
model_gpu = xgb.train(params_gpu, dtrain_gpu, num_boost_round=20000)
gpu_train_time = time.time() - start_time

# Prediction time on GPU
start_time = time.time()
train_pred_gpu = model_gpu.predict(dtrain_gpu)
test_pred_gpu = model_gpu.predict(dtest_gpu)
gpu_pred_time = time.time() - start_time

# Evaluation
train_mae_gpu = mean_absolute_error(y_train, train_pred_gpu)
test_mae_gpu = mean_absolute_error(y_test, test_pred_gpu)


print(f"{'Training Time':<20}: {gpu_train_time:.4f} seconds")
print(f"{'Prediction Time':<20}: {gpu_pred_time:.4f} seconds")
print(f"Train MAE: {train_mae_gpu}, Test MAE: {test_mae_gpu}")


Training Time       : 149.3458 seconds
Prediction Time     : 8.9648 seconds
Train MAE: 0.013324174326933147, Test MAE: 0.2698850036755545


In [14]:
from xgboost import XGBRegressor
model_cpu = XGBRegressor(n_estimators=20000)

# Training time on CPU
start_time = time.time()
model_cpu.fit(X_train_scaled, y_train)
cpu_train_time = time.time() - start_time

# Prediction time on CPU
start_time = time.time()
train_pred_cpu = model_cpu.predict(X_train_scaled)
test_pred_cpu = model_cpu.predict(X_test_scaled)
cpu_pred_time = time.time() - start_time

# Evaluation
train_mae_cpu = mean_absolute_error(y_train, train_pred_cpu)
test_mae_cpu = mean_absolute_error(y_test, test_pred_cpu)


print(f"{'Training Time':<20}: {cpu_train_time:.4f} seconds")
print(f"{'Prediction Time':<20}: {cpu_pred_time:.4f} seconds")
print(f"Train MAE: {train_mae_cpu}, Test MAE: {test_mae_cpu}")


Training Time       : 965.4000 seconds
Prediction Time     : 22.9494 seconds
Train MAE: 0.007832585042845792, Test MAE: 0.2671084348938496
