Reference: XGBoost GPU acceleration demo (`cover_type.py`) — https://github.com/dmlc/xgboost/blob/master/demo/gpu_acceleration/cover_type.py

In [1]:
import time

import cupy as cp
from cuml.model_selection import train_test_split
from sklearn.datasets import fetch_covtype

import xgboost as xgb

ModuleNotFoundError: No module named 'cupy'

Loads the Covertype dataset from sklearn.datasets.

Returns:

- X: Features (forest cover attributes)
- y: Target labels (forest cover types: integers 1–7 by default)

Note: passing `return_X_y=True` makes `fetch_covtype` return just these two data arrays instead of a `Bunch` object.

In [5]:
# Fetch Covertype as a bare (features, labels) pair instead of a Bunch.
X, y = fetch_covtype(return_X_y=True)

# Display the container type of each array.
tuple(map(type, (X, y)))

(numpy.ndarray, numpy.ndarray)

In [6]:
# Dimensions of the feature matrix and of the label vector.
tuple(arr.shape for arr in (X, y))

((581012, 54), (581012,))

In [8]:
# Preview the first three feature rows.
X[:3]

array([[ 2.596e+03,  5.100e+01,  3.000e+00,  2.580e+02,  0.000e+00,
         5.100e+02,  2.210e+02,  2.320e+02,  1.480e+02,  6.279e+03,
         1.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,
         0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,
         0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,
         0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,
         0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,
         0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,
         0.000e+00,  0.000e+00,  1.000e+00,  0.000e+00,  0.000e+00,
         0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,
         0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00],
       [ 2.590e+03,  5.600e+01,  2.000e+00,  2.120e+02, -6.000e+00,
         3.900e+02,  2.200e+02,  2.350e+02,  1.510e+02,  6.225e+03,
         1.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,
         0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,  0.

In [9]:
# Preview the first three labels (still in their original, unshifted encoding).
y[:3]

array([5, 5, 2], dtype=int32)

In [10]:
# Convert both arrays to CuPy arrays so the later steps operate on GPU-resident data.
X, y = cp.array(X), cp.array(y)

# Confirm that both names now refer to CuPy arrays.
tuple(map(type, (X, y)))

(cupy.ndarray, cupy.ndarray)

In [11]:
# List the distinct class labels present in y.
cp.unique(y)

array([1, 2, 3, 4, 5, 6, 7], dtype=int32)

The original labels in y range from 1 to 7.

The next cell shifts them to start from 0, making the classes 0 through 6.

Many ML libraries, XGBoost included, expect zero-based class indexing: XGBoost's multi-class objectives require the labels to be integers in the range `[0, num_class)`.

In [12]:
# Shift the labels in place so the smallest class becomes 0.
# Re-running this cell is harmless: once the minimum is 0 it subtracts 0.
y -= y.min()

# Verify the shifted label set.
cp.unique(y)

array([0, 1, 2, 3, 4, 5, 6], dtype=int32)

In [13]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.75,
    test_size=0.25,
    random_state=42,
)

# Shapes of the two feature partitions.
tuple(part.shape for part in (X_train, X_test))

((435759, 54), (145253, 54))

In [21]:
# Train the classifier on the CPU and time the fit.
clf = xgb.XGBClassifier(device="cpu", n_estimators=1000, objective='multi:softprob')

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if the system
# clock is adjusted mid-run.
start = time.perf_counter()

# The held-out split is passed as eval_set so its mlogloss is logged during boosting.
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])

# Kept as a string, as before, so any later use of this name is unaffected.
cpu_time_taken = str(time.perf_counter() - start)

print(f"CPU Training Time: {cpu_time_taken} seconds")

[0]	validation_0-mlogloss:1.42212
[1]	validation_0-mlogloss:1.17151
[2]	validation_0-mlogloss:1.00764
[3]	validation_0-mlogloss:0.89319
[4]	validation_0-mlogloss:0.81078
[5]	validation_0-mlogloss:0.74744
[6]	validation_0-mlogloss:0.69943
[7]	validation_0-mlogloss:0.65986
[8]	validation_0-mlogloss:0.62999
[9]	validation_0-mlogloss:0.60633
[10]	validation_0-mlogloss:0.58626
[11]	validation_0-mlogloss:0.56916
[12]	validation_0-mlogloss:0.55507
[13]	validation_0-mlogloss:0.54300
[14]	validation_0-mlogloss:0.53071
[15]	validation_0-mlogloss:0.52178
[16]	validation_0-mlogloss:0.51422
[17]	validation_0-mlogloss:0.50683
[18]	validation_0-mlogloss:0.49752
[19]	validation_0-mlogloss:0.49255
[20]	validation_0-mlogloss:0.48770
[21]	validation_0-mlogloss:0.48294
[22]	validation_0-mlogloss:0.47707
[23]	validation_0-mlogloss:0.47306
[24]	validation_0-mlogloss:0.47077
[25]	validation_0-mlogloss:0.46587
[26]	validation_0-mlogloss:0.46285
[27]	validation_0-mlogloss:0.45837
[28]	validation_0-mlogloss:0.4

In [22]:
# Train the same classifier configuration on the GPU (device="cuda") and time the fit.
clf = xgb.XGBClassifier(device="cuda", n_estimators=1000, objective='multi:softprob')

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if the system
# clock is adjusted mid-run.
start = time.perf_counter()

# The held-out split is passed as eval_set so its mlogloss is logged during boosting.
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])

# Kept as a string, as before, so any later use of this name is unaffected.
gpu_time_taken = str(time.perf_counter() - start)

print(f"GPU Training Time: {gpu_time_taken} seconds")

[0]	validation_0-mlogloss:1.42213
[1]	validation_0-mlogloss:1.17151
[2]	validation_0-mlogloss:1.00775
[3]	validation_0-mlogloss:0.89335
[4]	validation_0-mlogloss:0.81089
[5]	validation_0-mlogloss:0.74795
[6]	validation_0-mlogloss:0.69960
[7]	validation_0-mlogloss:0.66128
[8]	validation_0-mlogloss:0.63137
[9]	validation_0-mlogloss:0.60679
[10]	validation_0-mlogloss:0.58733
[11]	validation_0-mlogloss:0.57010
[12]	validation_0-mlogloss:0.55405
[13]	validation_0-mlogloss:0.54224
[14]	validation_0-mlogloss:0.53155
[15]	validation_0-mlogloss:0.52321
[16]	validation_0-mlogloss:0.51385
[17]	validation_0-mlogloss:0.50331
[18]	validation_0-mlogloss:0.49750
[19]	validation_0-mlogloss:0.49014
[20]	validation_0-mlogloss:0.48568
[21]	validation_0-mlogloss:0.48186
[22]	validation_0-mlogloss:0.47825
[23]	validation_0-mlogloss:0.47408
[24]	validation_0-mlogloss:0.47003
[25]	validation_0-mlogloss:0.46684
[26]	validation_0-mlogloss:0.46377
[27]	validation_0-mlogloss:0.46178
[28]	validation_0-mlogloss:0.4