# Speed comparison of CPU vs. GPU on XGBoost with MNIST

## 0. Imports

In [31]:
import time

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

## 1. Data

In [32]:
# Fetch version 1 of the 'mnist_784' dataset from OpenML.
# return_X_y=True yields the (data, target) pair directly, and
# as_frame=False asks for array containers instead of pandas objects.
X, y = fetch_openml(
    'mnist_784',
    version=1,
    return_X_y=True,
    as_frame=False,
)

## 3. Data preparation

In [33]:
# Hold out 20% of the samples for testing. stratify=y keeps the class
# proportions the same in both partitions. random_state pins the shuffle so
# that Restart & Run All reproduces the same split, which keeps the timing
# and accuracy figures below comparable between sessions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

## 4. XGBoost

In [35]:
import xgboost as xgb

# CPU baseline: no tree_method override, two boosting rounds.
param_dist = {
    'objective': 'multi:softmax',
    'n_estimators': 2,
    # The metric is configured on the estimator rather than in fit():
    # passing eval_metric to fit() is deprecated in newer XGBoost releases,
    # and 'mlogloss' is the multi-class counterpart of the binary 'logloss'.
    'eval_metric': 'mlogloss',
}

clf = xgb.XGBClassifier(**param_dist)

# perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed time; time.time() follows the system clock, which can
# be adjusted while the fit is running.
start_time = time.perf_counter()

clf.fit(X_train, y_train, verbose=True)

fit_seconds = time.perf_counter() - start_time

# Keep sub-second precision so a fast run is not reported as "0 s".
print('Execution fit time: %.2f s' % fit_seconds)



Execution fit time: 19 s


In [11]:
# Predict labels for the held-out split with the classifier fitted above.
y_pred = clf.predict(X_test)

# Build both summaries from the same (truth, prediction) pair, then show
# the confusion matrix followed by the per-class report.
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print(test_confusion)
print(test_report)

[[1318    0    6    4    6    8    9    3   22    5]
 [   0 1517   17    9    4    4    8    5    9    2]
 [  29   15 1242   24   12    6    8   12   45    5]
 [  10   15   48 1225    8   42    9   17   35   19]
 [   4    6    7    7 1214   15    9   12   23   68]
 [  21    9    5   74   10 1021   32    8   44   39]
 [  18    3    5    5   26   27 1268    0   22    1]
 [   2   10   25   15   19    5    0 1288   19   75]
 [   8   19   22   33   17   22    6    9 1195   34]
 [   5    5    5   26   37   16    0   42   13 1243]]
              precision    recall  f1-score   support

           0       0.93      0.95      0.94      1381
           1       0.95      0.96      0.96      1575
           2       0.90      0.89      0.89      1398
           3       0.86      0.86      0.86      1428
           4       0.90      0.89      0.89      1365
           5       0.88      0.81      0.84      1263
           6       0.94      0.92      0.93      1375
           7       0.92      0.88   

## 5. XGBoost with CUDA (not working)

In [36]:
import xgboost as xgb

# GPU run: same objective and number of rounds as the CPU cell, but with the
# GPU histogram tree method on device 0.
# NOTE(review): the output recorded below this cell shows the fit aborting
# inside gpu_hist with an NCCL "unhandled system error". Nothing in this
# cell references NCCL, so the failure presumably comes from the installed
# XGBoost / CUDA / NCCL setup rather than from these parameters. TODO confirm
# by checking the driver and trying a different XGBoost build.
param_dist = {
    'objective': 'multi:softmax',
    'n_estimators': 2,
    'tree_method': 'gpu_hist',
    'gpu_id': 0,
    # The metric is configured on the estimator rather than in fit():
    # passing eval_metric to fit() is deprecated in newer XGBoost releases,
    # and 'mlogloss' is the multi-class counterpart of the binary 'logloss'.
    'eval_metric': 'mlogloss',
}

clf = xgb.XGBClassifier(**param_dist)

# perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed time; time.time() follows the system clock, which can
# be adjusted while the fit is running.
start_time = time.perf_counter()

clf.fit(X_train, y_train, verbose=True)

fit_seconds = time.perf_counter() - start_time

# Keep sub-second precision: a short GPU fit would otherwise print "0 s".
print('Execution fit time: %.2f s' % fit_seconds)

XGBoostError: [20:22:37] ../src/tree/updater_gpu_hist.cu:793: Exception in gpu_hist: NCCL failure :unhandled system error ../src/common/device_helpers.cu(71)

Stack trace:
  [bt] (0) /home/eunchong/.local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(+0x9133f) [0x7fb51a8fc33f]
  [bt] (1) /home/eunchong/.local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(+0x4e9a88) [0x7fb51ad54a88]
  [bt] (2) /home/eunchong/.local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(+0x18c862) [0x7fb51a9f7862]
  [bt] (3) /home/eunchong/.local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(+0x18ead8) [0x7fb51a9f9ad8]
  [bt] (4) /home/eunchong/.local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(+0x1b9b93) [0x7fb51aa24b93]
  [bt] (5) /home/eunchong/.local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x50) [0x7fb51a8ebed0]
  [bt] (6) /lib/x86_64-linux-gnu/libffi.so.7(+0x6ff5) [0x7fb5ff00cff5]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.7(+0x640a) [0x7fb5ff00c40a]
  [bt] (8) /usr/lib/python3.8/lib-dynload/_ctypes.cpython-38-x86_64-linux-gnu.so(_ctypes_callproc+0x5b6) [0x7fb5ff025316]

