# Test GPU-accelerated XGB

In [2]:
import xgboost
import numpy as np
import sklearn

In [3]:
!which python

/home/users/rmovva/anaconda2/bin/python


In [4]:
!module load cuda
!module load cudnn

In [5]:
X_train = np.random.random((1000000, 10))
y_train = np.random.random(1000000)

In [6]:
model_cpu = xgboost.XGBRegressor(max_depth = 6, 
                             learning_rate = 0.05,
                             n_estimators = 25,
                             objective = 'reg:linear',
                             silent = False,
                             random_state = 0,
                             reg_lambda=0,
                             reg_alpha=0
                            )

model_gpu = xgboost.XGBRegressor(max_depth = 6, 
                             learning_rate = 0.05,
                             n_estimators = 25,
                             objective = 'reg:linear',
                             silent = False,
                             updater = 'grow_gpu', # train on GPU
                             random_state = 0,
                             reg_lambda=0,
                             reg_alpha=0,
                             gpu_id=3
                            )

model_cpu_hist = xgboost.XGBRegressor(max_depth = 6, 
                             learning_rate = 0.05,
                             n_estimators = 25,
                             objective = 'reg:linear',
                             silent = False,
                             tree_method = 'hist',
                             random_state = 0,
                             reg_lambda=0,
                             reg_alpha=0
                            )

model_gpu_hist = xgboost.XGBRegressor(max_depth = 6, 
                             learning_rate = 0.05,
                             n_estimators = 25,
                             objective = 'reg:linear',
                             silent = False,
                             updater = 'grow_gpu', # train on GPU
                             tree_method = 'hist',
                             random_state = 0,
                             reg_lambda=0,
                             reg_alpha=0,
                             gpu_id=3
                            )

In [7]:
import time

t0 = time.time()
model_cpu.fit(X_train, y_train)
print("Training on CPU (tree_method = exact) took %.3f s" % (time.time() - t0))

t0 = time.time()
model_gpu.fit(X_train, y_train)
print("Training on GPU (tree_method = exact) took %.3f s" % (time.time() - t0))
    
t0 = time.time()
model_cpu_hist.fit(X_train, y_train)
print("Training on CPU (tree_method = hist) took %.3f s" % (time.time() - t0))

t0 = time.time()
model_gpu_hist.fit(X_train, y_train)
print("Training on GPU (tree_method = hist) took %.3f s" % (time.time() - t0))

Training on CPU (tree_method = exact) took 110.519 s
Training on GPU (tree_method = exact) took 6.564 s
Training on CPU (tree_method = hist) took 10.665 s
Training on GPU (tree_method = hist) took 10.673 s


In [8]:
eval_size = 50000

y_pred_cpu = model_cpu.predict(X_train[:eval_size])
y_pred_gpu = model_gpu.predict(X_train[:eval_size])
y_pred_cpuhist = model_cpu_hist.predict(X_train[:eval_size])
y_pred_gpuhist = model_gpu_hist.predict(X_train[:eval_size])

from scipy.stats import spearmanr

print(spearmanr(y_pred_cpu, y_train[:eval_size]))
print(spearmanr(y_pred_gpu, y_train[:eval_size]))
print(spearmanr(y_pred_cpuhist, y_train[:eval_size]))
print(spearmanr(y_pred_gpuhist, y_train[:eval_size]))

SpearmanrResult(correlation=0.026486012825428468, pvalue=3.1546097072435049e-09)
SpearmanrResult(correlation=0.026486173415435754, pvalue=3.153920281607462e-09)
SpearmanrResult(correlation=0.049845568098914668, pvalue=6.9652413767016198e-29)
SpearmanrResult(correlation=0.049845568098914668, pvalue=6.9652413767016198e-29)
