A question we often hear is: "Does a GPU really speed up LightGBM?" Indeed, even when the training log confirms that the GPU is being used:
* With ```device="gpu"```
* [LightGBM] [Info] This is the GPU trainer!!
* [LightGBM] [Info] Using GPU Device: Tesla P100-PCIE-16GB, Vendor: NVIDIA Corporation
* [LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...

we still find training running quicker *without* the GPU.

This notebook tests training with and without GPU using 3 libraries:
* CatBoost;
* LightGBM;
* XGBoost.

on 4 datasets:
* January tabular playground;
* February tabular playground;
* March tabular playground;
* catboost.datasets.amazon.

For the cases tested here, turning the GPU on gave the following results:
* LightGBM never ran quicker, in fact most of the time slower;
* XGBoost always ran quicker; 
* CatBoost ran quicker on all 3 tabular playgrounds.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import catboost as cat
import lightgbm as lgb
import xgboost as xgb
from category_encoders import OrdinalEncoder, LeaveOneOutEncoder
from sklearn.model_selection import train_test_split
import os, sys, time
from tqdm import tqdm_notebook
from datetime import datetime
from pytz import timezone
# Print the notebook start time in the Canada/Pacific time zone, to minute precision.
print('tic', datetime.now(timezone('Canada/Pacific')).isoformat(timespec='minutes'))

# Post-processing
This section post-processes the output files from Version 7 and Version 8 of this notebook.
* Version 7 and Version 8 of this notebook ran exactly the same code.
* Version 7 was run with Settings > Accelerator > None. Its output file was downloaded and is reloaded here as ../input/cpu-gpu/cpu.csv.
* Version 8 was run with Settings > Accelerator > GPU. Its output file was downloaded and is reloaded here as ../input/cpu-gpu/gpu.csv.

In [None]:
# Reload the timing logs of the CPU-only run and the GPU run, tagging each
# row with the accelerator setting it was produced under.
cpu = pd.read_csv('../input/cpu-gpu/cpu.csv').assign(cpu_or_gpu='cpu')
gpu = pd.read_csv('../input/cpu-gpu/gpu.csv').assign(cpu_or_gpu='gpu')

# Stack both logs into one frame; the old row labels end up in an 'index' column.
cpugpu = pd.concat([cpu, gpu]).reset_index()
cpugpu = cpugpu.astype({'rows': int, 'cols': int})

# One panel per library, comparing CPU and GPU training time on every dataset.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
for panel, lib_name in zip(ax, ('cat', 'lgb', 'xgb')):
    lib_rows = cpugpu.loc[cpugpu['lib'] == lib_name]
    sns.barplot(data=lib_rows, x='dataset', y='time', hue='cpu_or_gpu', ax=panel, palette='hot')
    _ = panel.set_title(lib_name)

In [None]:
# Post-processing ends gracefully here.
# Print the finish time in the Canada/Pacific time zone, to minute precision.
print('toc', datetime.now(timezone('Canada/Pacific')).isoformat(timespec='minutes') )
# NOTE(review): presumably this stops the run so that the benchmark cells
# below are not executed again in the post-processing version - confirm that
# exit() behaves that way in the notebook runtime being used.
exit()

In [None]:
# Feature frames and target series, keyed by a short dataset name.
dataX, dataX_ncoded, datay = {}, {}, {}

# Tabular Playground Series, January to March 2021: 'id' becomes the index
# and the 'target' column is split off as the label.
for dataset in ['jan', 'feb', 'mar']:
    frame = pd.read_csv(f'../input/tabular-playground-series-{dataset}-2021/train.csv', index_col='id')
    datay[dataset] = frame.pop('target')
    dataX[dataset] = frame

# Amazon dataset shipped with CatBoost: 'ACTION' is the label and every
# remaining column is cast to object so that it is encoded below.
from catboost.datasets import amazon
amazon_train, _ = amazon()
datay['amz'] = amazon_train.pop('ACTION')
dataX['amz'] = amazon_train.astype('object')

# Encode object columns: ordinal codes (stored as 'category') for columns with
# at most 256 distinct values, leave-one-out target encoding for the rest.
for dataset in dataX:
    features = dataX[dataset]
    print(dataset, features.shape, end=' ')
    object_cols = features.select_dtypes('object').columns
    # LightGBM with GPU on can't take more than 256 distinct values per column.
    too_many_levels = {col: features[col].nunique() > 256 for col in object_cols}
    to_ord_encode = [col for col in object_cols if not too_many_levels[col]]
    to_loo_encode = [col for col in object_cols if too_many_levels[col]]
    if 'object' in features.dtypes.values:
        encoded = features.copy()
        encoded[to_ord_encode] = OrdinalEncoder().fit_transform(encoded[to_ord_encode]).astype('category')
        encoded[to_loo_encode] = LeaveOneOutEncoder().fit_transform(encoded[to_loo_encode], datay[dataset])
        dataX[dataset] = encoded
        print(len(to_ord_encode), 'Ordinal, ', len(to_loo_encode), 'LeaveOneOut')
    else:
        print()

In [None]:
# Settings shared by all three libraries.
num_boost_rounds = 10000
early_stopping_rounds = 50

# Per-library base parameters; each training helper copies its entry and
# layers the run-specific parameters on top.
parameters = {lib_name: {} for lib_name in ('cat', 'lgb', 'xgb')}

# Ratio of label-0 rows to label-1 rows for the two datasets trained with a
# binary objective.
scale_pos_weight = {}
for dataset in ('mar', 'amz'):
    is_negative = datay[dataset].eq(0)
    is_positive = datay[dataset].eq(1)
    scale_pos_weight[dataset] = is_negative.sum() / is_positive.sum()
print(scale_pos_weight)

# Switch every library to its GPU trainer when 'cudf' is among the loaded modules.
# NOTE(review): presumably 'cudf' is only preloaded on GPU-accelerated sessions - confirm.
if 'cudf' in sys.modules:
    parameters['xgb']['tree_method'] = 'gpu_hist'
    parameters['lgb'].update(device='gpu', gpu_use_dp=False)
    parameters['cat']['task_type'] = 'GPU'

In [None]:
def catpython(theseparams, trainX, trainy, validX, validy):
    """Train a CatBoost model with early stopping on a validation set.

    Parameters
    ----------
    theseparams : dict
        Run-specific parameters, layered on top of ``parameters['cat']``.
    trainX, trainy : training features and target.
    validX, validy : validation features and target, used as the eval set.

    Returns
    -------
    The fitted ``cat.CatBoost`` model.
    """
    param = parameters['cat'].copy()
    if theseparams:
        param.update(theseparams)
    # Positions of the pandas 'category' columns; CatBoost treats these as
    # categorical features.
    cat_features = np.where(trainX.columns.isin(trainX.select_dtypes('category').columns))[0]
    train_pool = cat.Pool(trainX, trainy, cat_features=cat_features)
    valid_pool = cat.Pool(validX, validy, cat_features=cat_features)
    # Pass the already-built validation Pool instead of the raw (X, y) pair,
    # so the pool built above is actually used.
    model = cat.CatBoost(param).fit(train_pool,
                                    eval_set              = valid_pool,
                                    early_stopping_rounds = early_stopping_rounds,
                                    verbose               = num_boost_rounds//10)
    return model

def lgbpython(theseparams, trainX, trainy, validX, validy):
    """Train a LightGBM model with early stopping on a validation set.

    Parameters
    ----------
    theseparams : dict
        Run-specific parameters, layered on top of ``parameters['lgb']``.
    trainX, trainy : training features and target.
    validX, validy : validation features and target, used as the only valid set.

    Returns
    -------
    The trained ``lgb.Booster``.
    """
    param = parameters['lgb'].copy()
    if theseparams:
        param.update(theseparams)
    trainSet = lgb.Dataset(trainX, trainy)
    validSet = lgb.Dataset(validX, validy, reference=trainSet)

    if hasattr(lgb, 'log_evaluation'):
        # Newer LightGBM releases configure early stopping and periodic logging
        # through callbacks; the equivalent keyword arguments of lgb.train()
        # were removed in LightGBM 4.0.
        train_options = {
            'callbacks': [
                lgb.early_stopping(early_stopping_rounds,
                                   first_metric_only=param.get('first_metric_only', False)),
                lgb.log_evaluation(num_boost_rounds//10),
            ],
        }
    else:
        # Older releases without the log_evaluation callback: keep the
        # original keyword arguments.
        train_options = {
            'early_stopping_rounds': early_stopping_rounds,
            'verbose_eval': num_boost_rounds//10,
        }

    model = lgb.train(param, train_set      = trainSet,
                      valid_sets            = [validSet],
                      num_boost_round       = num_boost_rounds,
                      **train_options)
    return model

def xgbpython(theseparams, trainX, trainy, validX, validy):
    """Train an XGBoost model with early stopping on a validation set.

    ``theseparams`` is layered on top of ``parameters['xgb']``. When the
    training frame has at least one pandas 'category' column, both DMatrix
    objects are built with ``enable_categorical=True``. Returns the trained
    booster.
    """
    param = {**parameters['xgb']}
    if len(theseparams):
        param.update(theseparams)

    # Only pass enable_categorical when it is needed, so the plain case calls
    # DMatrix exactly as before.
    has_categorical = len(trainX.select_dtypes('category').columns) >= 1
    dmatrix_options = {'enable_categorical': True} if has_categorical else {}
    trainSet = xgb.DMatrix(trainX, trainy, **dmatrix_options)
    validSet = xgb.DMatrix(validX, validy, **dmatrix_options)

    return xgb.train(
        param,
        trainSet,
        evals=[(validSet, 'valid')],
        num_boost_round=num_boost_rounds,
        early_stopping_rounds=early_stopping_rounds,
        verbose_eval=num_boost_rounds//10,
    )

In [None]:
# GPU-to-CPU ratio, row by row. Only numeric columns are divided: the logs also
# hold string columns (e.g. 'dataset', 'lib', 'cpu_or_gpu'), and dividing those
# raises a TypeError. A 'time' ratio below 1 means the GPU run was quicker.
gpu.select_dtypes('number').divide(cpu.select_dtypes('number'))

In [None]:
# One row per (dataset, library) run, holding the measured training time in seconds.
my_log = pd.DataFrame(columns=['dataset', 'lib', 'time'])

def traintis(lib, dataset, param, random_state=None):
    """Time one training run of a library on a dataset.

    Parameters
    ----------
    lib : callable
        Training helper called as ``lib(param, trainX, trainy, validX, validy)``.
    dataset : str
        Key into ``dataX`` / ``datay``.
    param : dict
        Run-specific parameters forwarded to ``lib``.
    random_state : optional
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        original behaviour (a different split on every call); pass a fixed
        value to time CPU and GPU runs on the same split.

    Returns
    -------
    float
        Elapsed seconds spent inside ``lib``; the split itself is not timed.
    """
    trainX, validX, trainy, validy = train_test_split(dataX[dataset], datay[dataset],
                                                      random_state=random_state)
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump if the system clock is adjusted.
    tic = time.perf_counter()
    lib(param, trainX, trainy, validX, validy)
    return time.perf_counter() - tic

In [None]:
# Time CatBoost on every dataset and log one row per run.
lib = 'cat'
for dataset in ['jan', 'feb', 'mar', 'amz']:
    if dataset in ('mar', 'amz'):
        # Logloss objective scored by AUC, with the class ratio as positive weight.
        run_params = {'objective'       : 'Logloss',
                      'eval_metric'     : 'AUC',
                      'scale_pos_weight': scale_pos_weight[dataset],
                      'iterations'      : num_boost_rounds}
    else:
        # RMSE objective and metric for the remaining datasets.
        run_params = {'objective'       : 'RMSE',
                      'eval_metric'     : 'RMSE',
                      'iterations'      : num_boost_rounds}
    elapsed = traintis(catpython, dataset, run_params)
    my_log.loc[f'{dataset}_{lib}'] = [dataset, lib, elapsed]

In [None]:
# Time LightGBM on every dataset and log one row per run.
lib = 'lgb'
for dataset in ['jan', 'feb', 'mar', 'amz']:
    if dataset in ('mar', 'amz'):
        # Binary objective scored by AUC, with built-in imbalance handling.
        run_params = {'objective'   : 'binary',
                      'metric'      : 'auc',
                      'is_unbalance': True}
    else:
        # Regression objective scored by RMSE for the remaining datasets.
        run_params = {'objective'   : 'regression',
                      'metric'      : 'rmse'}
    elapsed = traintis(lgbpython, dataset, run_params)
    my_log.loc[f'{dataset}_{lib}'] = [dataset, lib, elapsed]

In [None]:
# Time XGBoost on every dataset and log one row per run.
lib = 'xgb'
for dataset in ['jan', 'feb', 'mar', 'amz']:
    if dataset in ('mar', 'amz'):
        # Logistic objective scored by AUC, with the class ratio as positive weight.
        run_params = {'objective'       : 'binary:logistic',
                      'eval_metric'     : 'auc',
                      'scale_pos_weight': scale_pos_weight[dataset]}
    else:
        # Squared-error objective scored by RMSE for the remaining datasets.
        run_params = {'objective'       : 'reg:squarederror',
                      'eval_metric'     : 'rmse'}
    elapsed = traintis(xgbpython, dataset, run_params)
    my_log.loc[f'{dataset}_{lib}'] = [dataset, lib, elapsed]

In [None]:
# Bar chart of the logged training time per dataset, one bar colour per library.
sns.barplot(data=my_log, x='dataset', y='time', hue='lib', palette='hot')

In [None]:
# Record each dataset's feature-frame shape on all of its log rows.
for dataset, frame in dataX.items():
    in_dataset = my_log['dataset'] == dataset
    n_rows, n_cols = frame.shape
    my_log.loc[in_dataset, 'rows'] = n_rows
    my_log.loc[in_dataset, 'cols'] = n_cols

In [None]:
# Write the timing log to my_log.csv in the working directory (the row labels are written too).
my_log.to_csv('my_log.csv')

In [None]:
# Print the finish time in the Canada/Pacific time zone, to minute precision.
print('toc', datetime.now(timezone('Canada/Pacific')).isoformat(timespec='minutes') )

In [None]:
# Read the saved log back and display it, as a check of the file just written.
pd.read_csv('my_log.csv')