In [2]:
import xgboost as xgb
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

X, y = make_classification(n_features=50, n_redundant=0, n_informative=25, n_clusters_per_class=10, n_samples=5000)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

lgb_train = lgb.Dataset(X_train, label=y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)


In [9]:
def defineParameters(jobs):
    paramxgb = {
        'silent': 1,
        'objective': 'binary:logistic',  
        'verbose': False,
        'eta': 0.1,
        'max_depth': 8,
        'nthread': jobs,
        'min_child_weight': 100,
    }

    paramlgb = {
        'learning_rate': 0.1,
        'num_leaves': 255,
        'num_threads': jobs,
        'min_data_in_leaf': 0,
        'min_sum_hessian_in_leaf': 100,
        'verbose': -1,
        'metric': 'binary',
    }

    paramRf = {
        'max_depth': 8,
        'max_features': 50
    }
    return paramxgb, paramlgb, paramRf

In [4]:
def timeXGBoost():
    print "*** XGBoost ***"
    %timeit xgb.train(paramxgb, dtrain, training_iterations)

In [5]:
def timeLightGBM():
    print "*** LightGBM ***"
    %timeit lgb.train(paramlgb, lgb_train, training_iterations)

In [13]:
def timeRF():
    print "*** Random Forest ***"
    %timeit RandomForestClassifier(max_depth=paramRf['max_depth'], max_features=paramRf['max_features'],random_state=0, n_estimators=training_iterations, n_jobs=jobs).fit(X_train, y_train)

In [14]:
training_iterations = 200  # the number of training iterations
jobs = 1
paramxgb, paramlgb, paramRf = defineParameters(jobs)
print "*" * 20 + "  jobs = 1  " + "*" * 20
timeXGBoost()
timeLightGBM()
timeRF()

jobs = 2
paramxgb, paramlgb, paramRf = defineParameters(jobs)
print "*" * 20 + "  jobs = 2  " + "*" * 20
timeXGBoost()
timeLightGBM()
timeRF()

jobs = 4
paramxgb, paramlgb, paramRf = defineParameters(jobs)
print "*" * 20 + "  jobs = 4  " + "*" * 20
timeXGBoost()
timeLightGBM()
timeRF()

jobs = 8
paramxgb, paramlgb, paramRf = defineParameters(jobs)
print "*" * 20 + "  jobs = 8 " + "*" * 20
timeXGBoost()
timeLightGBM()
timeRF()

jobs = 16
paramxgb, paramlgb, paramRf = defineParameters(jobs)
print "*" * 20 + "  jobs = 16  " + "*" * 20
timeXGBoost()
timeLightGBM()
timeRF()

********************  jobs = 1  ********************
*** XGBoost ***
1 loop, best of 3: 7.57 s per loop
*** LightGBM ***
1 loop, best of 3: 2.32 s per loop
*** Random Forest ***
1 loop, best of 3: 33.3 s per loop
********************  jobs = 2  ********************
*** XGBoost ***
1 loop, best of 3: 4.88 s per loop
*** LightGBM ***
1 loop, best of 3: 1.59 s per loop
*** Random Forest ***
1 loop, best of 3: 21.1 s per loop
********************  jobs = 4  ********************
*** XGBoost ***
1 loop, best of 3: 5.53 s per loop
*** LightGBM ***
1 loop, best of 3: 1.56 s per loop
*** Random Forest ***
1 loop, best of 3: 15.8 s per loop
********************  jobs = 8 ********************
*** XGBoost ***
1 loop, best of 3: 4.58 s per loop
*** LightGBM ***
1 loop, best of 3: 1.62 s per loop
*** Random Forest ***
1 loop, best of 3: 13.3 s per loop
********************  jobs = 16  ********************
*** XGBoost ***
1 loop, best of 3: 5.04 s per loop
*** LightGBM ***
1 loop, best of 3: 1.59 s p