In [1]:
# Load the three gradient-boosting libraries first, then report the installed
# release of each one so the benchmark numbers can be tied to concrete versions.
import xgboost
import lightgbm
import catboost

print('xgboost version:', xgboost.__version__)
print('lightgbm version:', lightgbm.__version__)
print('catboost version:', catboost.__version__)

xgboost version: 1.7.5
lightgbm version: 3.3.5
catboost version: 1.2


In [2]:
from sklearn.datasets import make_regression

# Synthetic regression problem with 2000 samples and 20 features.
# random_state pins the generator so the exact same dataset is produced on
# every run; without it each kernel restart benchmarks different data.
X, y = make_regression(n_samples=2000, n_features=20, random_state=42)

In [3]:
# Sanity check: display the dimensions of the feature matrix as (rows, columns).
X.shape

(2000, 20)

In [4]:
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor,AdaBoostRegressor

# Regressors under comparison, each configured with n_estimators=100 so that
# timing and accuracy are measured on equally sized ensembles.
# random_state is fixed on every estimator so the scores are repeatable
# across kernel restarts.
# NOTE(review): the libraries' default thread settings are left untouched here
# and may differ from one another, so wall-clock timings may not be a
# like-for-like comparison -- confirm before drawing conclusions.
models = [RandomForestRegressor(n_estimators=100, random_state=42),
          ExtraTreesRegressor(n_estimators=100, random_state=42),
          AdaBoostRegressor(n_estimators=100, random_state=42),
          GradientBoostingRegressor(n_estimators=100, random_state=42),
          XGBRegressor(n_estimators=100, objective='reg:squarederror', random_state=42),
          CatBoostRegressor(verbose=0, n_estimators=100, random_state=42),
          LGBMRegressor(n_estimators=100, random_state=42)]


In [5]:
for model in models:
  print(type(model).__name__)
  %timeit model.fit(X,y)

RandomForestRegressor
2.06 s ± 9.12 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
ExtraTreesRegressor
767 ms ± 9.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
AdaBoostRegressor
709 ms ± 9.17 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
GradientBoostingRegressor
1.14 s ± 34.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
XGBRegressor
390 ms ± 9.35 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
CatBoostRegressor
192 ms ± 3.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
LGBMRegressor
186 ms ± 33 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
for model in models:
  print(type(model).__name__)
  %timeit model.predict(X)

RandomForestRegressor
33.9 ms ± 782 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
ExtraTreesRegressor
55.7 ms ± 23.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
AdaBoostRegressor
24.9 ms ± 362 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
GradientBoostingRegressor
3.22 ms ± 21.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
XGBRegressor
2.37 ms ± 88.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
CatBoostRegressor
8.43 ms ± 71.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
LGBMRegressor
3.31 ms ± 113 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
from numpy import mean,std,sqrt
from sklearn.model_selection import cross_val_score

# 10-fold cross-validated RMSE for every regressor, printed as "mean (std)"
# over the folds.
for model in models:
    # The scorer returns negated MSE, so flip the sign before taking the
    # square root to obtain one RMSE value per fold.
    n_scores = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=10, n_jobs=-1)
    n_scores = sqrt(-n_scores)
    # Format with a single f-string. The previous version interpolated the
    # class name with an f-string and then applied %-formatting to the result,
    # which would misbehave if the interpolated name ever contained a '%'.
    print(f'{type(model).__name__} RMSE: {mean(n_scores):.3f} ({std(n_scores):.3f})')

RandomForestRegressor RMSE: 72.410 (3.802)
ExtraTreesRegressor RMSE: 63.536 (3.135)
AdaBoostRegressor RMSE: 80.289 (3.620)
GradientBoostingRegressor RMSE: 46.269 (3.047)
XGBRegressor RMSE: 54.828 (3.189)
CatBoostRegressor RMSE: 25.364 (2.257)
LGBMRegressor RMSE: 41.148 (1.931)


# Clasificación

In [8]:
from sklearn.datasets import load_digits

# Load the digits data restricted to two classes (a binary problem) and
# request it as pandas objects rather than NumPy arrays.
dataset = load_digits(
    as_frame=True,
    n_class=2,
)


In [9]:
# Split the loaded bunch into labels and features. The features are divided
# by 16 -- presumably to rescale pixel intensities into [0, 1]; confirm the
# raw value range against the load_digits documentation.
y = dataset["target"]
X = dataset["data"] / 16.

In [10]:
# Preview the first rows of the rescaled feature table.
X.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,0.3125,0.8125,0.5625,0.0625,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.375,0.8125,0.625,0.0,0.0,0.0
1,0.0,0.0,0.0,0.75,0.8125,0.3125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.6875,1.0,0.625,0.0,0.0
2,0.0,0.0,0.0625,0.5625,0.9375,0.6875,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0625,0.625,0.8125,0.1875,0.0,0.0
3,0.0,0.0,0.0,0.0,0.875,0.8125,0.0625,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0625,0.8125,1.0,0.0625,0.0
4,0.0,0.0,0.1875,0.8125,0.6875,0.4375,0.0,0.0,0.0,0.0,...,0.0625,0.0,0.0,0.0,0.125,0.75,0.8125,0.25,0.0,0.0


In [11]:
# Preview the first target labels.
y.head()

0    0
1    1
2    0
3    1
4    0
Name: target, dtype: int64

In [12]:
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier,AdaBoostClassifier

# Classifiers under comparison, each configured with n_estimators=100.
# RandomForestClassifier now states n_estimators explicitly like every other
# entry instead of relying on the library default.
# random_state is fixed on every estimator so the scores are repeatable
# across kernel restarts.
models = [RandomForestClassifier(n_estimators=100, random_state=42),
          ExtraTreesClassifier(n_estimators=100, random_state=42),
          AdaBoostClassifier(n_estimators=100, random_state=42),
          GradientBoostingClassifier(n_estimators=100, random_state=42),
          XGBClassifier(n_estimators=100, eval_metric='logloss', random_state=42),
          CatBoostClassifier(verbose=0, n_estimators=100, random_state=42),
          LGBMClassifier(n_estimators=100, random_state=42)]

In [13]:
for model in models:
  print(type(model).__name__)
  %timeit model.fit(X,y)

RandomForestClassifier
131 ms ± 1.27 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
ExtraTreesClassifier
96.7 ms ± 2.25 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
AdaBoostClassifier
183 ms ± 8.52 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
GradientBoostingClassifier
117 ms ± 2.76 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
XGBClassifier
67.8 ms ± 2.46 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
CatBoostClassifier
109 ms ± 1.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
LGBMClassifier
The slowest run took 33.69 times longer than the fastest. This could mean that an intermediate result is being cached.
575 ms ± 802 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
for model in models:
  print(type(model).__name__)
  %timeit model.predict(X)

RandomForestClassifier
12.7 ms ± 173 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
ExtraTreesClassifier
13.4 ms ± 832 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
AdaBoostClassifier
20.5 ms ± 178 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
GradientBoostingClassifier
1.53 ms ± 18.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
XGBClassifier
3.71 ms ± 65 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
CatBoostClassifier
1.49 ms ± 6.81 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
LGBMClassifier
1.97 ms ± 84.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [15]:
# 3-fold cross-validated ROC AUC for every classifier, printed as
# "mean (std)" over the folds.
for model in models:
    n_scores = cross_val_score(model, X, y, scoring='roc_auc', cv=3, n_jobs=-1)
    # Format with a single f-string. The previous version interpolated the
    # class name with an f-string and then applied %-formatting to the result,
    # which would misbehave if the interpolated name ever contained a '%'.
    print(f'{type(model).__name__} roc_auc: {mean(n_scores):.3f} ({std(n_scores):.3f})')

RandomForestClassifier roc_auc: 1.000 (0.000)
ExtraTreesClassifier roc_auc: 1.000 (0.000)
AdaBoostClassifier roc_auc: 0.984 (0.023)
GradientBoostingClassifier roc_auc: 0.992 (0.012)
XGBClassifier roc_auc: 0.989 (0.015)
CatBoostClassifier roc_auc: 1.000 (0.000)
LGBMClassifier roc_auc: 1.000 (0.000)
