# STAT 451: Machine Learning -- L07: Ensemble Methods Part 2/3

STAT 451: Intro to Machine Learning (Fall 2021)  
Instructor: Sebastian Raschka (sraschka@wisc.edu)  


In [1]:
# Load the watermark extension and print the installed scikit-learn version.
# NOTE(review): only scikit-learn is listed here; the versions of xgboost,
# lightgbm, and catboost imported by later cells are not reported.
%load_ext watermark
%watermark -p scikit-learn

scikit-learn: 1.0



# Gradient Boosting

In [2]:
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn import datasets


# Load the breast cancer dataset and pull out the feature matrix and labels.
data = datasets.load_breast_cancer()
X = data.data
y = data.target

# Step 1: hold out 30% of the samples as the test set, stratified on the labels.
X_temp, X_test, y_temp, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
    stratify=y,
)

# Step 2: split the remaining samples again, keeping 20% of them for validation.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_temp,
    y_temp,
    test_size=0.2,
    random_state=123,
    stratify=y_temp,
)

# Report the number of samples that ended up in each subset.
split_sizes = [len(labels) for labels in (y_train, y_valid, y_test)]
print('Train/Valid/Test sizes:', *split_sizes)

Train/Valid/Test sizes: 318 80 171


## Original gradient boosting

In [3]:
from sklearn.ensemble import GradientBoostingClassifier


# Gradient boosting with 100 estimators of maximum depth 8 and a learning
# rate of 0.1; the fixed random_state pins the estimator's seed.
boost = GradientBoostingClassifier(
    learning_rate=0.1, n_estimators=100, max_depth=8, random_state=1
)
boost.fit(X_train, y_train)

# Score the fitted model on each subset first, then print the three results.
train_acc = boost.score(X_train, y_train)
valid_acc = boost.score(X_valid, y_valid)
test_acc = boost.score(X_test, y_test)

print("Training Accuracy: %0.2f" % train_acc)
print("Validation Accuracy: %0.2f" % valid_acc)
print("Test Accuracy: %0.2f" % test_acc)

Training Accuracy: 1.00
Validation Accuracy: 0.90
Test Accuracy: 0.92


## HistGradientBoostingClassifier (inspired by LightGBM)

In [4]:
#from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier


# Histogram-based gradient boosting. Only the learning rate and the seed are
# set explicitly; everything else stays at the library defaults. Note that this
# estimator has no `n_estimators` argument (its counterpart is `max_iter`).
boost = HistGradientBoostingClassifier(learning_rate=0.1, random_state=1)
boost.fit(X_train, y_train)

# Each entry pairs an output template with the subset it is evaluated on.
accuracy_reports = (
    ("Training Accuracy: %0.2f", X_train, y_train),
    ("Validation Accuracy: %0.2f", X_valid, y_valid),
    ("Test Accuracy: %0.2f", X_test, y_test),
)
for template, features, labels in accuracy_reports:
    print(template % boost.score(features, labels))

Training Accuracy: 1.00
Validation Accuracy: 0.96
Test Accuracy: 0.97


```
import numpy as np
import xgboost as xgb


# Native (non-scikit-learn) XGBoost API: wrap the arrays in DMatrix objects,
# train a booster, and evaluate it on the test set.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Derive the class count from the training labels instead of hard-coding it;
# the previous fixed value of 3 did not match the two-class labels used here.
num_classes = int(np.unique(y_train).size)

param = {
    'max_depth': 8,
    'eta': 0.1,  # learning rate
    'objective': 'multi:softprob',  # per-class probabilities for each sample
    'num_class': num_classes}  # number of distinct labels in y_train

boost = xgb.train(param, dtrain, num_boost_round=100)

# With 'multi:softprob' the booster predicts one probability per class for
# every sample; the predicted label is the index of the largest probability.
y_pred = boost.predict(dtest)
y_labels = np.argmax(y_pred, axis=1)


print("Test Accuracy: %0.2f" % (y_labels == y_test).mean())
```

## XGBoost

In [5]:
# https://xgboost.readthedocs.io/en/latest/build.html

In [6]:
# %pip install xgboost  # uncomment to install into the running kernel's environment

In [7]:
import numpy as np
import xgboost as xgb


# XGBoost through its scikit-learn style wrapper, using default settings.
boost = xgb.XGBClassifier()
boost.fit(X_train, y_train)

# Evaluate on every subset with the same template/data pairing.
for template, features, labels in (
    ("Training Accuracy: %0.2f", X_train, y_train),
    ("Validation Accuracy: %0.2f", X_valid, y_valid),
    ("Test Accuracy: %0.2f", X_test, y_test),
):
    print(template % boost.score(features, labels))

Training Accuracy: 1.00
Validation Accuracy: 0.95
Test Accuracy: 0.98




## LightGBM

In [None]:
# https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html
# conda install -c conda-forge lightgbm

In [8]:
import lightgbm as lgb


# LightGBM through its scikit-learn style wrapper, using default settings.
boost = lgb.LGBMClassifier()
boost.fit(X_train, y_train)

# Collect the three accuracies before printing them.
train_acc = boost.score(X_train, y_train)
valid_acc = boost.score(X_valid, y_valid)
test_acc = boost.score(X_test, y_test)

print("Training Accuracy: %0.2f" % train_acc)
print("Validation Accuracy: %0.2f" % valid_acc)
print("Test Accuracy: %0.2f" % test_acc)

Training Accuracy: 1.00
Validation Accuracy: 0.96
Test Accuracy: 0.98


## CatBoost

In [None]:
# https://catboost.ai
# conda install -c conda-forge catboost

In [13]:
from catboost import CatBoostClassifier


# CatBoost with default settings; verbose=0 is passed to turn off the
# training log output.
boost = CatBoostClassifier(verbose=0)
boost.fit(X_train, y_train)

# Each entry pairs an output template with the subset it is evaluated on.
accuracy_reports = [
    ("Training Accuracy: %0.2f", (X_train, y_train)),
    ("Validation Accuracy: %0.2f", (X_valid, y_valid)),
    ("Test Accuracy: %0.2f", (X_test, y_test)),
]
for template, (features, labels) in accuracy_reports:
    print(template % boost.score(features, labels))

Training Accuracy: 1.00
Validation Accuracy: 0.97
Test Accuracy: 0.98
