# Gradient-Boosted Decision Trees

![image](../../images/GBDT.png)

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.inspection import DecisionBoundaryDisplay

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Load iris data
X, y = load_iris(return_X_y=True)

# Splitting for training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)
print(f'Shape of X_train: {X_train.shape}')
print(f'Shape of y_train: {y_train.shape}')
print(f'Shape of X_test: {X_test.shape}')
print(f'Shape of y_test: {y_test.shape}')

Shape of X_train: (105, 4)
Shape of y_train: (105,)
Shape of X_test: (45, 4)
Shape of y_test: (45,)


In [3]:
def create_fit_predict_gbdt(
        n_estimators: int,
        X_train: np.ndarray,
        y_train: np.ndarray,
        X_test: np.ndarray
        ) -> tuple[GradientBoostingClassifier, np.ndarray]:

    # Create a GB Classifier object
    classifier = GradientBoostingClassifier(
        n_estimators=n_estimators,
        max_depth=1,
        criterion='squared_error',
        min_samples_leaf=1)

    # Train the classifier
    classifier.fit(X_train, y_train)

    # Predict
    y_hat = classifier.predict(X_test)

    return classifier, y_hat

In [4]:
n_samples = [1, 3, 5, 10, 20, 40, 50, 100, 200, 500]

results = {n: create_fit_predict_gbdt(n_estimators=n,
                                      X_train=X_train,
                                      y_train=y_train,
                                      X_test=X_test) for n in n_samples}

for n, (classifier, pred) in results.items():
    acc = accuracy_score(y_test, pred)
    print(f'n_estimators: {n:>3d}\tacc: {acc}')

n_estimators:   1	acc: 0.6
n_estimators:   3	acc: 0.9555555555555556
n_estimators:   5	acc: 0.9555555555555556
n_estimators:  10	acc: 0.9555555555555556
n_estimators:  20	acc: 0.9555555555555556
n_estimators:  40	acc: 0.9555555555555556
n_estimators:  50	acc: 0.9555555555555556
n_estimators: 100	acc: 0.9555555555555556
n_estimators: 200	acc: 0.9333333333333333
n_estimators: 500	acc: 0.8888888888888888
