# Gradient Boosing Tree (GBT)

In [1]:
try:
    import xgboost as xgb
except ImportError:
    !pip install xgboost
    import xgboost as xgb

from ucimlrepo import fetch_ucirepo
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, roc_auc_score, f1_score

## 1. GBT Regressor

In [2]:
# Load regression dafatset https://archive.ics.uci.edu/dataset/186/wine+quality
dataset = fetch_ucirepo(id=186)
X = dataset.data.features
y = dataset.data.targets
y = y.values.ravel() # flatten to 1D array

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(5197, 11) (5197,)
(1300, 11) (1300,)


In [3]:
class MyGBTRegressor:
    def __init__(self,
                 n_estimators=100,
                 learning_rate=0.1,
                 max_depth=3,
                 random_state=None,):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.random_state = random_state
        self.initial_pred = None
        self.trees = []

    def initialize_prediction(self, y):
        return np.full(y.shape[0], np.mean(y))

    def compute_negative_gradient(self, y_true, y_pred):
        """Negative gradient of MSE loss."""
        return y_true - y_pred

    def fit(self, X, y):
        self.initial_pred = self.initialize_prediction(y)
        y_pred = self.initial_pred.copy()
        rng = np.random.RandomState(self.random_state)
        
        for _ in range(self.n_estimators):
            residuals = self.compute_negative_gradient(y, y_pred)
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                random_state=rng.randint(0, 10_000)
            )
            tree.fit(X, residuals)

            update = tree.predict(X)
            y_pred += self.learning_rate * update
            self.trees.append(tree)

        return self

    def predict(self, X):
        y_pred = np.full(X.shape[0], np.mean(self.initial_pred))
        for tree in self.trees:
            update = tree.predict(X)
            y_pred += self.learning_rate * update
        return y_pred

In [4]:
my_regressor = MyGBTRegressor(n_estimators=50, learning_rate=0.1, max_depth=3, random_state=42)
my_regressor.fit(X_train, y_train)
y_pred = my_regressor.predict(X_test)

my_mse = mean_squared_error(y_test, y_pred)
print(f"MyGBTRegressor MSE: {my_mse:.4f}")

MyGBTRegressor MSE: 0.4798


In [5]:
# Compare with sklearn's GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor

sk_regressor = GradientBoostingRegressor(n_estimators=50, learning_rate=0.1, max_depth=3, random_state=42)
sk_regressor.fit(X_train, y_train)
sk_y_pred = sk_regressor.predict(X_test)
sk_regressor_mse = mean_squared_error(y_test, sk_y_pred)
print(f"Sklearn GradientBoostingRegressor MSE: {sk_regressor_mse:.4f}")

Sklearn GradientBoostingRegressor MSE: 0.4795


## 2. GBT Classifier

In [None]:
class MyGBTClassifier:
    def __init__(self,
                 n_estimators=100,
                 learning_rate=0.1,
                 max_depth=3,
                 random_state=None):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.random_state = random_state

        self.initial_log_odds = None 
        self.trees = []

    @staticmethod
    def _sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))
    
    def initialize_prediction(self, y):
        return np.log(np.sum(y) / (y.shape[0] - np.sum(y))) # log-odds of positive class

    def compute_negative_gradient(self, y_true, proba_pred):
        return y_true - proba_pred

    def fit(self, X, y):
        y = np.asarray(y).astype(float)
        self.initial_log_odds = self.initialize_prediction(y)
        f_pred = np.full(y.shape[0], self.initial_log_odds)
        rng = np.random.RandomState(self.random_state)

        for m in range(self.n_estimators):
            p_pred = self._sigmoid(f_pred)
            residuals = self.compute_negative_gradient(y, p_pred)

            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                random_state=rng.randint(0, 10_000)
            )
            tree.fit(X, residuals)

            # Update log-odds: f_{m+1} = f_m + eta * h_m(x)
            update = tree.predict(X)
            f_pred += self.learning_rate * update

            self.trees.append(tree)

        return self

    def _raw_score(self, X):
        f_pred = np.full(X.shape[0], self.initial_log_odds)
        for tree in self.trees:
            f_pred += self.learning_rate * tree.predict(X)
        return f_pred

    def predict_proba(self, X):
        f_pred = self._raw_score(X)
        p1 = self._sigmoid(f_pred)
        p0 = 1.0 - p1
        return np.vstack([p0, p1]).T

    def predict(self, X):
        proba = self.predict_proba(X)
        return (proba[:, 1] >= 0.5).astype(int)

In [26]:
# Load classification dataset https://archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic
dataset = fetch_ucirepo(id=17)
X = dataset.data.features
y = dataset.data.targets
y = y.values.ravel() # flatten to 1D array
y = (y == 'M').astype(int)  # Convert labels to 0 and 1

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
print(f"Unique classes in y: {np.unique(y)}")

(455, 30) (455,)
(114, 30) (114,)
Unique classes in y: [0 1]


In [27]:
my_clf = MyGBTClassifier(n_estimators=50, learning_rate=0.1, max_depth=3, random_state=42)
my_clf.fit(X_train, y_train)
y_pred = my_clf.predict(X_test)
y_pred_proba = my_clf.predict_proba(X_test)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
print(f"MyGBTClassifier Accuracy: {accuracy:.4f}")

f1 = f1_score(y_test, y_pred)
print(f"MyGBTClassifier F1 Score: {f1:.4f}")

roc_auc = roc_auc_score(y_test, y_pred_proba)
print(f"MyGBTClassifier ROC AUC: {roc_auc:.4f}")


MyGBTClassifier Accuracy: 0.9561
MyGBTClassifier F1 Score: 0.9412
MyGBTClassifier ROC AUC: 0.9671


In [9]:
# Compare with sklearn's GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier

sk_clf = GradientBoostingClassifier(n_estimators=50, learning_rate=0.1, max_depth=3, random_state=42)
sk_clf.fit(X_train, y_train)
sk_y_pred = sk_clf.predict(X_test)
sk_y_pred_proba = sk_clf.predict_proba(X_test)[:, 1]

sk_clf_accuracy = accuracy_score(y_test, sk_y_pred)
print(f"Sklearn GradientBoostingClassifier Accuracy: {sk_clf_accuracy:.4f}")

sk_clf_f1 = f1_score(y_test, sk_y_pred)
print(f"Sklearn GradientBoostingClassifier F1 Score: {sk_clf_f1:.4f}")

sk_clf_roc_auc = roc_auc_score(y_test, sk_y_pred_proba)
print(f"Sklearn GradientBoostingClassifier ROC AUC: {sk_clf_roc_auc:.4f}")

Sklearn GradientBoostingClassifier Accuracy: 0.9561
Sklearn GradientBoostingClassifier F1 Score: 0.9412
Sklearn GradientBoostingClassifier ROC AUC: 0.9957


## 3. XGBoost

In [10]:
# Load regression dafatset https://archive.ics.uci.edu/dataset/186/wine+quality
dataset = fetch_ucirepo(id=186)
X = dataset.data.features
y = dataset.data.targets
y = y.values.ravel() # flatten to 1D array

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(5197, 11) (5197,)
(1300, 11) (1300,)


In [11]:
xgb_regressor = xgb.XGBRegressor(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)
xgb_regressor.fit(X_train, y_train)
y_pred = xgb_regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"XGBoost Regressor MSE: {mse:.4f}")

XGBoost Regressor MSE: 0.4790


In [12]:
# Load classification dataset https://archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic
dataset = fetch_ucirepo(id=17)
X = dataset.data.features
y = dataset.data.targets
y = y.values.ravel() # flatten to 1D array
y = (y == 'M').astype(int)  # Convert labels to 0 and 1

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
print(f"Unique classes in y: {np.unique(y)}")

(455, 30) (455,)
(114, 30) (114,)
Unique classes in y: [0 1]


In [13]:
xgb_classifier = xgb.XGBClassifier(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
    use_label_encoder=False,
    eval_metric='logloss'
)
xgb_classifier.fit(X_train, y_train)
y_pred = xgb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f"XGBoost Classifier Accuracy: {accuracy:.4f}")
print(f"XGBoost Classifier ROC AUC: {roc_auc:.4f}")
print(f"XGBoost Classifier F1 Score: {f1:.4f}")

XGBoost Classifier Accuracy: 0.9561
XGBoost Classifier ROC AUC: 0.9510
XGBoost Classifier F1 Score: 0.9412


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
