In [1]:
import numpy as np
import pandas as pd

from copy import deepcopy
from sklearn.tree import DecisionTreeRegressor

In [2]:
class GradientBoostRegressor:
    def __init__(self, n_learners=20, max_depth=3, loss='mse'):
        self.base_learner = DecisionTreeRegressor(max_depth=max_depth, max_leaf_nodes=8)
        self.n_learners = n_learners
        self.regrs = [deepcopy(self.base_learner) for _ in range(self.n_learners)]
        self.gammas = [[] for _ in range(self.n_learners)]
        self.leaves_output = [[] for _ in range(self.n_learners)]
        self.compute_loss = self.mse_loss if loss == 'mse' else loss

    def init_odd_pred(self, y):
        self.odd_pred = lambda X: np.ones(X.shape[0]) * y.mean()
        
    def mse_loss(self, y_true, y_pred):
        loss = ((y_true - y_pred) ** 2) / 2
        grad = y_true - y_pred
        return loss, grad
    
    def get_indices(self, regr, X):
        leaves = regr.apply(X)
        n_leaves = np.max(leaves)
        leaves_index = [np.where(leaves == leaf_num)[0] for leaf_num in range(1, 1 + n_leaves)]
        return leaves_index
    
    def find_optimal_coefs(self, X, y, y_pred, regr, t, leaves_index):
        for leaf_index in leaves_index:
            if leaf_index.size == 0:
                self.gammas[t].append(0)
                continue
                
            leaf_losses = []
            for i in np.arange(0.0, 10, 0.01): 
                leaf_losses.append(
                    self.compute_loss(
                        y[leaf_index], 
                        y_pred[leaf_index] + i * regr.predict(X[leaf_index])
                    )[0].mean()
                )
            leaf_gamma = np.arange(0.0, 10, 0.01)[np.argmin(leaf_losses)]
            self.gammas[t].append(leaf_gamma)

    def fit(self, X, y):
        self.init_odd_pred(y)
        y_pred = self.odd_pred(X)
        for t in range(self.n_learners):
            residuals = self.compute_loss(y, y_pred)[1]
            self.regrs[t].fit(X, residuals)
            leaves_index = self.get_indices(self.regrs[t], X)
            self.leaves_output[t] = [
                residuals[leaf_index].mean() if leaf_index.size != 0 else 0 for leaf_index in leaves_index
            ]
            self.find_optimal_coefs(X, y, y_pred, self.regrs[t], t, leaves_index)
            leaves = self.regrs[t].apply(X) - 1
            pred = [
                self.leaves_output[t][leaf_num] * self.gammas[t][leaf_num] for leaf_num in leaves
            ]
            y_pred += pred
  
    def predict(self, X):
        y_pred = self.odd_pred(X)
        for t in range(self.n_learners):
            leaves = self.regrs[t].apply(X) - 1
            pred = [
                self.leaves_output[t][leaf_num] * self.gammas[t][leaf_num] for leaf_num in leaves
            ]
            y_pred += pred
        return y_pred

# Сравнение с другими реализациями

In [7]:
from sklearn.datasets import load_boston, load_diabetes
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

In [8]:
boston = load_boston()
diabets = load_diabetes()

In [9]:
def get_split(dataset):
    X = boston.data
    y = boston.target 
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    return X_train, X_test, y_train, y_test

In [10]:
def mae_loss(y_true, y_pred):
    loss = np.abs(y_true - y_pred)
    grad = np.where(y_true > y_pred, 1, -1)
    return loss, grad

def mse_loss(y_true, y_pred):
    loss = ((y_true - y_pred) ** 2) / 2
    grad = y_true - y_pred
    return loss, grad

In [11]:
data = []
regrs = [GradientBoostRegressor(100, 3), GradientBoostingRegressor(), 
         XGBRegressor(), LGBMRegressor()]
for dataset in (boston, diabets):
    X_train, X_test, y_train, y_test = get_split(dataset)
    data.append([])
    for regr in regrs:
        regr.fit(X_train, y_train)
        y_pred = regr.predict(X_test)
        data[-1].append(mse_loss(y_test, y_pred)[0].mean())

**MSE**:

In [12]:
pd.DataFrame(data, columns=['own', 'sklearn', 'xgb', 'lightgbm'], 
             index=['boston', 'diabets'])

Unnamed: 0,own,sklearn,xgb,lightgbm
boston,6.412597,4.013452,4.619457,5.673084
diabets,6.433693,4.209195,4.619457,5.673084


In [15]:
data = []
regrs = [GradientBoostRegressor(100, 3, mae_loss), GradientBoostingRegressor(), 
         XGBRegressor(), LGBMRegressor()]
for dataset in (boston, diabets):
    X_train, X_test, y_train, y_test = get_split(dataset)
    data.append([])
    for regr in regrs:
        regr.fit(X_train, y_train)
        y_pred = regr.predict(X_test)
        data[-1].append(mae_loss(y_test, y_pred)[0].mean())

**MAE**:

In [16]:
pd.DataFrame(data, columns=['own', 'sklearn', 'xgb', 'lightgbm'], 
             index=['boston', 'diabets'])

Unnamed: 0,own,sklearn,xgb,lightgbm
boston,2.9726,2.035753,2.091346,2.246041
diabets,3.20917,2.055225,2.091346,2.246041


Значение функций потерь у реализованного градиентного бустинга похуже, чем у готовых реализаций, но итоговое значение ошибки у всех методов небольшое и несильно отличается друг от друга.