In [1]:
from sklearn import datasets, tree, model_selection, metrics
import numpy as np

In [2]:
class GradientBooster:
    """Gradient boosting for squared error built from shallow regression trees.

    The ensemble starts from a constant model (the mean of the training
    targets) and then repeatedly fits a depth-5 ``DecisionTreeRegressor`` to
    the current residuals ``y - predict(x)``.

    :param n_trees: Number of trees fitted on top of the constant model.
    """

    def __init__(self, n_trees=20):
        self.f = []               # fitted models, constant baseline first
        self.learning_rates = []  # one weight per entry of ``self.f``
        self.n_trees = n_trees

    def fit(self, x, y, lr=0.1):
        """Fit the ensemble from scratch.

        :param x: (np.array) Features in shape (rows, columns).
        :param y: (np.array) Targets in shape (rows,).
        :param lr: Weight applied to every tree's prediction.
        """
        # Start from an empty ensemble: without this a second call to ``fit``
        # would stack a new baseline and new trees on top of the old ones.
        self.f = []
        self.learning_rates = []

        # Freeze the baseline now so later changes to ``y`` cannot alter it.
        baseline = np.mean(y)

        class F0:
            """Constant model that predicts the training mean for every row."""
            predict = staticmethod(lambda rows: baseline * np.ones(rows.shape[0]))

        self.f.append(F0)
        self.learning_rates.append(1)  # the baseline enters at full weight

        for _ in range(self.n_trees):
            m = tree.DecisionTreeRegressor(max_depth=5)
            # For squared error the negative gradient is the plain residual.
            res = y - self.predict(x)
            m.fit(x, res)
            self.f.append(m)
            self.learning_rates.append(lr)

    def predict(self, x):
        """Return the weighted sum of all member predictions for ``x``.

        :param x: (np.array) Features in shape (rows, columns).
        """
        return sum(f.predict(x) * lr for f, lr in zip(self.f, self.learning_rates))

In [3]:
# Load the diabetes regression set and hold out a test split.
# The global NumPy seed makes the (otherwise unseeded) split reproducible.
np.random.seed(123)
x, y = datasets.load_diabetes(return_X_y=True)
x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y)

In [4]:
def evaluate(m):
    """Print the R^2 score of ``m`` on the training and on the test split.

    Reads the notebook-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` variables.

    :param m: Fitted model exposing ``predict(x)``.
    """
    train_r2 = metrics.r2_score(y_train, m.predict(x_train))
    test_r2 = metrics.r2_score(y_test, m.predict(x_test))
    print('Training score:', train_r2, '\tTesting score:', test_r2)

In [5]:


# Baseline to beat: a single decision tree tuned by cross-validated grid search
p = dict(
    max_depth=[5, 10, 15, 20],
    min_samples_split=[2, 3, 7],
    min_samples_leaf=[1, 3, 7],
)

m = model_selection.GridSearchCV(tree.DecisionTreeRegressor(), param_grid=p)
m.fit(x_train, y_train)

evaluate(m)

Training score: 0.6595521969069875 	Testing score: 0.14972533215961115




In [15]:
class MAE:
    """Absolute-error criterion: element-wise loss and its gradient.

    Both functions are static, so they can be called on the class
    (``MAE.prime(...)``) as well as on an instance.
    """

    @staticmethod
    def loss(y_true, y_pred):
        """Return the element-wise absolute error ``|y_true - y_pred|``.

        :param y_true: (np.array) Targets.
        :param y_pred: (np.array) Predictions, same shape as ``y_true``.
        """
        # The signed difference alone is not a loss: negative and positive
        # errors would cancel out. Take the magnitude.
        return np.abs(y_true - y_pred)

    @staticmethod
    def prime(y_true, y_pred):
        """Return the derivative of the loss with respect to ``y_pred``.

        The result is +1 where the prediction is too high, -1 where it is
        too low and 0 where it is exact.

        :param y_true: (np.array) Targets.
        :param y_pred: (np.array) Predictions, same shape as ``y_true``.
        """
        return np.sign(y_pred - y_true)
    
    
class MSE:
    """Squared-error criterion: element-wise loss and its gradient.

    Both functions are static, so they can be called on the class
    (``MSE.prime(...)``) as well as on an instance.

    Note that ``prime`` omits the constant factor 2: it is the derivative of
    ``0.5 * loss`` with respect to ``y_pred``.
    """

    @staticmethod
    def loss(y_true, y_pred):
        """Return the element-wise squared error ``(y_true - y_pred) ** 2``.

        :param y_true: (np.array) Targets.
        :param y_pred: (np.array) Predictions, same shape as ``y_true``.
        """
        return (y_true - y_pred) ** 2

    @staticmethod
    def prime(y_true, y_pred):
        """Return the negated residual ``-(y_true - y_pred)``.

        :param y_true: (np.array) Targets.
        :param y_pred: (np.array) Predictions, same shape as ``y_true``.
        """
        residual = y_true - y_pred
        return -residual
    
class L1GradientBooster:
    """Gradient boosting for absolute error (L1 loss).

    The ensemble starts from the median of the training targets. Every
    following tree is fitted to the negative gradient of the absolute error
    (via ``MAE.prime``), after which each leaf value is overwritten with the
    median of the residuals that fall into that leaf.

    :param n_trees: Number of trees fitted on top of the constant model.
    """

    def __init__(self, n_trees=20):
        # DecisionTreeRegressor is created without ``random_state`` below, so
        # it draws from NumPy's global generator; seed it here to get the
        # same trees on every run. This reseeds the global generator.
        np.random.seed(132)
        self.f = []               # fitted models, constant baseline first
        self.learning_rates = []  # one weight per entry of ``self.f``
        self.n_trees = n_trees

    def fit(self, x, y, lr=0.4):
        """Fit the ensemble from scratch.

        :param x: (np.array) Features in shape (rows, columns).
        :param y: (np.array) Targets in shape (rows,).
        :param lr: Weight applied to every tree's prediction.
        """
        # Start from an empty ensemble: without this a second call to ``fit``
        # would stack a new baseline and new trees on top of the old ones.
        self.f = []
        self.learning_rates = []

        # Freeze the baseline now so later changes to ``y`` cannot alter it.
        baseline = np.median(y)

        class F0:
            """Constant model that predicts the training median for every row."""
            predict = staticmethod(lambda rows: baseline * np.ones(rows.shape[0]))

        self.f.append(F0)
        self.learning_rates.append(1)  # the baseline enters at full weight

        for _ in range(self.n_trees):
            m = tree.DecisionTreeRegressor(max_depth=5)

            y_pred = self.predict(x)
            res = y - y_pred
            # The tree only decides the partition of the feature space; it is
            # trained on the negative gradient (the sign of the residual).
            m.fit(x, -MAE.prime(y, y_pred))

            # Replace every leaf value by the median residual of the rows that
            # land in that leaf.
            leaf_idx = m.apply(x)
            for leaf in np.unique(leaf_idx):
                m.tree_.value[leaf, 0, 0] = np.median(res[leaf_idx == leaf])

            self.f.append(m)
            self.learning_rates.append(lr)

    def predict(self, x):
        """Return the weighted sum of all member predictions for ``x``.

        :param x: (np.array) Features in shape (rows, columns).
        """
        return sum(f.predict(x) * lr for f, lr in zip(self.f, self.learning_rates))
    
# Fit the L1 booster with 20 trees and report its scores
m = L1GradientBooster(n_trees=20)
m.fit(x_train, y_train)
evaluate(m)
            

Training score: 0.8588362070152573 	Testing score: 0.4658251462106008


In [105]:

    

class GenericGradientBooster:
    """Gradient boosting for an arbitrary differentiable criterion.

    The ensemble starts from the mean of the training targets. Every
    following tree is fitted to the negative gradient of the criterion and is
    then wrapped in a ``WeakLearner`` that re-tunes the leaf values against
    the current residuals.

    :param criterion: Loss class/object implementing ``prime(y_true, y_pred)``.
    :param n_trees: Number of trees fitted on top of the constant model.
    """

    def __init__(self, criterion=MAE, n_trees=20):
        # DecisionTreeRegressor is created without ``random_state`` below, so
        # it draws from NumPy's global generator; seed it here to get the
        # same trees on every run. This reseeds the global generator.
        np.random.seed(132)
        self.f = []               # fitted models, constant baseline first
        self.learning_rates = []  # one weight per entry of ``self.f``
        self.criterion = criterion
        self.n_trees = n_trees

    def fit(self, x, y, lr=0.4):
        """Fit the ensemble from scratch.

        :param x: (np.array) Features in shape (rows, columns).
        :param y: (np.array) Targets in shape (rows,).
        :param lr: Weight applied to every tree's prediction.
        """
        # Start from an empty ensemble: without this a second call to ``fit``
        # would stack a new baseline and new trees on top of the old ones.
        self.f = []
        self.learning_rates = []

        # Freeze the baseline now so later changes to ``y`` cannot alter it.
        baseline = np.mean(y)

        class F0:
            """Constant model that predicts the training mean for every row."""
            predict = staticmethod(lambda rows: baseline * np.ones(rows.shape[0]))

        self.f.append(F0)
        self.learning_rates.append(1)  # the baseline enters at full weight

        for _ in range(self.n_trees):
            m = tree.DecisionTreeRegressor(max_depth=5)

            y_pred = self.predict(x)
            res = y - y_pred
            # The tree is trained on the negative gradient of the criterion.
            m.fit(x, -self.criterion.prime(y, y_pred))

            m_wrap = WeakLearner(m, self.criterion)
            # Tune the leaf values on the residuals: the previous ensemble
            # plus this learner should add up to y.
            m_wrap.fit(x, res)

            self.f.append(m_wrap)
            self.learning_rates.append(lr)

    def predict(self, x):
        """Return the weighted sum of all member predictions for ``x``.

        :param x: (np.array) Features in shape (rows, columns).
        """
        return sum(f.predict(x) * lr for f, lr in zip(self.f, self.learning_rates))
        
class WeakLearner:
    """Wrap a fitted regression tree and re-tune its leaf values by gradient descent."""

    def __init__(self, m, loss, n_steps=100, step_size=0.05):
        """
        :param m: (DecisionTree) Trained on the derivative of the loss.
        :param loss: Loss class that implements a `prime` method which returns the gradients.
        :param n_steps: Number of gradient-descent updates applied to every leaf.
        :param step_size: Step size of each gradient-descent update.
        """
        self.m = m
        self.gamma = None      # kept for compatibility; not used by this class
        self.leaf_map = None   # kept for compatibility; not used by this class
        self.loss = loss
        self.n_steps = n_steps
        self.step_size = step_size

    def fit(self, x, y):
        """Adjust every leaf value of the wrapped tree in place.

        For each leaf reached by ``x``, the stored value is moved ``n_steps``
        times against the summed loss gradient of the rows in that leaf.

        :param x: (np.array) The features in shape (rows, columns)
        :param y: (np.array) The residuals, not the gradients (pseudo-residuals)!
                  The previous ensemble plus this learner should add up to the
                  target, so this learner is tuned against target minus the
                  previous ensemble's prediction.
        """
        # ``apply`` returns, per row, the index of the leaf the row ends up in.
        leaf_idx = self.m.apply(x)
        leaf_values = self.m.tree_.value

        for leaf in np.unique(leaf_idx):
            y_leaf = y[np.flatnonzero(leaf_idx == leaf)]
            for _ in range(self.n_steps):
                # Assumes the tree predicts ``tree_.value[leaf, 0, 0]`` for
                # every row routed to ``leaf`` (the in-place update below
                # relies on the same thing), so the stored value is read
                # directly instead of re-predicting the whole data set.
                y_pred_leaf = np.full(y_leaf.shape, leaf_values[leaf, 0, 0])
                gradient = self.loss.prime(y_leaf, y_pred_leaf).sum()
                leaf_values[leaf, 0, 0] -= self.step_size * gradient

    def predict(self, x):
        """Return the wrapped tree's prediction for ``x``.

        :param x: (np.array) The features in shape (rows, columns)
        """
        return self.m.predict(x)

def evaluate(m):
    """Print the mean squared error of ``m`` on the training and test split.

    Reads the notebook-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` variables. The printed labels say "score", but the values are
    errors, so lower is better.

    :param m: Fitted model exposing ``predict(x)``.
    """
    train_mse = metrics.mean_squared_error(y_train, m.predict(x_train))
    test_mse = metrics.mean_squared_error(y_test, m.predict(x_test))
    print('Training score:', train_mse, '\tTesting score:', test_mse)
   
    
# m = GenericGradientBooster(MAE, n_trees=10)
# m.fit(x_train, y_train)
# evaluate(m)

def evaluate(m):
    """Print the mean absolute error of ``m`` on the training and test split.

    Reads the notebook-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` variables. The printed labels say "score", but the values are
    errors, so lower is better.

    :param m: Fitted model exposing ``predict(x)``.
    """
    train_mae = metrics.mean_absolute_error(y_train, m.predict(x_train))
    test_mae = metrics.mean_absolute_error(y_test, m.predict(x_test))
    print('Training score:', train_mae, '\tTesting score:', test_mae)
   
    
# Fit the generic booster with the absolute-error criterion and report its errors
m = GenericGradientBooster(criterion=MAE, n_trees=20)
m.fit(x_train, y_train)
evaluate(m)

Training score: 21.389953590779374 	Testing score: 45.09319231571819
