In [1]:
import numpy as np
import math

In [3]:
class SquareLoss:
    """Half squared-error loss, evaluated element-wise."""

    def loss(self, y, y_pred):
        """Return ``0.5 * (y - y_pred) ** 2`` for each element."""
        residual = y - y_pred
        squared = np.power(residual, 2)
        return 0.5 * squared

    def gradient(self, y, y_pred):
        """Return the derivative of ``loss`` with respect to ``y_pred``."""
        residual = y - y_pred
        return -residual

class CrossEntropy:
    """Binary cross-entropy loss, evaluated element-wise on probabilities.

    Probabilities are clipped to ``[1e-15, 1 - 1e-15]`` before any logarithm
    or division so that values of exactly 0 or 1 do not produce ``inf``/``nan``.
    """

    # Clipping margin that keeps log() and the divisions finite.
    _EPS = 1e-15

    def __init__(self): pass

    def loss(self, y, p):
        """Return ``-y*log(p) - (1-y)*log(1-p)`` for each element."""
        p = np.clip(p, self._EPS, 1 - self._EPS)
        return -y * np.log(p) - (1 - y) * np.log(1 - p)

    def acc(self, y, p):
        """Return the fraction of rows whose argmax in ``p`` matches ``y``.

        Both arguments are indexed along ``axis=1``, so they must be 2-D
        (presumably one-hot targets and per-class scores - confirm with
        callers). Computed with numpy directly because no ``accuracy_score``
        helper is imported in the visible part of this notebook.
        """
        true_labels = np.argmax(y, axis=1)
        predicted_labels = np.argmax(p, axis=1)
        return np.mean(true_labels == predicted_labels)

    def gradient(self, y, p):
        """Return the derivative of ``loss`` with respect to ``p``."""
        p = np.clip(p, self._EPS, 1 - self._EPS)
        return -(y / p) + (1 - y) / (1 - p)

In [5]:
class GradientBoosting:
    """Gradient-boosted ensemble of regression trees.

    Boosting starts from the per-column mean of the targets. Each tree is
    fitted to the gradient of the loss at the current prediction, and its
    output, scaled by ``learning_rate``, is subtracted from that prediction.

    Parameters
    ----------
    n_estimators : int
        Number of trees in the ensemble.
    learning_rate : float
        Factor applied to every tree's output before it is subtracted.
    min_samples_split, min_impurity, max_depth
        Forwarded unchanged to every ``RegressionTree``.
    regression : bool
        True selects ``SquareLoss``. False selects ``CrossEntropy`` and makes
        ``predict`` return class indices instead of raw values.

    NOTE(review): ``RegressionTree`` is not defined in the visible part of this
    notebook; it has to be in scope before this class is instantiated.
    """

    def __init__(self, n_estimators, learning_rate, min_samples_split, min_impurity, max_depth, regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression

        self.loss = SquareLoss() if self.regression else CrossEntropy()

        # Baseline the boosting starts from; fit() replaces it with mean(y).
        self.initial_prediction = 0.0

        self.trees = [
            RegressionTree(min_samples_split=self.min_samples_split,
                           min_impurity=self.min_impurity,
                           max_depth=self.max_depth)
            for _ in range(n_estimators)
        ]

    def fit(self, X, y):
        """Fit every tree in turn to the loss gradient at the running prediction.

        The mean of ``y`` along axis 0 is stored as ``initial_prediction`` so
        that ``predict`` can start from the same baseline used here.
        """
        self.initial_prediction = np.mean(y, axis=0)
        y_pred = np.full(np.shape(y), self.initial_prediction)
        for tree in self.trees:
            gradient = self.loss.gradient(y, y_pred)
            tree.fit(X, gradient)
            update = tree.predict(X)
            # Step against the gradient direction learned by this tree.
            y_pred -= np.multiply(self.learning_rate, update)

    def predict(self, X):
        """Return the ensemble prediction for ``X``.

        The result is the stored baseline minus the scaled output of every
        tree. When ``regression`` is False the accumulated scores are passed
        through a softmax along axis 1 and the index of the largest
        probability per row is returned.
        """
        n_samples = np.shape(X)[0]
        baseline = np.asarray(self.initial_prediction, dtype=float)
        # Same starting point as fit(): one copy of the baseline per sample.
        y_pred = np.full((n_samples,) + baseline.shape, baseline)
        for tree in self.trees:
            update = np.multiply(self.learning_rate, tree.predict(X))
            y_pred = y_pred - update

        if not self.regression:
            # Subtracting the row maximum keeps exp() from overflowing and
            # does not change the softmax result.
            shifted = y_pred - np.max(y_pred, axis=1, keepdims=True)
            exp_scores = np.exp(shifted)
            probabilities = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
            y_pred = np.argmax(probabilities, axis=1)
        return y_pred

In [6]:
class GradientBoostingRegressor(GradientBoosting):
    """Gradient boosting configured for regression (``regression=True``).

    ``min_var_reduction`` is passed to the base class as ``min_impurity``;
    the remaining arguments are forwarded under their own names.
    """

    def __init__(self, n_estimators=200, learning_rate=0.1, min_samples_split=2,
                min_var_reduction=1e-7, max_depth=2):
        # super() must be anchored at this class: anchoring it at
        # GradientBoosting would skip GradientBoosting.__init__ entirely.
        super(GradientBoostingRegressor, self).__init__(n_estimators=n_estimators,
                                                        learning_rate=learning_rate,
                                                        min_samples_split=min_samples_split,
                                                        min_impurity=min_var_reduction,
                                                        max_depth=max_depth,
                                                        regression=True)


In [7]:
class GradientBoostingClassifier(GradientBoosting):
    """Gradient boosting configured for classification (``regression=False``).

    Labels are converted with ``to_categorical`` before the base-class ``fit``
    runs.

    NOTE(review): ``to_categorical`` is not defined in the visible part of this
    notebook; presumably it one-hot encodes integer labels - confirm it is in
    scope before calling ``fit``.
    """

    def __init__(self, n_estimators=200, learning_rate=0.1, min_samples_split=2,
                min_impurity=1e-7, max_depth=2):
        # super() must be anchored at this class: anchoring it at
        # GradientBoosting would skip GradientBoosting.__init__ entirely.
        # regression=False makes the base class choose CrossEntropy and
        # return class indices from predict().
        super(GradientBoostingClassifier, self).__init__(n_estimators=n_estimators,
                                                         learning_rate=learning_rate,
                                                         min_samples_split=min_samples_split,
                                                         min_impurity=min_impurity,
                                                         max_depth=max_depth,
                                                         regression=False)

    def fit(self, X, y):
        """Encode ``y`` with ``to_categorical`` and fit the ensemble on it."""
        y = to_categorical(y)
        super(GradientBoostingClassifier, self).fit(X, y)
