In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn import metrics
from sklearn.datasets import make_regression

In [2]:
x, y = make_regression(n_samples=10, n_features=2, n_informative=2, noise=0.35)

display(x, y)

array([[-0.59467865,  1.17659713],
       [-1.57659187, -0.82005132],
       [ 0.86870669, -0.51945701],
       [-0.79228551,  1.80592856],
       [-2.02687943, -2.25812447],
       [ 0.0862256 ,  0.68765017],
       [-0.0748271 , -0.03159647],
       [-0.66395571, -0.1534205 ],
       [-1.43970949, -1.04534703],
       [ 0.50257497, -0.79198512]])

array([   7.1032028 , -112.84950864,   29.82896272,   16.90170315,
       -184.59266395,   27.12121091,   -5.14832141,  -40.77685989,
       -112.96968328,    1.53681215])

In [21]:
class Gradient_boosting_regressor:
    
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []
        self.inital_value = None
        
    def fit(self, x, y):
        self.inital_value = np.mean(y)
        residuals = y - self.inital_value
        
        for i in range(self.n_estimators):
            
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(x, residuals)
            self.models.append(tree)
            
            predictions = tree.predict(x)
            residuals -= self.learning_rate * predictions
            
    def predict(self, x):
        
        y_pred = np.full(x.shape[0], self.inital_value)
        
        for tree in self.models:
            y_pred += self.learning_rate * tree.predict(x)
            
        return y_pred
    
    
if __name__ == '__main__':
    gb = Gradient_boosting_regressor(n_estimators=35, learning_rate=0.1, max_depth=3)
    gb.fit(x, y)
    
    y_pred = gb.predict(x)
    
    mae = metrics.mean_absolute_error(y_pred, y)
    print(f'mae: {mae}')
        
    

mae: 1.8170539599996887


In [22]:
y_pred

array([   6.53264584, -110.96050249,   27.19643356,   14.27761745,
       -180.90781505,   25.32050345,   -5.54430878,  -39.15745945,
       -111.07766897,    0.475409  ])

In [23]:
y

array([   7.1032028 , -112.84950864,   29.82896272,   16.90170315,
       -184.59266395,   27.12121091,   -5.14832141,  -40.77685989,
       -112.96968328,    1.53681215])

-----

In [1]:
import numpy as np
from scipy.special import expit
from sklearn.datasets import make_regression, make_classification
from sklearn.tree import DecisionTreeRegressor
from sklearn import metrics

x, y = make_classification(n_samples=20, n_features=2, n_informative=2, n_classes=2, n_repeated=0, n_redundant=0, n_clusters_per_class=2, weights=[0.4, 0.6])


class GradientBoostingRegressorTRAIN:

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimator = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.initial_value = None
        self.models = list()

    def fit(self, x, y):
        self.initial_value = np.mean(y)
        residual = y - self.initial_value

        for _ in range(self.n_estimator):

            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(x, residual)
            self.models.append(tree)

            y_pred = tree.predict(x)
            residual -= self.initial_value * y_pred

    def predict(self, x):

        y_pred = np.full(x.shape[0], self.initial_value)

        for tree in self.models:
            y_pred += self.learning_rate * tree.predict(x)
        return y_pred


class GradientBoostingClassifierTRAIN(GradientBoostingRegressorTRAIN):

    def __init__(self,  n_estimators=100, learning_rate=0.1, max_depth=3):
        super().__init__()

    def fit(self, x, y):
        prob = np.mean(y)

        self.initial_value = np.log(prob / (1 - prob))
        y_pred = np.full(y.shape, self.initial_value)

        for _ in range(self.n_estimator):

            residual = y - expit(y_pred)

            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(x, residual)
            self.models.append(tree)

            y_pred += self.learning_rate * tree.predict(x)

    def predict_proba(self, x):

        y_pred = np.full(x.shape[0], self.initial_value)

        for tree in self.models:
            y_pred += self.learning_rate * tree.predict(x)

        return expit(y_pred)

    def predict(self, x):
        return (self.predict_proba(x) >= 0.5).astype(int)


GBC = GradientBoostingClassifierTRAIN(n_estimators=40, learning_rate=0.1, max_depth=3)
GBC.fit(x, y)

y_predict = GBC.predict(x)
pred_proba = GBC.predict_proba(x)


print(pred_proba)

[0.17871288 0.09866273 0.90659262 0.90659262 0.09866273 0.1789106
 0.90659262 0.90659262 0.90659262 0.90659262 0.09866273 0.17871288
 0.09866273 0.90659262 0.1789106  0.90659262 0.90659262 0.70675609
 0.85098026 0.90659262]
