In [174]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.datasets import make_regression, make_friedman1
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Seed the generator so the synthetic Friedman #1 data set -- and every metric
# computed from it further down -- is identical on each Restart & Run All.
x, y = make_friedman1(n_samples=1000, n_features=500, noise=7, random_state=0)

In [175]:
# Ordinary least-squares baseline, scored on the same data it was fitted on.
reg = LinearRegression().fit(x, y)
sklearn_mse = mean_squared_error(y_true=y, y_pred=reg.predict(x))

In [176]:
class MSELoss(object):
    """Mean squared error loss for regression.

    Both methods accept any array-like input (NumPy arrays, lists, tuples);
    inputs are converted with ``np.asarray`` before the arithmetic, so plain
    Python sequences work as well as arrays.
    """

    def get_neg_gradient(self, y_true, y):
        """Return the residual ``y_true - y``.

        The residual is the negative gradient of ``0.5 * (y_true - y) ** 2``
        with respect to the prediction ``y``.

        Parameters
        ----------
        y_true : array-like
            Target values.
        y : array-like
            Current predictions.

        Returns
        -------
        numpy.ndarray
            Element-wise difference ``y_true - y``.
        """
        return np.asarray(y_true) - np.asarray(y)

    def __call__(self, y_true, y):
        """Return the mean of the squared differences between targets and predictions.

        Parameters
        ----------
        y_true : array-like
            Target values.
        y : array-like
            Predictions to score.

        Returns
        -------
        float
            ``mean((y_true - y) ** 2)`` over all elements.
        """
        residual = np.asarray(y_true) - np.asarray(y)
        return np.mean(residual ** 2)
    
    
loss = MSELoss()
my_mse = loss(y, reg.predict(x))

# Compare with a tolerance: the two implementations may sum in a different
# order, so exact float equality is not guaranteed even when both are correct.
np.isclose(my_mse, sklearn_mse)

True

In [215]:
from scipy.optimize import minimize

class GradientBoostingRegressor(object):
    """Gradient boosting for regression built from an arbitrary weak learner.

    A base model is fitted to the targets; then ``num_rounds`` further models
    are fitted, one after another, to the negative gradient of the loss at the
    current ensemble prediction. Each of those models enters the ensemble
    scaled by a step size ("gamma").

    Parameters
    ----------
    weak_learner : callable
        Zero-argument factory (for example an estimator class) returning an
        object with ``fit(x, y)`` and ``predict(x)`` methods.
    learning_rate : float
        Step size applied to every boosting round. In adaptive mode it is only
        the starting point of the per-round line search.
    num_rounds : int
        Number of boosting rounds performed after the base model.
    adaptive_learning_rate : bool, optional
        When true, the step size of each round is chosen by numerically
        minimizing the loss along the new model's predictions.
    loss : object, optional
        Loss exposing ``get_neg_gradient(y_true, y)`` and
        ``__call__(y_true, y)``. Defaults to ``MSELoss()``.
    """

    def __init__(self, weak_learner, learning_rate, num_rounds,
                 adaptive_learning_rate=False, loss=None):
        self.learning_rate = learning_rate
        self.num_rounds = num_rounds
        self.loss = MSELoss() if loss is None else loss
        self.weak_learner = weak_learner
        self.adaptive_learning_rate = adaptive_learning_rate
        self._reset()

    def _reset(self):
        """Discard any previously fitted ensemble."""
        self.estimators = []
        # gammas[i] is the weight of estimators[i]; the base model's weight is 1.
        self.gammas = [1]

    def _fit_base_model(self, x, y):
        """Fit the first (unscaled) model directly on the targets."""
        self._base_model = self.weak_learner()
        self._base_model.fit(x, y)
        self.estimators.append(self._base_model)

    def _gamma_obj(self, gamma, y, old_preds, preds):
        """Loss of the ensemble after adding ``gamma * preds``; minimized over
        ``gamma`` to pick the step size in adaptive mode."""
        return self.loss(y, old_preds + gamma * preds)

    def _boosting_round(self, x, y, preds):
        """Fit one more model and return its scaled contribution.

        Parameters
        ----------
        x : array-like
            Training inputs.
        y : array-like
            Training targets.
        preds : array-like
            Current ensemble predictions for ``x``.

        Returns
        -------
        array-like
            The new model's predictions for ``x`` multiplied by its step size.
        """
        residual = self.loss.get_neg_gradient(y, preds)

        boosting_model = self.weak_learner()
        boosting_model.fit(x, residual)
        old_preds = preds
        preds = boosting_model.predict(x)

        if self.adaptive_learning_rate:
            # One-dimensional line search for the step size, started from
            # learning_rate. The optimizer settings are left exactly as they
            # were so numerical results do not change.
            gamma = minimize(self._gamma_obj, self.learning_rate, args=(y, old_preds, preds), tol=1e-28, method = 'Nelder-Mead').x[0]
        else:
            gamma = self.learning_rate

        self.estimators.append(boosting_model)
        self.gammas.append(gamma)

        return preds * gamma

    def fit(self, x, y):
        """Fit the base model and ``num_rounds`` boosting models.

        Any ensemble from an earlier ``fit`` call is discarded first, so the
        instance can be re-fitted safely.

        Returns
        -------
        GradientBoostingRegressor
            ``self``, to allow call chaining.
        """
        self._reset()
        self._fit_base_model(x, y)
        # Work on a private float copy so the in-place accumulation below never
        # mutates an array owned by the base model.
        preds = np.array(self._base_model.predict(x), dtype=float)

        for _ in range(self.num_rounds):
            preds += self._boosting_round(x, y, preds)

        return self

    def predict(self, x):
        """Return the ensemble prediction for ``x``.

        The result is the base model's prediction plus every boosting model's
        prediction multiplied by its step size.

        Raises
        ------
        IndexError
            If called before ``fit`` (there is no base model yet).
        """
        # Private float copy, for the same reason as in fit().
        result = np.array(self.estimators[0].predict(x), dtype=float)
        for estimator, gamma in zip(self.estimators[1:], self.gammas[1:]):
            result += estimator.predict(x) * gamma

        return result
            
# Boost 100 Ridge models, choosing each round's step size by line search, then
# show the linear-regression MSE next to the ensemble's MSE on the same data.
gbm = GradientBoostingRegressor(
    weak_learner=Ridge,
    learning_rate=0.01,
    num_rounds=100,
    adaptive_learning_rate=True,
)
gbm.fit(x, y)
p = gbm.predict(x)
(my_mse, loss(y, p))

(29.258625841565809, 29.258625841565941)

In [216]:
# Signed gap between the LinearRegression MSE and the boosted ensemble's MSE,
# both measured on the training data (x, y).
my_mse - loss(y, p)

-1.3145040611561853e-13

In [217]:
# Step sizes of the fitted ensemble: entry 0 is the base model's fixed weight
# of 1, followed by one value per boosting round.
gbm.gammas

[1,
 1.0726567382812515,
 1.0656640625000013,
 1.0721875000000014,
 1.0655000000000014,
 0.0099999997615814219,
 0.011000000000000001,
 0.010500000000000001,
 0.0099921874999999993,
 0.01003125,
 0.010015625,
 0.010562499999999999,
 0.01,
 0.01,
 0.0094999999999999998,
 0.01000390625,
 0.01,
 0.010500000000000001,
 0.0099687500000000002,
 0.01,
 0.0094999999999999998,
 0.010500000000000001,
 0.0099687500000000002,
 0.01,
 0.010500000000000001,
 0.010499023437500001,
 0.01,
 0.010501953125000001,
 0.01,
 0.01,
 0.01,
 0.0098750000000000001,
 0.01,
 0.00975,
 0.0094999999999999998,
 0.01,
 0.01,
 0.0099960937500000006,
 0.010500000000000001,
 0.01,
 0.0099375000000000002,
 0.010625000000000001,
 0.0094999999999999998,
 0.010500000000000001,
 0.01,
 0.010500000000000001,
 0.010500000000000001,
 0.0099687500000000002,
 0.010500000000000001,
 0.0099687499999999984,
 0.010250001907348632,
 0.0097187499999999982,
 0.010062500000000002,
 0.010500244140625,
 0.01,
 0.01,
 0.01,
 0.01025,
 0.010