In [91]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

In [92]:
class GradientBoostingRegressor:
    """Gradient boosting for squared-error regression, written from scratch.

    The ensemble starts from a constant prediction (the mean of the training
    targets) and then, round after round, fits a fresh base learner to the
    current residuals and adds a shrunken copy of its output to the running
    prediction.

    Parameters
    ----------
    lr : float, default=0.1
        Learning rate (shrinkage) applied to every base learner's output.
    n_estimators : int, default=100
        Number of boosting rounds, i.e. how many base learners are fitted.
    base_learner : callable, default=DecisionTreeRegressor
        Factory called as ``base_learner(**params)``; the object it returns
        must provide ``fit(X, y)`` and ``predict(X)``.

    Attributes
    ----------
    base_models : list
        The fitted base learners, in boosting order (set by ``fit``).
    init_prediction_ : float
        Constant starting prediction, the mean of the training targets
        (set by ``fit``).
    """

    def __init__(self, lr=0.1, n_estimators=100, base_learner=DecisionTreeRegressor):
        self.lr = lr
        self.n_estimators = n_estimators
        self.base_learner = base_learner

    def fit(self, X, y, **params):
        """Fit the boosted ensemble on the training data.

        Parameters
        ----------
        X : array-like
            Training features; passed unchanged to each base learner.
        y : array-like
            Numeric training targets.
        **params
            Keyword arguments forwarded to ``base_learner`` every time a new
            base model is created (e.g. ``max_depth=3``).

        Returns
        -------
        numpy.ndarray
            The ensemble's final predictions on the training data.

        Raises
        ------
        ValueError
            If ``y`` is empty.
        """
        # Accept lists / pandas objects as well as arrays.
        y = np.asarray(y, dtype=float)
        if y.size == 0:
            raise ValueError("Cannot fit a gradient boosting model on an empty target array.")

        # Empty list to store the weak models/learners.
        self.base_models = []

        # Base model F0: for squared error the best constant prediction is the
        # mean of the targets. Starting from 0 instead would leave the final
        # predictions shrunk toward 0 by a factor of (1 - lr) ** n_estimators.
        self.init_prediction_ = float(y.mean())
        Fm = np.full(shape=y.shape, fill_value=self.init_prediction_)

        for _ in range(self.n_estimators):
            # Pseudo-residuals (negative gradient of the squared error): the
            # part of the target the ensemble has not explained yet.
            residual = y - Fm

            # Create the next base learner with the user-supplied parameters
            # and train it on the residuals of the current ensemble.
            base_model = self.base_learner(**params)
            base_model.fit(X, residual)
            self.base_models.append(base_model)

            # Move the running prediction a small step towards the targets.
            Fm = Fm + self.lr * base_model.predict(X)

        # Final prediction of the ensemble on the training data.
        return Fm

    def predict(self, X):
        """Predict targets for ``X`` with the fitted ensemble.

        Parameters
        ----------
        X : array-like
            Samples to predict; passed unchanged to each base learner.

        Returns
        -------
        numpy.ndarray
            One prediction per row of ``X``.

        Raises
        ------
        AttributeError
            If the model has not been fitted yet.
        """
        if not hasattr(self, "base_models"):
            raise AttributeError(
                "This GradientBoostingRegressor instance is not fitted yet; "
                "call fit() before predict()."
            )

        # Start from the same constant the ensemble was trained from.
        y_pred = np.full(np.shape(X)[0], self.init_prediction_)

        # Aggregate the shrunken contribution of every base learner.
        for base_model in self.base_models:
            y_pred = y_pred + self.lr * base_model.predict(X)

        return y_pred

In [93]:
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import explained_variance_score

# Load the breast-cancer dataset bundled with scikit-learn.
data = datasets.load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Extra keyword arguments given to fit() are forwarded to every base learner.
# The trees' random_state is pinned because scikit-learn trees permute the
# features at each split, so tied splits could otherwise change the score
# from one run to the next.
model = GradientBoostingRegressor()
model.fit(X_train, y_train, max_depth=20, random_state=0)
y_pred = model.predict(X_test)

In [96]:
# Score the hand-written booster's predictions against the held-out targets.
explained_variance = explained_variance_score(y_true=y_test, y_pred=y_pred)
print("Explained Variance Score:", explained_variance)

Explained Variance Score: 0.8922602146930558


## Using scikit-learn's GradientBoostingRegressor

In [54]:
from sklearn.ensemble import GradientBoostingRegressor

In [55]:
# Reference model: scikit-learn's implementation with the same learning rate
# and tree depth as the hand-written model.
# NOTE: it uses 25 boosting rounds, whereas the hand-written model above was
# built with its default of 100, so the two scores are not strictly like-for-like.
model_2 = GradientBoostingRegressor(
    n_estimators=25,
    learning_rate=0.1,
    loss="squared_error",
    max_depth=20,
    random_state=0,
)

model_2.fit(X_train, y_train)

# Store the reference results under their own names (matching `model_2`) so
# they do not overwrite the hand-written model's `y_pred` / `explained_variance`.
y_pred_2 = model_2.predict(X_test)

explained_variance_2 = explained_variance_score(y_test, y_pred_2)
print("Explained Variance Score:", explained_variance_2)

Explained Variance Score: 0.8793977919508051


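#### The explained variance score of our model is very close to that of scikit-learn's GradientBoostingRegressor. Note that the two models use different numbers of estimators (100 for ours, 25 for scikit-learn's), so the comparison is only approximate.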