In [10]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import make_regression
from matplotlib.pyplot import plot
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor

In [46]:
class GBoost:
    """Minimal gradient-boosting regressor for squared-error loss.

    The model starts from the mean of the training targets and then fits
    ``n_models`` regression trees one after another. Each tree is trained on
    the residuals left over after subtracting the shrunken (``lr``-scaled)
    predictions of all trees fitted before it.

    Parameters
    ----------
    lr : float, default=0.2
        Learning rate (shrinkage) applied to every tree's prediction.
    max_depth : int, default=8
        ``max_depth`` passed to each ``DecisionTreeRegressor``.
    n_models : int, default=50
        Number of trees fitted in sequence.
    random_state : int or None, default=None
        ``random_state`` passed to each ``DecisionTreeRegressor``.

    Attributes
    ----------
    clfs_ : list
        The fitted trees, in the order they were trained.
    f0_ : float or None
        Initial constant prediction (mean of the training targets);
        ``None`` until ``fit`` has been called.
    """

    def __init__(self, lr=0.2, max_depth=8, n_models=50, random_state=None):
        self.lr_ = lr
        self.n_models_ = n_models
        self.max_depth_ = max_depth
        self.random_state_ = random_state
        self.clfs_ = []
        self.f0_ = None

    def fit(self, x_train, y_train):
        """Fit the ensemble on ``x_train`` / ``y_train`` and return ``self``.

        Any trees from a previous ``fit`` call are discarded first, so
        fitting the same instance twice gives the same model as fitting once.
        """
        # Accept lists / Series as well as arrays for the targets.
        targets = np.asarray(y_train, dtype=float)

        # Reset state: without this, re-running fit (e.g. re-executing a
        # notebook cell) would stack new trees on top of stale ones and
        # predict() would sum the contributions of both generations.
        self.clfs_ = []
        self.f0_ = np.mean(targets)

        residuals = targets - self.f0_
        for _ in range(self.n_models_):
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth_,
                random_state=self.random_state_,
            )
            tree.fit(x_train, residuals)
            self.clfs_.append(tree)
            # Only the shrunken part of the tree's prediction is "explained";
            # the rest stays in the residuals for the next tree.
            residuals = residuals - self.lr_ * tree.predict(x_train)
        return self

    def predict(self, x_test):
        """Return predictions for ``x_test`` as a 1-D float array.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.f0_ is None:
            raise RuntimeError("GBoost.predict() called before fit()")

        # np.shape also works for nested lists, not just arrays.
        n_rows = np.shape(x_test)[0]
        pred = np.full(n_rows, self.f0_, dtype=float)
        for clf in self.clfs_:
            pred = pred + self.lr_ * clf.predict(x_test)
        return pred

In [23]:
# Synthetic regression problem: 10,000 samples, 4 features, fixed seed.
x, y = make_regression(
    n_samples=10_000,
    n_features=4,
    random_state=42,
)

In [16]:
# Display the feature matrix returned by make_regression.
x

array([[ 1.17269758,  0.31571942, -0.98344976, -0.47601775],
       [-0.07914503,  0.56388155, -1.21806185, -0.07831368],
       [ 0.18430906,  0.8205535 , -1.10318438, -0.42002773],
       ...,
       [ 0.604436  ,  0.24691277,  2.4730176 , -0.64394317],
       [-0.88410914,  1.31220799,  1.23715098, -0.15378517],
       [ 0.23704122,  0.84808716, -0.14248885,  0.78818001]])

In [17]:
# Display the target vector returned by make_regression.
y

array([  9.4194194 , -87.81029001, -52.53516816, ..., 253.59388628,
        85.08506119,  38.15320313])

In [83]:
# Build both models with the same learning rate, depth and number of trees
# so the comparison is like-for-like.
mygb = GBoost(lr=0.1, max_depth=8, n_models=50)
# Seed the reference model so re-running the notebook rebuilds the same trees.
gb = GradientBoostingRegressor(
    learning_rate=0.1,
    max_depth=8,
    n_estimators=50,
    random_state=42,
)

In [84]:
# Hold out 20% of the samples for evaluation; the split is seeded.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [85]:
# Train the hand-written booster, then the scikit-learn reference model,
# on the same training split.
mygb.fit(x_train=x_train, y_train=y_train)
gb.fit(X=x_train, y=y_train)

In [86]:
# Predict on the held-out split with both models.
y_pred_mygb = mygb.predict(x_test=x_test)
y_pred_gb = gb.predict(X=x_test)

In [87]:
# Test-set R^2 of the hand-written booster.
r2_score(y_true=y_test, y_pred=y_pred_mygb)

0.9941546348697715

In [88]:
# Test-set R^2 of the scikit-learn reference model.
r2_score(y_true=y_test, y_pred=y_pred_gb)

0.9941108493624282