In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.base import RegressorMixin, BaseEstimator
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
def generate_data(n_samples=10000, n_features=1, noise=15, random_state=42):
    """Create a synthetic regression dataset with ``make_regression``.

    Parameters
    ----------
    n_samples : int, default=10000
        Number of rows to generate.
    n_features : int, default=1
        Number of feature columns.
    noise : float, default=15
        Forwarded unchanged as the ``noise`` argument of ``make_regression``.
    random_state : int or None, default=42
        Forwarded unchanged as ``random_state``; the fixed default keeps
        repeated calls identical.

    Returns
    -------
    tuple
        ``(X, y)`` exactly as produced by ``make_regression``.
    """
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        noise=noise,
        random_state=random_state,
    )
    return X, y

In [None]:
def plot_dataset(X, y):
    """Display a scatter plot of ``y`` against ``X`` on a 10x6 inch figure."""
    _, axes = plt.subplots(figsize=(10, 6))
    axes.scatter(X, y)
    plt.show()

In [None]:
# Build the dataset with generate_data's default arguments and look at it.
X, y = generate_data()
plot_dataset(X, y)

In [None]:
# Hold out 25% of the samples for evaluation; the fixed random_state makes
# the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.25, random_state = 42
)

In [None]:
# Scatter plot of the training split.
plot_dataset(X_train, y_train)

In [None]:
# Scatter plot of the held-out test split.
plot_dataset(X_test, y_test)

In [None]:
def plot_prediction(X, y, model, n_points=200):
    """Scatter the data and overlay the model's prediction curve in red.

    Parameters
    ----------
    X : array-like
        Feature values. The prediction grid built here has a single column,
        so ``model`` must accept one-feature input.
    y : array-like
        Target values plotted against ``X``.
    model : object
        Anything exposing ``predict(grid)`` for a ``(n_points, 1)`` array.
    n_points : int, default=200
        Number of evenly spaced grid points between ``min(X)`` and ``max(X)``
        (both included) at which the model is evaluated.
    """
    plt.figure(figsize=(10, 6))
    plt.scatter(X, y)
    # A fixed number of points (instead of a fixed 0.1 step) keeps the curve
    # resolution independent of the feature scale and makes the curve reach
    # the right-most data point.
    grid = np.linspace(np.min(X), np.max(X), n_points).reshape(-1, 1)
    plt.plot(grid, model.predict(grid), 'red')
    plt.show()

In [None]:
def print_metrics(true, predict):
    """Print R^2, RMSE and MAE of ``predict`` measured against ``true``.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predict : array-like
        Predicted values, aligned with ``true``.

    Notes
    -----
    All three metrics are computed from the ``true`` argument; no
    module-level variable is consulted. RMSE is the square root of
    ``mean_squared_error``.
    """
    r2 = r2_score(true, predict)
    rmse = mean_squared_error(true, predict) ** 0.5
    mae = mean_absolute_error(true, predict)
    print(f'Results:\nr2:   {r2:.3f}\nrmse: {rmse:.3f}\nmae:  {mae:.3f}')

In [None]:
class MyGBRegressor(BaseEstimator, RegressorMixin):
    """Minimal gradient boosting regressor for squared error.

    The model is a constant baseline (the mean of the training targets) plus
    a sum of ``DecisionTreeRegressor`` corrections, each fitted to the current
    residuals and scaled by ``learning_rate``.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting rounds (trees).
    max_depth : int, default=3
        ``max_depth`` passed to every tree.
    min_samples_leaf : int, default=2
        ``min_samples_leaf`` passed to every tree.
    learning_rate : float, default=0.1
        Factor applied to each tree's output before it is added to the
        running prediction.

    Attributes
    ----------
    trees : list
        Fitted trees, in boosting order. Empty until ``fit`` is called.
    init_prediction : float
        Constant baseline the boosting starts from; ``0.0`` until ``fit``
        replaces it with the mean of the training targets.
    """

    def __init__(self, n_estimators=100, max_depth=3, min_samples_leaf = 2,
                 learning_rate = 0.1 ):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.learning_rate = learning_rate
        # Unfitted state: no trees and a zero baseline, so predict() on an
        # unfitted instance still returns zeros.
        self.trees = []
        self.init_prediction = 0.0

    def fit(self, X, y):
        """Fit the ensemble on ``X`` / ``y`` and return ``self``.

        Any previously fitted trees are discarded first, so calling ``fit``
        again retrains from scratch instead of extending the old ensemble.
        """
        y = np.asarray(y, dtype=float)

        self.trees = []
        self.init_prediction = float(np.mean(y))
        current = np.full(y.shape[0], self.init_prediction)

        for _ in range(self.n_estimators):
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                min_samples_leaf=self.min_samples_leaf,
            )
            # For squared error the negative gradient is the plain residual.
            residual = y - current
            tree.fit(X, residual)
            self.trees.append(tree)
            current = current + self.learning_rate * tree.predict(X)

        return self

    def predict(self, X):
        """Return predictions for ``X`` as a 1-D float array.

        Starts from the same baseline ``fit`` boosted from
        (``init_prediction``) and adds every tree's scaled contribution.
        """
        preds = np.full(X.shape[0], self.init_prediction, dtype=float)
        for tree in self.trees:
            preds = preds + self.learning_rate * tree.predict(X)

        return preds

In [None]:
# Train the hand-written booster with its default hyperparameters and
# report its metrics for the test-split predictions.
model = MyGBRegressor()
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# Test points with the prediction curve of whatever `model` is currently bound to.
plot_prediction(X_test, y_test, model)

In [None]:
# Reference run: scikit-learn's GradientBoostingRegressor with default
# settings, trained and evaluated on the same split.
# NOTE: this rebinds `model` and `predict`; cells executed after this one
# see the scikit-learn estimator under those names.
model = GradientBoostingRegressor()
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# Test points with the prediction curve of whatever `model` is currently bound to.
plot_prediction(X_test, y_test, model)