In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.base import RegressorMixin, BaseEstimator
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
def generate_data(n_samples=10000, n_features=1, noise=15, random_state=42):
    """Create a synthetic regression dataset with ``make_regression``.

    Parameters
    ----------
    n_samples : int, default=10000
        Number of rows to generate.
    n_features : int, default=1
        Number of feature columns.
    noise : float, default=15
        Noise level forwarded to ``make_regression``.
    random_state : int or None, default=42
        Seed forwarded to ``make_regression``; the fixed default keeps the
        dataset identical between runs.

    Returns
    -------
    tuple
        ``(X, y)`` exactly as returned by ``make_regression``.
    """
    return make_regression(
        n_samples=n_samples,
        n_features=n_features,
        noise=noise,
        random_state=random_state,
    )

In [None]:
def plot_dataset(X, y):
    """Show a scatter plot of ``y`` against ``X`` on a new 10x6 figure."""
    _, axes = plt.subplots(figsize=(10, 6))
    axes.scatter(X, y)
    plt.show()

In [None]:
# Build the synthetic dataset with the default settings and take a first look at it.
X, y = generate_data()
plot_dataset(X=X, y=y)

In [None]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [None]:
# Training portion of the split.
plot_dataset(X=X_train, y=y_train)

In [None]:
# Held-out portion of the split.
plot_dataset(X=X_test, y=y_test)

In [None]:
def plot_prediction(X, y, model, n_points=200):
    """Scatter the data and overlay the model's prediction curve in red.

    The curve is evaluated on ``n_points`` evenly spaced values spanning the
    full range ``[min(X), max(X)]``. A fixed-count grid (rather than a fixed
    step) reaches the right-most data point and stays meaningful whatever the
    scale of ``X`` is.

    Parameters
    ----------
    X : array-like
        Feature values; the grid is reshaped to a single column, so the
        model is expected to take exactly one feature.
    y : array-like
        Target values plotted against ``X``.
    model : object
        Fitted estimator exposing ``predict(grid)``.
    n_points : int, default=200
        Number of grid points at which the model is evaluated.
    """
    plt.figure(figsize=(10, 6))
    plt.scatter(X, y)
    # linspace includes both endpoints, unlike arange with a hard-coded step.
    grid = np.linspace(np.min(X), np.max(X), n_points).reshape(-1, 1)
    plt.plot(grid, model.predict(grid), 'red')
    plt.show()

In [None]:
def print_metrics(true, predict):
    """Print R^2, RMSE and MAE of ``predict`` measured against ``true``.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predict : array-like
        Predicted target values, aligned with ``true``.

    Notes
    -----
    All three metrics are computed from the ``true`` argument, not from a
    notebook-level variable, so the function can score any pair of arrays.
    """
    r2 = r2_score(true, predict)
    # RMSE is the square root of the mean squared error.
    rmse = mean_squared_error(true, predict) ** 0.5
    mae = mean_absolute_error(true, predict)
    print(f'Results:\nr2:   {r2:.3f}\nrmse: {rmse:.3f}\nmae:  {mae:.3f}')

In [None]:
class MyRFRegressor(BaseEstimator, RegressorMixin):
    """Minimal random-forest regressor built from bagged decision trees.

    Every tree is a ``DecisionTreeRegressor`` fitted on a bootstrap resample
    (rows drawn with replacement) of the training data. A prediction is the
    unweighted mean of the per-tree predictions.

    Parameters
    ----------
    num_trees : int, default=100
        Number of trees fitted by ``fit``.
    min_samples_split : int, default=2
        Forwarded to every ``DecisionTreeRegressor``.
    max_depth : int, default=5
        Forwarded to every ``DecisionTreeRegressor``.
    random_state : int or None, default=None
        Seed for the bootstrap draws and for the individual trees. With
        ``None`` the rows are drawn from NumPy's global random state and the
        trees are left unseeded.

    Attributes
    ----------
    decision_trees : list of DecisionTreeRegressor
        Trees produced by the most recent ``fit`` call; empty before fitting.
    """

    def __init__(self, num_trees=100, min_samples_split=2, max_depth=5,
                 random_state=None):
        self.num_trees = num_trees
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.random_state = random_state
        self.decision_trees = []

    def fit(self, X, y):
        """Fit ``num_trees`` trees, each on its own bootstrap resample.

        Calling ``fit`` again discards the previous trees instead of adding
        to them, so the forest always holds exactly ``num_trees`` trees.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with ``n_samples`` entries along the first axis

        Returns
        -------
        self : MyRFRegressor
        """
        # Positional row indexing below needs arrays (not lists/DataFrames).
        X = np.asarray(X)
        y = np.asarray(y)

        seeded = self.random_state is not None
        # The ``np.random`` module and ``RandomState`` share the method names
        # used here, so the unseeded path keeps using the global state.
        rng = np.random.RandomState(self.random_state) if seeded else np.random

        trees = []
        for _ in range(self.num_trees):
            tree = DecisionTreeRegressor(
                min_samples_split=self.min_samples_split,
                max_depth=self.max_depth,
                # Give each tree its own seed only when the forest is seeded.
                random_state=rng.randint(np.iinfo(np.int32).max) if seeded else None,
            )
            X_b, y_b = self.boostrap(X, y, rng)
            tree.fit(X_b, y_b)
            trees.append(tree)

        # Replace (never extend) the forest so a refit does not keep stale trees.
        self.decision_trees = trees
        return self

    @staticmethod
    def boostrap(X, y, rng=None):
        """Draw a bootstrap resample: as many rows as ``X`` has, with replacement.

        The method name keeps its original spelling so existing callers
        continue to work.

        Parameters
        ----------
        X, y : numpy.ndarray
            Arrays indexed along their first axis with the same row indices.
        rng : object with a ``choice`` method, optional
            Source of randomness; defaults to NumPy's global random state.

        Returns
        -------
        tuple
            ``(X[samples], y[samples])`` for the drawn row indices.
        """
        if rng is None:
            rng = np.random
        n_rows = X.shape[0]
        samples = rng.choice(a=n_rows, size=n_rows, replace=True)
        return X[samples], y[samples]

    def predict(self, X):
        """Return the mean of the per-tree predictions for every row of ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit`` (a ``ValueError`` subclass).
        """
        if not self.decision_trees:
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This MyRFRegressor instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )
        # Shape (n_trees, n_samples[, n_outputs]); average over the tree axis only.
        per_tree = np.stack([tree.predict(X) for tree in self.decision_trees])
        return per_tree.mean(axis=0)

In [None]:
# Seed NumPy's global RNG: MyRFRegressor draws its bootstrap samples from it,
# so without a seed the metrics below change on every run.
np.random.seed(42)
model = MyRFRegressor()
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# Plot on the held-out split, the same data the metrics above were computed on
# and the same data the scikit-learn baseline is plotted against below.
plot_prediction(X_test, y_test, model)

In [None]:
# scikit-learn baseline. The fixed seed makes its metrics reproducible, in line
# with the seeded data generation and train/test split earlier in the notebook.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# Prediction curve of the scikit-learn baseline over the held-out split.
plot_prediction(X=X_test, y=y_test, model=model)