In [1]:
import numpy as np
import pandas as pd

In [2]:
class RegressionTree:
    """Regression tree grown greedily by minimising weighted child MSE.

    The fitted tree is a nested dict. A leaf looks like
    ``{'leaf': True, 'value': <mean of y in the node>}``; an internal node
    looks like ``{'leaf': False, 'feature_index': j, 'threshold': t,
    'left_subtree': ..., 'right_subtree': ...}``, where samples with
    ``x[j] <= t`` are routed to the left subtree and all others to the right.

    Attributes:
        max_depth: Maximum number of splits on any root-to-leaf path.
            ``None`` means the depth is not limited.
        Tree: Root node of the fitted tree, or ``None`` before ``fit``.
        tree: Alias of ``Tree`` (kept in sync by ``fit``).
    """

    def __init__(self, depth):
        """Create an unfitted tree.

        Args:
            depth: Maximum depth of the tree; ``None`` for no limit.
        """
        # Both spellings are initialised: ``Tree`` is the name fit/predict use,
        # ``tree`` is the name the constructor has always exposed.
        self.Tree = None
        self.tree = None
        self.max_depth = depth

    def mae(self, arr):
        """Return the mean absolute deviation of ``arr`` around its mean."""
        return np.mean(np.abs(arr - arr.mean()))

    def mse(self, arr):
        """Return the mean squared deviation of ``arr`` around its mean."""
        return np.mean((arr - arr.mean()) ** 2)

    def _best_split(self, X, y):
        """Find the (feature, threshold) pair with the lowest weighted MSE.

        Every unique value of every feature column is tried as a threshold.
        Candidates that leave one side empty are skipped.

        Args:
            X: 2-D array of shape (n_samples, n_features).
            y: Array of targets with ``len(y) == X.shape[0]``.

        Returns:
            ``(feature_index, threshold)``, or ``(None, None)`` when no
            candidate produces two non-empty children.
        """
        best_mse = float('inf')
        best_index = None
        best_thresh = None
        n_samples = len(y)

        for feature_index in range(X.shape[1]):
            column = X[:, feature_index]  # hoisted: invariant over thresholds
            for threshold in np.unique(column):
                left_mask = column <= threshold
                # Complement rather than ``>`` so every row lands on exactly
                # one side (NaN rows go right, as they do in predict_one).
                right_mask = ~left_mask

                y_left = y[left_mask]
                y_right = y[right_mask]
                if len(y_left) == 0 or len(y_right) == 0:
                    continue

                weighted_mse = (
                    len(y_left) * self.mse(y_left)
                    + len(y_right) * self.mse(y_right)
                ) / n_samples

                # Strict ``<`` keeps the first candidate on ties.
                if weighted_mse < best_mse:
                    best_mse = weighted_mse
                    best_index = feature_index
                    best_thresh = threshold

        return best_index, best_thresh

    def build_Tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``(X, y)``.

        Args:
            X: 2-D NumPy array of shape (n_samples, n_features).
            y: NumPy array of targets aligned with the rows of ``X``.
            depth: Depth of the node being built (root is 0).

        Returns:
            A node dict (see the class docstring for its layout).
        """
        # ``>=`` instead of ``==`` so a negative or non-integer max_depth
        # still stops the recursion; ``None`` keeps meaning "no limit".
        depth_reached = self.max_depth is not None and depth >= self.max_depth
        if depth_reached or len(np.unique(y)) <= 1:
            return {"leaf": True, 'value': np.mean(y)}

        feature_index, threshold = self._best_split(X, y)

        # No split separates the samples (e.g. all rows identical).
        if feature_index is None:
            return {"leaf": True, 'value': np.mean(y)}

        left_mask = X[:, feature_index] <= threshold
        right_mask = ~left_mask  # same partition _best_split evaluated

        left_subtree = self.build_Tree(X[left_mask], y[left_mask], depth + 1)
        right_subtree = self.build_Tree(X[right_mask], y[right_mask], depth + 1)

        return {
            'leaf': False,
            'feature_index': feature_index,
            'threshold': threshold,
            'left_subtree': left_subtree,
            'right_subtree': right_subtree,
        }

    def fit(self, X, y):
        """Fit the tree to the training data.

        Args:
            X: Array-like of shape (n_samples, n_features); converted with
                ``np.asarray`` so lists and DataFrames are accepted.
            y: Array-like of targets with one entry per row of ``X``.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``X`` is not 2-D, is empty, or its row count does
                not match ``y``.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        if y.ndim == 0 or X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        if X.shape[0] == 0:
            raise ValueError("cannot fit a tree on zero samples")

        self.Tree = self.tree = self.build_Tree(X, y)
        return self

    def predict_one(self, x):
        """Predict the target for a single sample.

        Args:
            x: Indexable sequence of feature values for one sample.

        Returns:
            The value stored in the leaf that ``x`` is routed to.

        Raises:
            AttributeError: If the tree has not been fitted. This is the same
                exception type an unfitted tree raised before, now with an
                explicit message.
        """
        node = self.Tree
        if node is None:
            raise AttributeError(
                "RegressionTree is not fitted yet; call fit(X, y) first"
            )

        while not node['leaf']:
            if x[node['feature_index']] <= node['threshold']:
                node = node['left_subtree']
            else:
                node = node['right_subtree']
        return node['value']

    def predict(self, X):
        """Predict targets for every row of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            A list with one prediction per row of ``X``.
        """
        # asarray makes iteration row-wise for DataFrames and nested lists.
        return [self.predict_one(row) for row in np.asarray(X)]

In [9]:
from sklearn.datasets import make_regression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Experiment settings, named so the run is easy to reproduce and tweak.
SEED = 42
TEST_SIZE = 0.2
MAX_DEPTH = 3

# Synthetic single-target regression problem with three features.
X, y = make_regression(n_features=3, n_targets=1, random_state=SEED)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Fit the hand-written tree on the training split and score the held-out split.
tree = RegressionTree(MAX_DEPTH)
tree.fit(X_train, y_train)
y_pred = tree.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("MSE:", mse)
print("MAE:", mae)
print("r2score:", r2)



MSE: 919.3340765765324
MAE: 23.317970889685093
r2score: 0.9188569048590992
