# In [3]:
import numpy as np

class DecisionTreeRegressor:
    """Regression tree grown by exhaustive search over squared-error splits.

    After ``fit`` the tree is stored in ``self.tree``. An internal node is a
    tuple ``(feature, threshold, left_subtree, right_subtree)``; a leaf is the
    mean target of the training samples that reached it.
    """

    def __init__(self, max_depth=None):
        """Store the depth limit.

        Args:
            max_depth: Maximum number of split levels. With ``None`` the depth
                check never triggers, so growth stops only when a node holds
                at most one sample or no valid split exists.
        """
        self.max_depth = max_depth

    def fit(self, X, y):
        """Grow the tree on the training data.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples targets.

        Returns:
            The fitted estimator itself, so calls can be chained.
        """
        # Coerce up front: the builder relies on ``.shape`` and boolean-mask
        # indexing, which plain Python lists do not support.
        X = np.asarray(X)
        y = np.asarray(y)
        self.tree = self._build_tree(X, y)
        return self

    def _best_split(self, X, y):
        """Return ``(feature, threshold)`` minimising the summed squared error.

        Every observed value of every feature is tried as a threshold, with
        samples ``<= threshold`` going left and samples ``> threshold`` going
        right. Returns ``(None, None)`` when no threshold leaves both sides
        non-empty.
        """
        best_feature = None
        best_threshold = None
        best_loss = float('inf')
        for feature in range(X.shape[1]):
            column = X[:, feature]  # hoisted: invariant across thresholds
            for threshold in np.unique(column):
                left_y = y[column <= threshold]
                right_y = y[column > threshold]
                if len(left_y) == 0 or len(right_y) == 0:
                    continue
                loss = np.sum((left_y - np.mean(left_y)) ** 2) + np.sum((right_y - np.mean(right_y)) ** 2)
                # Strict comparison keeps the first candidate on ties.
                if loss < best_loss:
                    best_loss = loss
                    best_feature = feature
                    best_threshold = threshold
        return best_feature, best_threshold

    def _build_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``(X, y)``.

        Returns either a leaf value (mean of ``y``) or a tuple
        ``(feature, threshold, left_subtree, right_subtree)``.
        """
        n_samples, _ = X.shape
        if depth == self.max_depth or n_samples <= 1:
            return np.mean(y)

        best_feature, best_threshold = self._best_split(X, y)
        if best_feature is None:
            # No threshold separates the samples: make this node a leaf.
            return np.mean(y)

        left_mask = X[:, best_feature] <= best_threshold
        right_mask = X[:, best_feature] > best_threshold
        left_tree = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right_tree = self._build_tree(X[right_mask], y[right_mask], depth + 1)
        return best_feature, best_threshold, left_tree, right_tree

    def predict(self, X):
        """Return an array with one prediction per row of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features).
        """
        X = np.asarray(X)
        return np.array([self._predict_tree(x, self.tree) for x in X])

    def _predict_tree(self, x, tree):
        """Walk ``tree`` for the single sample ``x`` and return the leaf value."""
        node = tree
        # Internal nodes are tuples; anything else is a leaf value.
        while isinstance(node, tuple):
            feature, split, left_tree, right_tree = node
            node = left_tree if x[feature] <= split else right_tree
        return node

class GradientBoostingRegressor:
    """Additive ensemble of regression trees fitted to successive residuals.

    The ensemble starts from a zero prediction: the first tree is fitted to
    the raw targets and each later tree to what the previous trees left
    unexplained. Every tree's contribution is scaled by ``learning_rate``.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        """Store the hyper-parameters.

        Args:
            n_estimators: Number of trees fitted by ``fit``.
            learning_rate: Factor applied to every tree's prediction.
            max_depth: Depth limit passed to each tree.
        """
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []

    def fit(self, X, y):
        """Fit ``n_estimators`` trees, each on the current residuals.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples targets.

        Returns:
            The fitted estimator itself, so calls can be chained.
        """
        X = np.asarray(X)
        # np.array copies, so the in-place updates below never touch the
        # caller's ``y``.
        residuals = np.array(y, dtype=float)

        # Start from an empty ensemble; otherwise a second ``fit`` would stack
        # new trees on top of the old ones and corrupt ``predict``.
        self.trees = []

        for _ in range(self.n_estimators):
            # Fit a decision tree to the residuals
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)

            # Remove the part of the residuals this tree accounts for
            residuals -= self.learning_rate * tree.predict(X)

            # Add the tree to the ensemble
            self.trees.append(tree)
        return self

    def predict(self, X):
        """Return the sum of the scaled predictions of all fitted trees.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            A float array with one prediction per row (all zeros when no tree
            has been fitted).
        """
        X = np.asarray(X)
        predictions = np.zeros(len(X))
        for tree in self.trees:
            predictions += self.learning_rate * tree.predict(X)
        return predictions

def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences between targets and predictions.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values.
    """
    # Convert to float first: lists do not support ``-``, and small or
    # unsigned integer dtypes would wrap around when subtracted or squared.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean((y_true - y_pred) ** 2)

def r_squared(y_true, y_pred):
    """Return the coefficient of determination: 1 - SS_residual / SS_total.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values.

    Returns:
        The R-squared score. When ``y_true`` is constant (SS_total is zero) the
        ratio is undefined; 1.0 is returned for a perfect fit and 0.0
        otherwise. NaN is returned for empty input.
    """
    # Float conversion accepts lists and avoids integer wrap-around.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        return float('nan')

    residual_ss = np.sum((y_true - y_pred) ** 2)
    total_ss = np.sum((y_true - np.mean(y_true)) ** 2)
    if total_ss == 0:
        # Constant targets: avoid the 0/0 or x/0 division.
        return 1.0 if residual_ss == 0 else 0.0
    return 1 - (residual_ss / total_ss)

# Toy training set: five single-feature samples whose targets are x + 1
X_train = np.array([[value] for value in range(1, 6)])
y_train = np.array([value + 1 for value in range(1, 6)])

# Build the boosted ensemble and train it on the toy data
gb_regressor = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
)
gb_regressor.fit(X_train, y_train)

# Predict on the same samples the model was trained on
y_pred = gb_regressor.predict(X_train)

# Score the in-sample fit
mse, r2 = mean_squared_error(y_train, y_pred), r_squared(y_train, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)


# Output:
# Mean Squared Error: 1.2699142395739956e-08
# R-squared: 0.9999999936504288
