In [192]:
import numpy as np
import pandas as pd
X = np.array([
    [2, 6],
    [4, 5],
    [1, 8],
    [5, 5],
    [3, 7],
    [6, 4],
    [2, 7],
    [7, 3],
    [3, 6],
    [5, 6]
], dtype=float)

y = np.array([50, 60, 45, 65, 55, 70, 52, 75, 58, 68], dtype=float)

df = pd.concat([
    pd.DataFrame(X, columns=['Hours Studied', 'Sleep Hours']),
    pd.DataFrame(y, columns=['Score'])], axis=1
)
df

Unnamed: 0,Hours Studied,Sleep Hours,Score
0,2.0,6.0,50.0
1,4.0,5.0,60.0
2,1.0,8.0,45.0
3,5.0,5.0,65.0
4,3.0,7.0,55.0
5,6.0,4.0,70.0
6,2.0,7.0,52.0
7,7.0,3.0,75.0
8,3.0,6.0,58.0
9,5.0,6.0,68.0


In [193]:
class DecisionStump:
    def __init__(self, x, residuals):
        self.x = x
        self.y = residuals
        self.best_split_idx, self.best_split_value, self.left_value, self.right_value = self.get_best_split()

    def calc_err(self, left, right):
        l_mean = left.mean()
        r_mean = right.mean()

        left_sse = np.sum((left - l_mean) ** 2)
        right_sse = np.sum((right - r_mean) ** 2)

        total_sse = left_sse + right_sse
        return total_sse

    def get_best_split(self):
        X = self.x
        y = self.y
        n_features = X.shape[1]
        feature_sses = {}
        for feature_idx in range(n_features):
            thresholds = np.unique(X[:, feature_idx])
            all_sses = {}
            for threshold in thresholds:
                left_mask = X[:, feature_idx] <= threshold
                right_mask = X[:, feature_idx] > threshold
                if left_mask.sum() == 0 or right_mask.sum() == 0:
                    continue
                
                y_left = y[left_mask]
                y_right = y[right_mask]
                all_sses[threshold] = self.calc_err(y_left, y_right)
            best_threshold = min(all_sses, key=all_sses.get)
            feature_sses[feature_idx] = (best_threshold, all_sses[best_threshold] )
        # I know it's horrible it's just for debugging
        best_split_idx = min(feature_sses, key=lambda x: min(feature_sses, key=feature_sses.get))
        best_split_threshold = feature_sses[best_split_idx][0]
        left_mask = X[:, best_split_idx] <= best_split_threshold
        right_mask = X[:, best_split_idx] > best_split_threshold
        
        left_value = y[left_mask].mean()
        right_value = y[right_mask].mean()
        return (best_split_idx, best_split_threshold, left_value, right_value)

    def predict(self, x):
        mask = x[:, self.best_split_idx] <= self.best_split_value
        preds = np.zeros(X.shape[0])
        preds[mask] = self.left_value
        preds[~mask] = self.right_value
        return preds
    

In [194]:
class GradientBoost:
    def __init__(self, n_trees = 200, lr=0.1):
        self.n_trees = n_trees
        self.lr = lr
        self.trees = []
        self.F0 = None
    def fit(self, X, y):
        self.F0 = np.mean(y)
        preds = np.full_like(y, self.F0, dtype=float)

        for _ in range(self.n_trees):
            residuals = y - preds
            tree = DecisionStump(X, residuals)
            preds += self.lr * tree.predict(X)
            self.trees.append(tree)
            # print(preds, "-----")
            # print(tree.predict(X))
    
    def predict(self, X):
        preds = np.full(X.shape[0], self.F0, dtype=float)
        for tree in self.trees:
            preds += self.lr * tree.predict(X)
        return preds


In [195]:
gb = GradientBoost()
gb.fit(X,y)

In [196]:
preds = gb.predict(X)
preds, y

(array([51.00016448, 59.99996523, 45.00061499, 66.49990636, 56.50005177,
        69.99978445, 51.00016448, 74.99939012, 56.50005177, 66.49990636]),
 array([50., 60., 45., 65., 55., 70., 52., 75., 58., 68.]))