In [5]:
import numpy as np
from collections import defaultdict

In [19]:
class XgBt:
    def __init__(self, max_depth=3, min_samples_split=2, lambda_=1.0, gamma=0.0):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.lambda_ = lambda_
        self.gamma = gamma
        self.tree = None

    def _calc_gain(self, G, H):
        return (G ** 2) / (H + self.lambda_)

    def _split_score(self, G_left, H_left, G_right, H_right, G_total, H_total):
        gain = (
            self._calc_gain(G_left, H_left)
            + self._calc_gain(G_right, H_right)
            - self._calc_gain(G_total, H_total)
        )
        return gain - self.gamma

    def _best_split(self, X, g, h):
        best_gain = -np.inf
        best_feat, best_thresh = None, None
        n_samples, n_features = X.shape
        G_total, H_total = np.sum(g), np.sum(h)

        for feature in range(n_features):
            sorted_indices = X[:, feature].argsort()
            X_sorted, g_sorted, h_sorted = X[sorted_indices], g[sorted_indices], h[sorted_indices]

            G_left, H_left = 0, 0
            G_right, H_right = G_total, H_total

            for i in range(1, n_samples):
                G_left += g_sorted[i - 1]
                H_left += h_sorted[i - 1]
                G_right -= g_sorted[i - 1]
                H_right -= h_sorted[i - 1]

                if X_sorted[i, feature] == X_sorted[i - 1, feature]:
                    continue

                gain = self._split_score(G_left, H_left, G_right, H_right, G_total, H_total)

                if gain > best_gain:
                    best_gain = gain
                    best_feat = feature
                    best_thresh = (X_sorted[i, feature] + X_sorted[i - 1, feature]) / 2

        return best_feat, best_thresh, best_gain

    def _build_tree(self, X, g, h, depth):
        if depth >= self.max_depth or len(X) < self.min_samples_split:
            weight = -np.sum(g) / (np.sum(h) + self.lambda_)
            return {'leaf': weight}

        feat, thresh, gain = self._best_split(X, g, h)
        if feat is None or gain <= 0:
            weight = -np.sum(g) / (np.sum(h) + self.lambda_)
            return {'leaf': weight}

        left_idx = X[:, feat] <= thresh
        right_idx = ~left_idx

        left_subtree = self._build_tree(X[left_idx], g[left_idx], h[left_idx], depth + 1)
        right_subtree = self._build_tree(X[right_idx], g[right_idx], h[right_idx], depth + 1)

        return {
            'feature': feat,
            'threshold': thresh,
            'left': left_subtree,
            'right': right_subtree,
        }

    def fit(self, X, g, h):
        self.tree = self._build_tree(X, g, h, 0)

    def _predict_row(self, x, node):
        if 'leaf' in node:
            return node['leaf']
        if x[node['feature']] <= node['threshold']:
            return self._predict_row(x, node['left'])
        else:
            return self._predict_row(x, node['right'])

    def predict(self, X):
        return np.array([self._predict_row(row, self.tree) for row in X])


class XGBoostRegressor:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3, lambda_=1.0, gamma=0.0):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.lambda_ = lambda_
        self.gamma = gamma
        self.trees = []

    def fit(self, X, y):
        y_pred = np.zeros_like(y)
        for _ in range(self.n_estimators):
            g = y_pred - y
            h = np.ones_like(y)
            tree = XgBt(max_depth=self.max_depth, lambda_=self.lambda_, gamma=self.gamma)
            tree.fit(X, g, h)
            update = tree.predict(X)
            y_pred -= self.learning_rate * update
            self.trees.append(tree)

    def predict(self, X):
        y_pred = np.zeros(X.shape[0])
        for tree in self.trees:
            y_pred -= self.learning_rate * tree.predict(X)
        return y_pred


In [21]:
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([1.2, 1.9, 3.0, 4.1, 5.1])

model = XGBoostRegressor(n_estimators=5, learning_rate=0.1, max_depth=2)
model.fit(X, y)
preds = model.predict(X)
print(preds)


[-0.33153787 -1.65438147 -1.65438147 -1.65438147 -1.65438147]
