In [None]:
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

# Toy dataset: five samples with two features each and 0/1-valued targets.
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
y = np.array([1, 1, 0, 0, 0])

# Reference model from scikit-learn; random_state pins its internal randomness.
regressor = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)
regressor.fit(X, y)

# Predictions are made on the training samples themselves, so the error
# printed below is a training error.
y_pred = regressor.predict(X)

mse = np.square(y - y_pred).mean()
print(f"Mean Squared Error: {mse:.2f}")

def r2_score_percentage(y_true, y_pred):
    """Return the coefficient of determination (R²) as a percentage.

    Args:
        y_true: 1-D array-like of observed target values.
        y_pred: 1-D array-like of predicted values, same length as y_true.

    Returns:
        ``(1 - RSS / TSS) * 100``. When ``y_true`` is constant (TSS is zero)
        the ratio is undefined, so 100.0 is returned for a perfect prediction
        and 0.0 otherwise instead of nan/inf.

    Raises:
        ValueError: If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        raise ValueError("y_true must contain at least one value")

    # Total sum of squares: spread of the targets around their own mean.
    tss = np.sum((y_true - np.mean(y_true)) ** 2)

    # Residual sum of squares: what the predictions fail to explain.
    rss = np.sum((y_true - y_pred) ** 2)

    if tss == 0:
        # Constant target: avoid dividing by zero.
        return 100.0 if rss == 0 else 0.0

    return (1 - rss / tss) * 100

# Report how much of the target variance the model explains, in percent.
r2_score = r2_score_percentage(y_true=y, y_pred=y_pred)
print("R² Score:", r2_score)

Mean Squared Error: 0.00
R² Score: 99.99999992944922


# DecisionTreeRegressor Pseudocode

## Class: `DecisionTreeRegressor`

### Attributes:
- `root`: Stores the root node of the decision tree.

---

## Methods:

### `fit(X, y)`
**Purpose:** Build the decision tree based on the provided features `X` and target labels `y`.

1. Call `_build_tree(X, y)`.
2. Set `root` to the returned tree.

---

### `_build_tree(X, y)`
**Purpose:** Recursively build the decision tree.

1. Get the number of samples (`n_samples`) and features (`n_features`) in `X`.
2. Calculate the number of unique labels in `y` and store it as `n_labels`.
3. **Base Case:**
   - If `n_labels` (unique labels in `y`) is 1:
     - Compute the mean of labels as the leaf value using `_mean_of_labels(y)`.
     - Return a new `Node` with this value.
4. Find the best feature and threshold to split:
   - Call `_best_split(X, y)`.
   - Store `best_feature` and `best_threshold`.
5. Split the data based on the best feature and threshold:
   - Call `_split(X[:, best_feature], best_threshold)`.
   - Store indices for left (`left_idxs`) and right (`right_idxs`) splits.
6. Recursively build the left and right subtrees:
   - Call `_build_tree` on the left split: `(X[left_idxs, :], y[left_idxs])`.
   - Call `_build_tree` on the right split: `(X[right_idxs, :], y[right_idxs])`.
7. Return a new `Node` with the following:
   - `feature`: `best_feature`
   - `threshold`: `best_threshold`
   - `left`: Left subtree
   - `right`: Right subtree

---

### `_mean_of_labels(y)`
**Purpose:** Calculate the mean of the target labels.

1. Return the mean of `y`.

---

### `_split(X_column, split_threshold)`
**Purpose:** Split the data into left and right subsets based on a threshold.

1. Find indices where `X_column <= split_threshold`:
   - Store in `left_idxs`.
2. Find indices where `X_column > split_threshold`:
   - Store in `right_idxs`.
3. Return `left_idxs` and `right_idxs`.

---

### `_best_split(X, y)`
**Purpose:** Identify the best feature and threshold to split the data.

1. Initialize variables:
   - `best_gain = infinity`
   - `split_idx = None`
   - `split_threshold = None`
2. For each feature in `X`:
   - Extract the feature column.
   - Sort the values.
   - Compute midpoints of consecutive values as potential thresholds.
3. For each candidate threshold of the current feature (nested inside the loop of step 2):
   - Compute the information gain using `_information_gain(y, X_column, threshold)`.
   - If the gain is better (lower) than `best_gain`:
     - Update `best_gain`, `split_idx`, and `split_threshold`.
4. Return `split_idx` and `split_threshold`.

---

### `_information_gain(y, X_column, threshold)`
**Purpose:** Compute the information gain of a potential split.

1. Split the data using `_split(X_column, threshold)`:
   - Get `left_idxs` and `right_idxs`.
2. **Edge Case:** If either split is empty, return `0`.
3. Compute:
   - Total number of samples: `n`.
   - Number of left and right samples: `n_l`, `n_r`.
   - Entropy of left and right splits: `e_l`, `e_r`.
4. Calculate weighted entropy of the split:
   - `(n_l / n) * e_l + (n_r / n) * e_r`.
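5. Return this weighted variance as the split score (lower is better; despite the method's name, it is not subtracted from the parent's variance).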

---

### `_entropy(y)`
**Purpose:** Compute the variance (proxy for entropy) of labels.

1. Compute the mean squared deviation of `y`.
2. Return the variance.

---

### `predict(X)`
**Purpose:** Predict the output for input data `X`.

1. For each sample in `X`:
   - Traverse the tree starting from the root using `_traverse_tree`.
   - Store the prediction.
2. Return all predictions as an array.

---

### `_traverse_tree(x, node)`
**Purpose:** Traverse the decision tree to make a prediction for a single sample.

1. If `node` is a leaf node:
   - Return `node.value`.
2. Else:
   - If `x[node.feature] <= node.threshold`:
     - Recursively traverse the left subtree.
   - Otherwise, traverse the right subtree.
3. Return the result from the recursive traversal.


In [None]:
import numpy as np

class Node:
    """One node of a binary decision tree.

    An internal node carries a split (``feature``, ``threshold``) and two
    children; a leaf carries only ``value``, which must be passed by keyword.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        # Split rule applied at this node (unused on leaves).
        self.feature, self.threshold = feature, threshold
        # Child subtrees (unused on leaves).
        self.left, self.right = left, right
        # Prediction stored at a leaf; None marks an internal node.
        self.value = value

    def is_leaf_node(self):
        """Return True when this node stores a prediction value."""
        has_no_value = self.value is None
        return not has_no_value


class DecisionTreeRegressor:
    """Regression tree grown by greedy, variance-minimizing binary splits.

    A node becomes a leaf when all of its targets are identical or when no
    threshold separates its samples into two non-empty groups.
    """

    def __init__(self):
        # Root Node of the fitted tree; None until fit() is called.
        self.root = None

    def fit(self, X, y):
        """Build the tree from training data.

        Args:
            X: 2-D array-like, one row per sample and one column per feature.
            y: 1-D array-like of numeric targets, one per row of X.

        Raises:
            ValueError: If X is not 2-D, y is empty, or their lengths differ.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.size == 0 or X.shape[0] != y.shape[0]:
            raise ValueError("X and y must be non-empty and have the same number of samples")
        self.root = self._build_tree(X, y)

    def _build_tree(self, X, y):
        """Recursively grow the subtree for the samples ``(X, y)``."""
        if len(np.unique(y)) == 1:
            return Node(value=self._mean_of_labels(y))

        best_feature, best_threshold = self._best_split(X, y)
        if best_feature is None:
            # No threshold separates the samples (e.g. identical feature rows
            # with different targets): stop here instead of recursing forever.
            return Node(value=self._mean_of_labels(y))

        left_idxs, right_idxs = self._split(X[:, best_feature], best_threshold)
        left = self._build_tree(X[left_idxs, :], y[left_idxs])
        right = self._build_tree(X[right_idxs, :], y[right_idxs])
        return Node(best_feature, best_threshold, left, right)

    def _mean_of_labels(self, y):
        """Return the leaf prediction: the mean of the targets."""
        return np.mean(y)

    def _split(self, X_column, split_threshold):
        """Return index arrays for ``X_column <= threshold`` and ``> threshold``."""
        left_idxs = np.argwhere(X_column <= split_threshold).flatten()
        right_idxs = np.argwhere(X_column > split_threshold).flatten()
        return left_idxs, right_idxs

    def _best_split(self, X, y):
        """Return ``(feature_index, threshold)`` of the lowest-scoring split.

        Returns ``(None, None)`` when no candidate threshold leaves samples on
        both sides.
        """
        best_gain, split_idx, split_threshold = float("inf"), None, None
        if X.shape[0] < 2:
            return split_idx, split_threshold

        for feat_idx in range(X.shape[1]):
            X_column = X[:, feat_idx]
            sorted_values = np.sort(X_column)
            thresholds = (sorted_values[:-1] + sorted_values[1:]) / 2
            # When the largest values are tied, their midpoint equals the
            # column maximum and sends every sample left. _information_gain
            # scores such a split 0 (the best possible score), so it must be
            # excluded from the candidates.
            thresholds = thresholds[thresholds < sorted_values[-1]]

            for threshold in thresholds:
                gain = self._information_gain(y, X_column, threshold)
                if gain < best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_threshold = threshold

        return split_idx, split_threshold

    def _information_gain(self, y, X_column, threshold):
        """Return the size-weighted variance of the two sides of a split.

        Lower is better. Despite the name, the value is not subtracted from
        the parent's variance. Returns 0 when either side is empty.
        """
        left_idxs, right_idxs = self._split(X_column, threshold)

        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return 0

        n, n_l, n_r = len(y), len(left_idxs), len(right_idxs)
        e_l, e_r = self._entropy(y[left_idxs]), self._entropy(y[right_idxs])
        return (n_l / n) * e_l + (n_r / n) * e_r

    def _entropy(self, y):
        """Return the variance of ``y`` (used as the impurity measure)."""
        return np.mean((y - np.mean(y)) ** 2)

    def predict(self, X):
        """Return an array with one prediction per row of ``X``."""
        X = np.asarray(X)
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _traverse_tree(self, x, node):
        """Follow the split rules from ``node`` down to a leaf for sample ``x``."""
        if node.is_leaf_node():
            return node.value

        if x[node.feature] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)

# Toy dataset: five samples with two features each and 0/1-valued targets.
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
y = np.array([1, 1, 0, 0, 0])

# Grow an unrestricted tree on the full dataset.
regressor = DecisionTreeRegressor()
regressor.fit(X, y)

# Predictions are made on the training samples themselves, so the error
# printed below is a training error.
y_pred = regressor.predict(X)

mse = np.square(y - y_pred).mean()
print(f"Mean Squared Error: {mse:.2f}")

def r2_score_percentage(y_true, y_pred):
    """Return the coefficient of determination (R²) as a percentage.

    Args:
        y_true: 1-D array-like of observed target values.
        y_pred: 1-D array-like of predicted values, same length as y_true.

    Returns:
        ``(1 - RSS / TSS) * 100``. When ``y_true`` is constant (TSS is zero)
        the ratio is undefined, so 100.0 is returned for a perfect prediction
        and 0.0 otherwise instead of nan/inf.

    Raises:
        ValueError: If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        raise ValueError("y_true must contain at least one value")

    # Total sum of squares: spread of the targets around their own mean.
    tss = np.sum((y_true - np.mean(y_true)) ** 2)

    # Residual sum of squares: what the predictions fail to explain.
    rss = np.sum((y_true - y_pred) ** 2)

    if tss == 0:
        # Constant target: avoid dividing by zero.
        return 100.0 if rss == 0 else 0.0

    return (1 - rss / tss) * 100

# Report how much of the target variance the tree explains, in percent.
r2_score = r2_score_percentage(y_true=y, y_pred=y_pred)
print("R² Score:", r2_score)

Mean Squared Error: 0.00
R² Score: 100.0


# DecisionTreeRegressor Pseudocode

## Class: `DecisionTreeRegressor`

### Attributes:
- `root`: Stores the root node of the decision tree.
- `max_depth`: Maximum depth of the tree (default `3`); `None` disables the limit.

---

## Methods:

### `fit(X, y)`
**Purpose:** Build the decision tree based on the provided features `X` and target labels `y`.

1. Call `_build_tree(X, y)`.
2. Set `root` to the returned tree.

---

### `_build_tree(X, y, depth=0)`
**Purpose:** Recursively build the decision tree.

1. Determine the number of samples (`n_samples`) and features (`n_features`) in `X`.
2. Calculate the number of unique labels in `y` as `n_labels`.
3. **Base Case:**
   - If `n_labels` is 1 (all labels are identical), or if `max_depth` is set and `depth` has reached it:
     - Compute the mean of labels as the leaf value using `_mean_of_labels(y)`.
     - Return a new `Node` with `value=leaf_value`.
4. Find the best feature and threshold to split:
   - Call `_best_split(X, y)`.
   - Store `best_feature` and `best_threshold`.
5. Split the data into left and right subsets:
   - Call `_split(X[:, best_feature], best_threshold)`.
   - Store indices for left (`left_idxs`) and right (`right_idxs`) splits.
6. Recursively build the left and right subtrees, one level deeper:
   - Call `_build_tree(X[left_idxs, :], y[left_idxs], depth + 1)` for the left subtree.
   - Call `_build_tree(X[right_idxs, :], y[right_idxs], depth + 1)` for the right subtree.
7. Return a new `Node` with:
   - `feature`: `best_feature`
   - `threshold`: `best_threshold`
   - `left`: Left subtree
   - `right`: Right subtree

---

### `_mean_of_labels(y)`
**Purpose:** Calculate the mean of the target labels.

1. Return the mean of `y`.

---

### `_split(X_column, split_threshold)`
**Purpose:** Split the data into left and right subsets based on a threshold.

1. Identify indices where `X_column <= split_threshold`:
   - Store in `left_idxs`.
2. Identify indices where `X_column > split_threshold`:
   - Store in `right_idxs`.
3. Return `left_idxs` and `right_idxs`.

---

### `_best_split(X, y)`
**Purpose:** Identify the best feature and threshold to split the data.

1. Initialize variables:
   - `best_gain = infinity`
   - `split_idx = None`
   - `split_threshold = None`
2. Iterate over each feature in `X`:
   - Extract the feature column.
   - Sort the feature values.
   - Compute midpoints between consecutive values as potential thresholds.
3. For each candidate threshold of the current feature (nested inside the loop of step 2):
   - Compute the information gain using `_information_gain(y, X_column, threshold)`.
   - If the gain is better (lower) than `best_gain`:
     - Update `best_gain`, `split_idx`, and `split_threshold`.
4. Return `split_idx` and `split_threshold`.

---

### `_information_gain(y, X_column, threshold)`
**Purpose:** Compute the information gain for a potential split.

1. Split the data using `_split(X_column, threshold)`:
   - Obtain `left_idxs` and `right_idxs`.
2. **Edge Case:** If either split is empty, return `0`.
3. Compute:
   - Total samples: `n`.
   - Number of samples in left and right splits: `n_l`, `n_r`.
   - Variance (entropy proxy) of left and right splits: `e_l`, `e_r`.
4. Calculate the weighted variance:
   - `(n_l / n) * e_l + (n_r / n) * e_r`.
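5. Return this weighted variance as the split score (lower is better; despite the method's name, it is not subtracted from the parent's variance).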

---

### `_entropy(y)`
**Purpose:** Compute the variance (proxy for entropy) of the labels.

1. Compute the mean squared deviation of `y`:
   - `mean((y - mean(y))^2)`.
2. Return the variance.

---

### `predict(X)`
**Purpose:** Predict the output for input data `X`.

1. For each sample in `X`:
   - Traverse the tree starting from the root using `_traverse_tree`.
   - Store the prediction.
2. Return all predictions as an array.

---

### `_traverse_tree(x, node)`
**Purpose:** Traverse the decision tree to make a prediction for a single sample.

1. If `node` is a leaf node:
   - Return `node.value`.
2. Otherwise:
   - If `x[node.feature] <= node.threshold`:
     - Recursively traverse the left subtree.
   - Otherwise, recursively traverse the right subtree.
3. Return the result from the recursive traversal.


In [None]:
import numpy as np

class Node:
    """Tree node holding either a split rule with two children or a leaf value.

    ``value`` is keyword-only; a node with a non-None ``value`` is a leaf.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        # Leaf payload; stays None on internal nodes.
        self.value = value
        # Column index and cut-off used to route samples at an internal node.
        self.feature = feature
        self.threshold = threshold
        # Subtrees for samples at or below / above the threshold.
        self.left = left
        self.right = right

    def is_leaf_node(self):
        """Tell whether the node carries a prediction instead of a split."""
        if self.value is None:
            return False
        return True


class DecisionTreeRegressor:
    """Depth-limited regression tree using variance-minimizing binary splits.

    A node becomes a leaf when all of its targets are identical, when the
    depth limit is reached, or when no threshold separates its samples into
    two non-empty groups.
    """

    def __init__(self, max_depth=3):
        # Maximum number of split levels; None disables the limit.
        self.max_depth = max_depth
        # Root Node of the fitted tree; None until fit() is called.
        self.root = None

    def fit(self, X, y):
        """Build the tree from training data.

        Args:
            X: 2-D array-like, one row per sample and one column per feature.
            y: 1-D array-like of numeric targets, one per row of X.

        Raises:
            ValueError: If X is not 2-D, y is empty, or their lengths differ.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.size == 0 or X.shape[0] != y.shape[0]:
            raise ValueError("X and y must be non-empty and have the same number of samples")
        self.root = self._build_tree(X, y)

    def _build_tree(self, X, y, depth=0):
        """Recursively grow the subtree for ``(X, y)`` located at ``depth``."""
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if len(np.unique(y)) == 1 or depth_exhausted:
            return Node(value=self._mean_of_labels(y))

        best_feature, best_threshold = self._best_split(X, y)
        if best_feature is None:
            # No threshold separates the samples (e.g. identical feature rows
            # with different targets): make a leaf rather than building
            # children from an empty subset.
            return Node(value=self._mean_of_labels(y))

        left_idxs, right_idxs = self._split(X[:, best_feature], best_threshold)
        left = self._build_tree(X[left_idxs, :], y[left_idxs], depth + 1)
        right = self._build_tree(X[right_idxs, :], y[right_idxs], depth + 1)
        return Node(best_feature, best_threshold, left, right)

    def _mean_of_labels(self, y):
        """Return the leaf prediction: the mean of the targets."""
        return np.mean(y)

    def _split(self, X_column, split_threshold):
        """Return index arrays for ``X_column <= threshold`` and ``> threshold``."""
        left_idxs = np.argwhere(X_column <= split_threshold).flatten()
        right_idxs = np.argwhere(X_column > split_threshold).flatten()
        return left_idxs, right_idxs

    def _best_split(self, X, y):
        """Return ``(feature_index, threshold)`` of the lowest-scoring split.

        Returns ``(None, None)`` when no candidate threshold leaves samples on
        both sides.
        """
        best_gain, split_idx, split_threshold = float("inf"), None, None
        if X.shape[0] < 2:
            return split_idx, split_threshold

        for feat_idx in range(X.shape[1]):
            X_column = X[:, feat_idx]
            sorted_values = np.sort(X_column)
            thresholds = (sorted_values[:-1] + sorted_values[1:]) / 2
            # When the largest values are tied, their midpoint equals the
            # column maximum and sends every sample left. _information_gain
            # scores such a split 0 (the best possible score), so it must be
            # excluded from the candidates.
            thresholds = thresholds[thresholds < sorted_values[-1]]

            for threshold in thresholds:
                gain = self._information_gain(y, X_column, threshold)
                if gain < best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_threshold = threshold

        return split_idx, split_threshold

    def _information_gain(self, y, X_column, threshold):
        """Return the size-weighted variance of the two sides of a split.

        Lower is better. Despite the name, the value is not subtracted from
        the parent's variance. Returns 0 when either side is empty.
        """
        left_idxs, right_idxs = self._split(X_column, threshold)

        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return 0

        n, n_l, n_r = len(y), len(left_idxs), len(right_idxs)
        e_l, e_r = self._entropy(y[left_idxs]), self._entropy(y[right_idxs])
        return (n_l / n) * e_l + (n_r / n) * e_r

    def _entropy(self, y):
        """Return the variance of ``y`` (used as the impurity measure)."""
        return np.mean((y - np.mean(y)) ** 2)

    def predict(self, X):
        """Return an array with one prediction per row of ``X``."""
        X = np.asarray(X)
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _traverse_tree(self, x, node):
        """Follow the split rules from ``node`` down to a leaf for sample ``x``."""
        if node.is_leaf_node():
            return node.value

        if x[node.feature] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)

# Toy dataset: five samples with two features each.
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
y = np.array([1, 1, 0, 0, 0])  # numeric regression targets (0/1 here)

# Grow a tree limited to three levels of splits.
regressor = DecisionTreeRegressor(max_depth=3)
regressor.fit(X, y)

# Predictions are made on the training samples themselves, so the error
# printed below is a training error.
y_pred = regressor.predict(X)

mse = np.square(y - y_pred).mean()
print(f"Mean Squared Error: {mse:.2f}")

def r2_score_percentage(y_true, y_pred):
    """Return the coefficient of determination (R²) as a percentage.

    Args:
        y_true: 1-D array-like of observed target values.
        y_pred: 1-D array-like of predicted values, same length as y_true.

    Returns:
        ``(1 - RSS / TSS) * 100``. When ``y_true`` is constant (TSS is zero)
        the ratio is undefined, so 100.0 is returned for a perfect prediction
        and 0.0 otherwise instead of nan/inf.

    Raises:
        ValueError: If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        raise ValueError("y_true must contain at least one value")

    # Total sum of squares: spread of the targets around their own mean.
    tss = np.sum((y_true - np.mean(y_true)) ** 2)

    # Residual sum of squares: what the predictions fail to explain.
    rss = np.sum((y_true - y_pred) ** 2)

    if tss == 0:
        # Constant target: avoid dividing by zero.
        return 100.0 if rss == 0 else 0.0

    return (1 - rss / tss) * 100

# Report how much of the target variance the tree explains, in percent.
r2_score = r2_score_percentage(y_true=y, y_pred=y_pred)
print("R² Score:", r2_score)

Mean Squared Error: 0.00
R² Score: 100.0


# GradientBoostingRegressor Pseudocode

## Class: `GradientBoostingRegressor`

### Attributes:
- `n_estimators`: Number of trees (iterations) to build.
- `learning_rate`: Shrinkage factor applied to the predictions of each tree.
- `max_depth`: Maximum depth of each decision tree.
- `trees`: List to store all decision trees.
- `initial_prediction`: Baseline prediction (mean of the training targets), set by `fit`.

---

## Methods:

### `__init__(n_estimators=100, learning_rate=0.1, max_depth=1)`
**Purpose:** Initialize the regressor with the given parameters.

1. Set `self.n_estimators` to `n_estimators`.
2. Set `self.learning_rate` to `learning_rate`.
3. Set `self.max_depth` to `max_depth`.
4. Initialize `self.trees` as an empty list.

---

### `fit(X, y)`
**Purpose:** Train the gradient boosting model on the input data `X` and targets `y`.

1. Calculate the number of samples `m = len(y)`.
2. Initialize the model's baseline prediction as the mean of the target values:
   - `self.initial_prediction = mean(y)`.
3. Compute the initial residuals as:
   - `residuals = y - self.initial_prediction`.
4. **Iterate** for `n_estimators` steps:
   - Create a new `DecisionTreeRegressor` with `max_depth=self.max_depth`.
   - Fit the tree on the residuals:
     - `tree.fit(X, residuals)`.
   - Predict the residuals using the current tree:
     - `predictions = tree.predict(X)`.
   - Update the residuals by subtracting the learning rate-scaled predictions:
     - `residuals -= self.learning_rate * predictions`.
   - Add the tree to the `self.trees` list.

---

### `predict(X)`
**Purpose:** Predict target values for the input data `X`.

1. Initialize predictions with the baseline prediction:
   - `y_pred = array of size X.shape[0], filled with self.initial_prediction`.
2. **For each tree** in `self.trees`:
   - Add the learning rate-scaled predictions of the tree to `y_pred`:
     - `y_pred += self.learning_rate * tree.predict(X)`.
3. Return `y_pred` as the final predictions.


In [None]:
import numpy as np

class Node:
    """Building block of the regression tree.

    Internal nodes describe a split on ``feature`` at ``threshold`` with
    ``left``/``right`` subtrees; leaves are created with the keyword-only
    ``value`` argument and carry the prediction.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        # Routing information for internal nodes.
        self.feature, self.threshold, self.left, self.right = (
            feature,
            threshold,
            left,
            right,
        )
        # Prediction for leaves; None means "not a leaf".
        self.value = value

    def is_leaf_node(self):
        """Return whether a prediction value is stored on this node."""
        return self.value is not None


class DecisionTreeRegressor:
    """Depth-limited regression tree using variance-minimizing binary splits.

    A node becomes a leaf when all of its targets are identical, when the
    depth limit is reached, or when no threshold separates its samples into
    two non-empty groups.
    """

    def __init__(self, max_depth=3):
        # Maximum number of split levels; None disables the limit.
        self.max_depth = max_depth
        # Root Node of the fitted tree; None until fit() is called.
        self.root = None

    def fit(self, X, y):
        """Build the tree from training data.

        Args:
            X: 2-D array-like, one row per sample and one column per feature.
            y: 1-D array-like of numeric targets, one per row of X.

        Raises:
            ValueError: If X is not 2-D, y is empty, or their lengths differ.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.size == 0 or X.shape[0] != y.shape[0]:
            raise ValueError("X and y must be non-empty and have the same number of samples")
        self.root = self._build_tree(X, y)

    def _build_tree(self, X, y, depth=0):
        """Recursively grow the subtree for ``(X, y)`` located at ``depth``."""
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if len(np.unique(y)) == 1 or depth_exhausted:
            return Node(value=self._mean_of_labels(y))

        best_feature, best_threshold = self._best_split(X, y)
        if best_feature is None:
            # No threshold separates the samples (e.g. identical feature rows
            # with different targets): make a leaf rather than building
            # children from an empty subset.
            return Node(value=self._mean_of_labels(y))

        left_idxs, right_idxs = self._split(X[:, best_feature], best_threshold)
        left = self._build_tree(X[left_idxs, :], y[left_idxs], depth + 1)
        right = self._build_tree(X[right_idxs, :], y[right_idxs], depth + 1)
        return Node(best_feature, best_threshold, left, right)

    def _mean_of_labels(self, y):
        """Return the leaf prediction: the mean of the targets."""
        return np.mean(y)

    def _split(self, X_column, split_threshold):
        """Return index arrays for ``X_column <= threshold`` and ``> threshold``."""
        left_idxs = np.argwhere(X_column <= split_threshold).flatten()
        right_idxs = np.argwhere(X_column > split_threshold).flatten()
        return left_idxs, right_idxs

    def _best_split(self, X, y):
        """Return ``(feature_index, threshold)`` of the lowest-scoring split.

        Returns ``(None, None)`` when no candidate threshold leaves samples on
        both sides.
        """
        best_gain, split_idx, split_threshold = float("inf"), None, None
        if X.shape[0] < 2:
            return split_idx, split_threshold

        for feat_idx in range(X.shape[1]):
            X_column = X[:, feat_idx]
            sorted_values = np.sort(X_column)
            thresholds = (sorted_values[:-1] + sorted_values[1:]) / 2
            # When the largest values are tied, their midpoint equals the
            # column maximum and sends every sample left. _information_gain
            # scores such a split 0 (the best possible score), so it must be
            # excluded from the candidates.
            thresholds = thresholds[thresholds < sorted_values[-1]]

            for threshold in thresholds:
                gain = self._information_gain(y, X_column, threshold)
                if gain < best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_threshold = threshold

        return split_idx, split_threshold

    def _information_gain(self, y, X_column, threshold):
        """Return the size-weighted variance of the two sides of a split.

        Lower is better. Despite the name, the value is not subtracted from
        the parent's variance. Returns 0 when either side is empty.
        """
        left_idxs, right_idxs = self._split(X_column, threshold)

        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return 0

        n, n_l, n_r = len(y), len(left_idxs), len(right_idxs)
        e_l, e_r = self._entropy(y[left_idxs]), self._entropy(y[right_idxs])
        return (n_l / n) * e_l + (n_r / n) * e_r

    def _entropy(self, y):
        """Return the variance of ``y`` (used as the impurity measure)."""
        return np.mean((y - np.mean(y)) ** 2)

    def predict(self, X):
        """Return an array with one prediction per row of ``X``."""
        X = np.asarray(X)
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _traverse_tree(self, x, node):
        """Follow the split rules from ``node`` down to a leaf for sample ``x``."""
        if node.is_leaf_node():
            return node.value

        if x[node.feature] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)


class GradientBoostingRegressor:
    """Gradient boosting for regression built from DecisionTreeRegressor trees.

    The model starts from the mean of the targets and repeatedly fits a tree
    to the current residuals, adding each tree's prediction scaled by
    ``learning_rate``.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=1):
        # Number of boosting rounds (one tree per round).
        self.n_estimators = n_estimators
        # Shrinkage applied to every tree's contribution.
        self.learning_rate = learning_rate
        # Depth limit passed to each DecisionTreeRegressor.
        self.max_depth = max_depth
        # Fitted trees, in the order they were trained.
        self.trees = []

    def fit(self, X, y):
        """Train the ensemble on features ``X`` and targets ``y``.

        Calling fit() again discards the previously trained trees.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Start from an empty ensemble; otherwise a second fit() would stack
        # new trees on top of the old ones and predict() would sum both sets.
        self.trees = []
        self.initial_prediction = np.mean(y)
        residuals = y - self.initial_prediction

        for _ in range(self.n_estimators):
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)
            # Remove the part of the residual this tree will account for.
            residuals -= self.learning_rate * tree.predict(X)
            self.trees.append(tree)

    def predict(self, X):
        """Return the baseline plus the shrunken sum of all tree predictions."""
        X = np.asarray(X)
        y_pred = np.full(X.shape[0], self.initial_prediction)
        for tree in self.trees:
            y_pred += self.learning_rate * tree.predict(X)
        return y_pred
# Toy dataset: five samples with two features each and 0/1-valued targets.
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
y = np.array([1, 1, 0, 0, 0])

# Train the hand-written boosting model with deep trees.
regressor = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=10,
)
regressor.fit(X, y)

# Predictions are made on the training samples themselves, so the error
# printed below is a training error.
y_pred = regressor.predict(X)

mse = np.square(y - y_pred).mean()
print(f"Mean Squared Error: {mse:.2f}")

def r2_score_percentage(y_true, y_pred):
    """Return the coefficient of determination (R²) as a percentage.

    Args:
        y_true: 1-D array-like of observed target values.
        y_pred: 1-D array-like of predicted values, same length as y_true.

    Returns:
        ``(1 - RSS / TSS) * 100``. When ``y_true`` is constant (TSS is zero)
        the ratio is undefined, so 100.0 is returned for a perfect prediction
        and 0.0 otherwise instead of nan/inf.

    Raises:
        ValueError: If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        raise ValueError("y_true must contain at least one value")

    # Total sum of squares: spread of the targets around their own mean.
    tss = np.sum((y_true - np.mean(y_true)) ** 2)

    # Residual sum of squares: what the predictions fail to explain.
    rss = np.sum((y_true - y_pred) ** 2)

    if tss == 0:
        # Constant target: avoid dividing by zero.
        return 100.0 if rss == 0 else 0.0

    return (1 - rss / tss) * 100

# Report how much of the target variance the ensemble explains, in percent.
r2_score = r2_score_percentage(y_true=y, y_pred=y_pred)
print("R² Score:", r2_score)

Mean Squared Error: 0.00
R² Score: 99.99999992944922
