In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, plot_tree

from joblib import Parallel, delayed

In [20]:
from QuadraticConstraintModel import get_leaf_samples

from QuadraticConstraintModel import constrained_optimization_gurobi

from QuadraticConstraintModel import predict_from_COF

from QuadraticConstraintModel import  get_h_from_COF

from QuadraticConstraintModel import get_feature_bounds_from_COF

In [3]:
def normalized_root_mean_square_error(y_true, y_pred):
    """
    Computes the Normalized Root Mean Square Error (NRMSE) between y_true and y_pred.
    If the range of y_true is zero, it normalizes by the number of samples * outputs.

    Parameters:
        y_true (np.ndarray): Ground truth values, shape (n_samples, n_outputs)
        y_pred (np.ndarray): Predicted values, shape (n_samples, n_outputs)

    Returns:
        float: NRMSE value
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    
    # Compute RMSE
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    
    # Compute range
    y_range = np.max(y_true) - np.min(y_true)
    
    if y_range != 0:
        # Normalize by range
        return rmse / y_range
    else:
        # Normalize by n_samples * n_outputs
        n_samples, n_outputs = y_true.shape
        return np.sqrt(np.sum((y_true - y_pred) ** 2) / (n_samples * n_outputs))


In [13]:
def process_leaf(leaf_id, indices, X_train, y_train, feature_names, optimizer):
    X_leaf = X_train[indices]
    y_leaf = y_train[indices]
    
    # Choose optimizer
    if optimizer == "gurobi":
        M, m0, h = constrained_optimization_gurobi(X_leaf, y_leaf)
    elif optimizer == "gurobi_MSE":
        M, m0, h = constrained_optimization_MSE_gurobi(X_leaf, y_leaf)
    elif optimizer == "least_squares":
        M, m0, h = least_squares_solution(X_leaf, y_leaf)
    elif optimizer == "gurobi_l2":
        # print("Optimizing with L2 regularization")
        M, m0, h = constrained_optimization_regularization_gurobi(X_leaf, y_leaf)
    elif optimizer == "gurobi_MSE_l2":
        # print("Optimizing with MSE L2 regularization")
        M, m0, h = constrained_optimization_MSE_regularization_gurobi(X_leaf, y_leaf)
    else:
        M, m0, h = constrained_optimization(X_leaf, y_leaf)
    
    # Build model info
    model = {
        "leaf_id": leaf_id,
        "CO_Model": {'M': M, 'm0': m0, 'h': h},
        "no_samples": len(indices),
        "indices": indices,
        "bounds": {
            feature_names[i]: (X_leaf[:, i].min(), X_leaf[:, i].max())
            for i in range(X_leaf.shape[1])
        }
    }
    return model


def train_COF_on_leaves_parallel(X_train, y_train, tree, feature_names=None, optimizer="gurobi", n_jobs=-1):
    """
    Train constrained optimization models on tree leaves in parallel.

    Parameters:
        X_train, y_train : np.ndarray
        tree : fitted sklearn tree
        feature_names : list of feature names (optional)
        optimizer : {"gurobi", "CVXPY + SCS"}
        n_jobs : number of parallel workers (-1 = all cores)
    """
    leaf_samples = get_leaf_samples(tree, X_train)

    if feature_names is None:
        feature_names = [f"feature_{i}" for i in range(X_train.shape[1])]

    # Run leaf computations in parallel
    tree_extracted_info = Parallel(n_jobs=n_jobs)(
        delayed(process_leaf)(leaf_id, indices, X_train, y_train, feature_names, optimizer)
        for leaf_id, indices in leaf_samples.items()
    )

    return tree_extracted_info


In [4]:
# Function to load DataSet
def load_dataset(file_path, num_attributes=2, num_classes=2):
    data = pd.read_csv(file_path)
    X = data.iloc[:, 0 :  num_attributes].values
    y = data.iloc[:,  num_attributes:  num_attributes + num_classes].values
    # y = data.iloc[:, 9:10].values
    return X, y

In [47]:
sys_name = "navigation_old" #"exp"
n_samples = 500000 #10000
X, y = load_dataset(f"Dataset/{sys_name}/{sys_name}_{n_samples}/data_{sys_name}_{n_samples}.csv",num_attributes=2, num_classes=2)

In [48]:
X_train, X_test, y_train, y_test = train_test_split(X, y,  test_size=0.1)
print(f" Shape of X_Training = {X_train.shape} \n Shape of X_Testing = {X_test.shape}")
print(f" Shape of Y_Training = {y_train.shape} \n Shape of Y_Testing = {y_test.shape}")


 Shape of X_Training = (449999, 2) 
 Shape of X_Testing = (50000, 2)
 Shape of Y_Training = (449999, 2) 
 Shape of Y_Testing = (50000, 2)


In [9]:
tree = DecisionTreeRegressor()
tree.fit(X_train,y_train)
print(f"Number of leaves: {tree.get_n_leaves()}")
print(f"Total depth of tree: {tree.get_depth()}")
print(f"Number of nodes: {tree.tree_.node_count}")

Number of leaves: 5000
Total depth of tree: 22
Number of nodes: 9999


In [49]:
def train_and_prune_COF_tree_v3(
    X_train, y_train, X_test=None, y_test=None,
    initial_tree_params=None, optimizer="gurobi",
    alpha=1e-6, h_min=0, ignore_h=False, n_jobs=-1
):
    """
    Train a decision tree, build COF models, and prune tree sequentially.
    """

    # -------------------------------
    # 1️⃣ Train initial tree
    # -------------------------------
    if initial_tree_params is None:
        initial_tree_params = {"max_depth": 5}

    tree = DecisionTreeRegressor(**initial_tree_params)
    tree.fit(X_train, y_train)

    # -------------------------------
    # 2️⃣ Build COF models for leaves in parallel
    # -------------------------------
    COF_model_tree = train_COF_on_leaves_parallel(
        X_train, y_train, tree, optimizer=optimizer, n_jobs=n_jobs
    )

    children_left = tree.tree_.children_left
    children_right = tree.tree_.children_right
    leaf_nodes = np.where(children_left == -1)[0]

    leaf_h_dict = {leaf: COF_model_tree[i]['CO_Model']['h'] for i, leaf in enumerate(leaf_nodes)}
    leaf_indices_dict = {leaf: COF_model_tree[i]['indices'] for i, leaf in enumerate(leaf_nodes)}
    leaf_COFS_dict = {leaf: COF_model_tree[i] for i, leaf in enumerate(leaf_nodes)}

    # -------------------------------
    # 3️⃣ Helper function: compute COF
    # -------------------------------
    def compute_h_and_COF(indices):
        """
        Returns h, M, m0 for given indices using optimizer
        """
        if ignore_h:
            return 0, np.zeros((1, X_train.shape[1])), np.zeros(1)

        if optimizer == "gurobi":
            M, m0, h = constrained_optimization_gurobi(X_train[indices], y_train[indices])
        elif optimizer == "gurobi_MSE":
            M, m0, h = constrained_optimization_MSE_gurobi(X_train[indices], y_train[indices])
        else:
            M, m0, h = constrained_optimization(X_train[indices], y_train[indices])

        return h, M, m0

    # -------------------------------
    # 4️⃣ Sequential pruning function
    # -------------------------------
    def prune_node(node):
        left = children_left[node]
        right = children_right[node]

        # Leaf node
        if left == -1 and right == -1:
            return leaf_h_dict[node], 1, leaf_indices_dict[node], False

        # Process children sequentially
        left_cost, left_leaves, left_indices, _ = prune_node(left)
        right_cost, right_leaves, right_indices, _ = prune_node(right)

        combined_indices = np.concatenate([left_indices, right_indices])
        subtree_cost = left_cost + right_cost
        subtree_leaves = left_leaves + right_leaves

        # Compute parent COF
        h_parent, M_parent, m0_parent = compute_h_and_COF(combined_indices)
        prune_cost = h_parent + alpha

        # Decide pruning
        prune_flag = (not ignore_h and h_parent < h_min) or (prune_cost <= subtree_cost)

        if prune_flag:
            # Update tree
            children_left[node] = -1
            children_right[node] = -1

            # Remove child leaves
            for child in [left, right]:
                leaf_COFS_dict.pop(child, None)
                leaf_h_dict.pop(child, None)
                leaf_indices_dict.pop(child, None)

            # Update parent leaf info
            X_leaf = X_train[combined_indices]
            leaf_h_dict[node] = h_parent
            leaf_indices_dict[node] = combined_indices
            leaf_COFS_dict[node] = {
                "leaf_id": node,
                "CO_Model": {"h": h_parent, "M": M_parent, "m0": m0_parent},
                "indices": combined_indices,
                "no_samples": len(combined_indices),
                "bounds": {
                    f"feature_{i}": (X_leaf[:, i].min(), X_leaf[:, i].max())
                    for i in range(X_leaf.shape[1])
                }
            }

            # Debug printing
            print(f"🌳 Pruning triggered at node {node}, h={h_parent:.6f}, alpha={alpha}")
            print(f"➡️ Leaf indices count: {len(combined_indices)}")
            print(f"➡️ M shape: {M_parent.shape}, m0 shape: {m0_parent.shape}")

            return prune_cost, 1, combined_indices, True
        else:
            return subtree_cost, subtree_leaves, combined_indices, False

    # -------------------------------
    # 5️⃣ Stats before pruning
    # -------------------------------
    num_leaves_before = len(leaf_nodes)
    print(f"Before pruning: Leaves={num_leaves_before}")
    print(f"h values before pruning: {list(leaf_h_dict.values())}")

    # -------------------------------
    # 6️⃣ Iterative pruning until stable
    # -------------------------------
    previous_leaf_count = -1
    while True:
        prune_node(0)
        COF_model_tree_pruned = list(leaf_COFS_dict.values())
        current_leaf_count = len(COF_model_tree_pruned)
        print(f"➡️ Current leaf count after step: {current_leaf_count}")
        if current_leaf_count == previous_leaf_count:
            break
        previous_leaf_count = current_leaf_count

    # -------------------------------
    # 7️⃣ Stats after pruning
    # -------------------------------
    print(f"After pruning: Leaves={len(COF_model_tree_pruned)}")
    print(f"h values after pruning: {[leaf['CO_Model']['h'] for leaf in COF_model_tree_pruned]}")

    return tree, COF_model_tree_pruned

In [50]:
tree, COF_model =train_and_prune_COF_tree_v3(X_train, y_train, X_test=X_test, y_test=y_test, 
                                initial_tree_params={"min_samples_leaf":10}, optimizer="gurobi", 
                                alpha=2, h_min=1, ignore_h=True, n_jobs=-1)

Set parameter Username
Set parameter LicenseID to value 2598283
Set parameter Username
Set parameter LicenseID to value 2598283
Academic license - for non-commercial use only - expires 2025-12-11
Academic license - for non-commercial use only - expires 2025-12-11
Set parameter Username
Set parameter LicenseID to value 2598283
Set parameter Username
Set parameter LicenseID to value 2598283
Set parameter Username
Set parameter LicenseID to value 2598283
Set parameter Username
Set parameter LicenseID to value 2598283
Set parameter Username
Set parameter LicenseID to value 2598283
Academic license - for non-commercial use only - expires 2025-12-11
Set parameter Username
Set parameter LicenseID to value 2598283
Academic license - for non-commercial use only - expires 2025-12-11
Academic license - for non-commercial use only - expires 2025-12-11
Academic license - for non-commercial use only - expires 2025-12-11
Academic license - for non-commercial use only - expires 2025-12-11
Set paramete

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [26]:
COF_model

[{'leaf_id': np.int64(119),
  'CO_Model': {'M': array([[-1.23378826e-02,  2.06652489e+00],
          [-1.00000460e+00, -3.03843062e-07]]),
   'm0': array([1.18663109e-01, 6.81040748e-07]),
   'h': 0.006782454659385989},
  'no_samples': 125,
  'indices': [0,
   49,
   78,
   192,
   285,
   456,
   473,
   545,
   558,
   577,
   620,
   668,
   677,
   707,
   815,
   967,
   989,
   1123,
   1174,
   1339,
   1382,
   1436,
   1519,
   1522,
   1558,
   1608,
   1649,
   1681,
   1772,
   1780,
   1836,
   1908,
   1965,
   2061,
   2203,
   2358,
   2405,
   2426,
   2486,
   2498,
   2535,
   2566,
   2661,
   2786,
   2869,
   3114,
   3328,
   3368,
   3411,
   3444,
   3471,
   3659,
   3691,
   3733,
   3765,
   3771,
   3851,
   3899,
   3920,
   3987,
   3999,
   4174,
   4192,
   4193,
   4232,
   4272,
   4353,
   4373,
   4425,
   4469,
   4475,
   4488,
   4543,
   4603,
   4636,
   4788,
   4864,
   4872,
   4900,
   4982,
   5109,
   5254,
   5379,
   5388,
   5399,
   5

In [51]:
len(COF_model)

1

In [28]:
constraints_on_leaves = get_feature_bounds_from_COF(COF_model)

In [33]:
high_h = get_h_from_COF(COF_model, greater_then= 1)
high_h

[2.3358495465642157,
 6.401459401470405,
 8.856544282965023,
 1.3102764249261871,
 4.058125658872891,
 5.48017675272217,
 7.976992881504814,
 7.7558770343066845,
 2.33576318726724,
 2.438149783979728,
 1.7834954335979425,
 1.1689565347365947,
 3.3519714372404406,
 1.1902964284238906,
 3.0468206592791613,
 1.790857861507377,
 1.9686439089633996,
 1.648850032892773,
 2.500528756964233,
 1.0601470381418516,
 5.152810615478651,
 1.4765730327967377,
 1.977888359054111,
 1.9810801208818194,
 1.4352632213396839,
 1.5470871544541493,
 1.4245731258260275,
 2.066862192094619,
 1.84266639032782,
 1.2175944993636598,
 2.302638149702546,
 1.1855165401714511,
 1.0993899675283811,
 9.566766112797573,
 5.7163571287356865,
 4.210530594890318,
 6.8223434610032125,
 2.107631570123387,
 5.0299485939750825,
 1.4006955129322545,
 5.055356229351448]

# Custom

In [45]:
import numpy as np
from typing import Optional, List

class _Node:
    def __init__(self, depth: int = 0):
        self.depth = depth
        self.is_leaf = True
        self.feature: Optional[int] = None
        self.threshold: Optional[float] = None
        self.left: Optional[int] = None
        self.right: Optional[int] = None
        self.n_samples: int = 0
        self.value: Optional[np.ndarray] = None
        self.mse: float = 0.0
        self.idx: Optional[np.ndarray] = None  # keep track of samples reaching this node
        self.leaf_id: Optional[int] = None     # assign after pruning

class CustomDecisionTreeRegressor:
    def __init__(
        self,
        max_depth: Optional[int] = None,
        min_samples_split: int = 2,
        mse_threshold: float = 1e-7,
        min_improvement: float = 1e-7,
    ):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.mse_threshold = mse_threshold
        self.min_improvement = min_improvement

        self.n_features_in_: Optional[int] = None
        self.n_outputs_: Optional[int] = None
        self._nodes: List[_Node] = []

    @staticmethod
    def _mse(sum_y: np.ndarray, sum_y2: float, n: int) -> float:
        if n <= 0:
            return 0.0
        return float((sum_y2 - float(np.sum(sum_y ** 2)) / n) / n)

    def fit(self, X: np.ndarray, y: np.ndarray):
        X = np.asarray(X)
        y = np.asarray(y)
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        self.n_features_in_ = X.shape[1]
        self.n_outputs_ = y.shape[1]
        self._nodes = []
        self._build_node(X, y, np.arange(X.shape[0]), depth=0)
        self._assign_leaf_ids()
        self._fitted_X = X
        self._fitted_y = y
        return self

    def _build_node(self, X, y, idx, depth):
        node_id = len(self._nodes)
        node = _Node(depth=depth)
        self._nodes.append(node)

        Y = y[idx]
        n_node = Y.shape[0]
        sum_y = Y.sum(axis=0)
        sum_y2 = float((Y ** 2).sum())

        node.n_samples = n_node
        node.value = sum_y / max(n_node, 1)
        node.mse = self._mse(sum_y, sum_y2, n_node)
        node.idx = idx

        stop_by_depth = (self.max_depth is not None and depth >= self.max_depth)
        if n_node < self.min_samples_split or stop_by_depth or node.mse <= self.mse_threshold:
            node.is_leaf = True
            return node_id

        best = self._best_split(X, y, idx, sum_y, sum_y2, n_node)
        if best is None:
            node.is_leaf = True
            return node_id

        feat, thr, left_idx, right_idx, mse_left, mse_right = best
        parent_mse = node.mse
        n_left, n_right = left_idx.size, right_idx.size
        weighted_child_mse = (n_left * mse_left + n_right * mse_right) / n_node
        gain = parent_mse - weighted_child_mse

        if gain <= self.min_improvement:
            node.is_leaf = True
            return node_id

        node.is_leaf = False
        node.feature = int(feat)
        node.threshold = float(thr)
        node.left = self._build_node(X, y, left_idx, depth + 1)
        node.right = self._build_node(X, y, right_idx, depth + 1)
        return node_id

    def _best_split(self, X, y, idx, sum_y, sum_y2, n_node):
        X_node = X[idx]
        Y_node = y[idx]
        total_sum_y = sum_y
        total_sum_y2 = sum_y2
        parent_mse = self._mse(total_sum_y, total_sum_y2, n_node)

        best_feat = None
        best_thr = None
        best_left_idx = None
        best_right_idx = None
        best_mse_left = None
        best_mse_right = None

        for f in range(self.n_features_in_):
            x = X_node[:, f]
            order = np.argsort(x, kind="mergesort")
            x_sorted = x[order]
            Y_sorted = Y_node[order]

            diffs = x_sorted[1:] - x_sorted[:-1]
            valid = diffs != 0.0
            if not np.any(valid):
                continue

            csum_y = np.cumsum(Y_sorted, axis=0)
            row_sq = np.einsum("ij,ij->i", Y_sorted, Y_sorted)
            csum_y2 = np.cumsum(row_sq)
            split_positions = np.nonzero(valid)[0]

            left_n = (split_positions + 1).astype(np.int64)
            right_n = n_node - left_n
            left_sum_y = csum_y[split_positions]
            right_sum_y = total_sum_y - left_sum_y
            left_sum_y2 = csum_y2[split_positions]
            right_sum_y2 = total_sum_y2 - left_sum_y2

            left_mse = (left_sum_y2 - np.sum(left_sum_y ** 2, axis=1) / left_n) / left_n
            right_mse = (right_sum_y2 - np.sum(right_sum_y ** 2, axis=1) / right_n) / right_n
            weighted_child_mse = (left_n * left_mse + right_n * right_mse) / n_node

            best_pos = int(np.argmin(weighted_child_mse))
            candidate_mse = float(weighted_child_mse[best_pos])
            if candidate_mse >= parent_mse:
                continue

            i = split_positions[best_pos]
            thr = 0.5 * (x_sorted[i] + x_sorted[i + 1])
            mask_left = x <= thr
            left_idx = idx[mask_left]
            right_idx = idx[~mask_left]
            if left_idx.size == 0 or right_idx.size == 0:
                continue

            if best_thr is None or candidate_mse < (best_mse_left + best_mse_right if best_mse_left is not None else np.inf):
                best_feat = f
                best_thr = thr
                best_mse_left = float(left_mse[best_pos])
                best_mse_right = float(right_mse[best_pos])
                best_left_idx = left_idx
                best_right_idx = right_idx

        if best_thr is None:
            return None
        return best_feat, best_thr, best_left_idx, best_right_idx, best_mse_left, best_mse_right

    # --------------------
    # Least squares & pruning by h
    # --------------------
    def _least_squares_solution(self, X_leaf, y_leaf):
        n_samples, n_features = X_leaf.shape
        X_aug = np.hstack([np.ones((n_samples, 1)), X_leaf])
        XtX = X_aug.T @ X_aug
        XtY = X_aug.T @ y_leaf
        Theta = np.linalg.pinv(XtX) @ XtY
        m0 = Theta[0, :]
        M = Theta[1:, :].T
        Y_hat = X_aug @ Theta
        residuals = y_leaf - Y_hat
        h_val = np.sum(residuals ** 2)
        return M, m0, h_val

    def prune_by_h(self, X: np.ndarray, y: np.ndarray, h_threshold: float):
        """
        Post-pruning: collapse children into parent if combined h-value <= threshold.
        """
    
        def _compute_h(idx: np.ndarray) -> float:
            _, _, h_val = self._least_squares_solution(X[idx], y[idx])
            return h_val
    
        def _prune_recursive(node_id: int, idx: np.ndarray):
            node = self._nodes[node_id]
    
            if node.is_leaf:
                return idx
    
            # Split indices
            left_mask = X[idx, node.feature] <= node.threshold
            left_idx = idx[left_mask]
            right_idx = idx[~left_mask]
    
            # Recurse
            left_idx = _prune_recursive(node.left, left_idx)
            right_idx = _prune_recursive(node.right, right_idx)
    
            # Compute combined h
            combined_idx = np.concatenate([left_idx, right_idx])
            combined_h = _compute_h(combined_idx)
    
            # If small enough, prune children (make parent leaf)
            if combined_h <= h_threshold:
                node.is_leaf = True
                node.left = None
                node.right = None
                node.feature = None
                node.threshold = None
                node.value = y[combined_idx].mean(axis=0)
                return combined_idx
    
            return combined_idx
    
        # Start recursion from root
        all_idx = np.arange(X.shape[0])
        _prune_recursive(0, all_idx)

    # --------------------
    # Assign leaf IDs
    # --------------------
    def _assign_leaf_ids(self):
        counter = 0
        for node in self._nodes:
            if node.is_leaf:
                node.leaf_id = counter
                counter += 1

    # --------------------
    # Prediction
    # --------------------
    def predict_row(self, x):
        node_id = 0
        while not self._nodes[node_id].is_leaf:
            node = self._nodes[node_id]
            if x[node.feature] <= node.threshold:
                node_id = node.left
            else:
                node_id = node.right
        return self._nodes[node_id].value

    def predict(self, X):
        X = np.asarray(X)
        return np.vstack([self.predict_row(row) for row in X])


In [46]:


# 2. Fit the custom decision tree
tree = CustomDecisionTreeRegressor(min_samples_split=2)
tree.fit(X_train, y_train)

print("Number of nodes before pruning:", sum(node.is_leaf for node in tree._nodes))

# 3. Apply pruning based on h_threshold
tree.prune_by_h(X_train, y_train, h_threshold=0.1)

print("Number of nodes after pruning:", sum(node.is_leaf for node in tree._nodes))

# 4. Make predictions
X_test = np.array([[0.2, 0.8], [0.5, 0.5]])
y_pred = tree.predict(X_test)
print("Predictions:", y_pred)


Number of nodes before pruning: 8932
Number of nodes after pruning: 17762
Predictions: [[ 0.30241318 -0.16595536]
 [-0.32021594 -0.48623573]]
