In [1]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from lineartree import (
    LinearForestRegressor,
    LinearBoostRegressor,
    LinearTreeRegressor,
)
from sklearn.linear_model import LinearRegression
from lineartree._classes import _predict_branch
from JOPLEn.partitioner import LinearBoostPartition, LinearForestPartition

In [2]:
# Synthetic 2-D regression problem; the global seed is fixed so the sample
# is the same on every run.
np.random.seed(0)
n_points = 1000
x = np.random.randn(n_points, 2)

# Target: a sinusoid in each feature plus Gaussian noise scaled by 0.1.
# Alternative target kept for reference:
# y = x[:, 0] ** 2 + x[:, 1] + np.random.randn(n_points) * 0.1
y = np.sin(2 * x[:, 0]) + np.cos(2 * x[:, 1]) + 0.1 * np.random.randn(n_points)

# 3-D scatter of the raw samples (red markers), built in one constructor call.
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=x[:, 0],
            y=x[:, 1],
            z=y,
            mode="markers",
            marker={"color": "red"},
        )
    ]
)

# Remove the margins around the plot area.
fig.update_layout(margin={"l": 0, "r": 0, "b": 0, "t": 0})

fig.show()

In [69]:
# Import necessary libraries
from lineartree import LinearTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor


# Define the Gradient Boosted Decision Tree using LinearRegressionTree
class GBLTRegressor:
    """Residual-fitting boosted ensemble of ``LinearTreeRegressor`` stages.

    Every stage is fitted to the residuals left by the stages before it, and
    the ensemble prediction is the ``learning_rate``-scaled sum of all stage
    predictions. There is no constant initial prediction: the first stage is
    fitted to ``y`` itself.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting stages (one ``LinearTreeRegressor`` each).
    learning_rate : float, default=0.1
        Factor applied to every stage's prediction, both when updating the
        residuals in ``fit`` and when summing in ``predict``.
    max_depth : int, default=3
        ``max_depth`` forwarded to every ``LinearTreeRegressor``.
    base_estimator : estimator or None, default=None
        ``base_estimator`` forwarded to every ``LinearTreeRegressor``.
        ``None`` means a fresh ``LinearRegression()`` created per
        ``GBLTRegressor`` instance.

    Attributes
    ----------
    base_learners : list
        The stage models, in boosting order.
    """

    def __init__(
        self,
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        base_estimator=None,
    ):
        # A default written as ``LinearRegression()`` in the signature would
        # be built once at definition time and shared by every instance;
        # build it here instead.
        if base_estimator is None:
            base_estimator = LinearRegression()

        self.n_estimators = n_estimators
        self.learning_rate = learning_rate

        # One linear tree per boosting stage.
        self.base_learners = [
            LinearTreeRegressor(
                max_depth=max_depth,
                base_estimator=base_estimator,
            )
            for _ in range(n_estimators)
        ]

    def fit(self, X, y):
        """Fit the stages sequentially on the running residuals.

        Parameters
        ----------
        X : array-like
            Training inputs, passed unchanged to every stage.
        y : array-like
            Training targets. A column vector of shape ``(n, 1)`` is
            flattened to ``(n,)``.

        Returns
        -------
        GBLTRegressor
            ``self``, so calls can be chained.
        """
        residuals = np.asarray(y)
        # A (n, 1) target minus (n,) predictions would broadcast to (n, n).
        if residuals.ndim == 2 and residuals.shape[1] == 1:
            residuals = residuals.ravel()

        for learner in self.base_learners:
            # Fit this stage to what the previous stages left unexplained.
            learner.fit(X, residuals)

            # Subtract the (shrunken) contribution of this stage. This
            # creates a new array, so the caller's ``y`` is never modified.
            stage_pred = learner.predict(X)
            residuals = residuals - self.learning_rate * stage_pred

        return self

    def predict(self, X):
        """Return the ``learning_rate``-scaled sum of all stage predictions.

        Parameters
        ----------
        X : array-like
            Inputs; the first dimension is the number of samples.

        Returns
        -------
        numpy.ndarray
            1-D array with one prediction per sample.
        """
        # np.shape also works for inputs without a ``.shape`` attribute.
        predictions = np.zeros(np.shape(X)[0])

        for learner in self.base_learners:
            predictions += self.learning_rate * learner.predict(X)

        return predictions

In [72]:
from JOPLEn.partitioner import TreePartition
from JOPLEn.enums import LossType
from typing import Any


class LinearBoostPartition(TreePartition):
    """Tree partitioner backed by the notebook's ``GBLTRegressor``.

    Note: defining this class rebinds the name ``LinearBoostPartition`` that
    the first cell imports from ``JOPLEn.partitioner``.
    """

    def _get_leaves(self, x: np.ndarray) -> np.ndarray:
        """Collect ``apply(x)`` from every base learner of the fitted model.

        Returns an integer array of shape ``(x.shape[0], self.n_partitions)``
        whose column ``j`` holds the output of the ``j``-th entry of
        ``self.model.base_learners``.
        """
        leaf_ids = np.empty((x.shape[0], self.n_partitions), dtype=int)

        for column, learner in enumerate(self.model.base_learners):
            leaf_ids[:, column] = learner.apply(x)

        return leaf_ids

    def _fit_model(self, x: np.ndarray, y: np.ndarray):
        """Build and fit the boosted linear-tree model stored in ``self.model``.

        Raises ``NotImplementedError`` for the classification loss types and
        ``ValueError`` for any other non-regression loss type. Asserts that
        ``self.n_cells`` is a power of two, since the tree depth is
        ``log2(n_cells)``.
        """
        loss = self.loss_type
        if loss == LossType.regression:
            model_class = GBLTRegressor
        elif loss in (
            LossType.binary_classification,
            LossType.multinomial_classification,
        ):
            raise NotImplementedError("Classification not implemented")
        else:
            raise ValueError("Loss type not supported")

        depth = np.log2(self.n_cells)
        assert (depth % 1) == 0, "n_cells must be a power of 2"

        # Fill in the leaf model when the caller did not choose one; this
        # writes into self.model_kwargs itself.
        options = self.model_kwargs
        if "base_estimator" not in options:
            options["base_estimator"] = LinearRegression()

        self.model = model_class(
            n_estimators=self.n_partitions,
            max_depth=int(depth),
            **options,
        )

        # The model is fitted on a flattened target.
        self.model.fit(x, y.flatten())

    def _prefit_model(self, model: Any):
        """Use an already-fitted ``model`` as the partitioning model."""
        self.model = model

(1000, 5)

In [63]:
from sklearn.ensemble import RandomForestRegressor

# Two boosting stages with learning_rate=1.0, fitted on the synthetic sample.
model = GBLTRegressor(learning_rate=1.0, n_estimators=2)
model.fit(x, y)

In [64]:
# In-sample predictions of the current `model`.
predictions = model.predict(x)
# predictions = model.base_learners[0].predict(x)

# Overlay the predictions (blue) on the observed targets (red) in a single
# 3-D scatter figure; the traces are passed in the order they are drawn.
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=x[:, 0],
            y=x[:, 1],
            z=y,
            mode="markers",
            marker={"color": "red"},
        ),
        go.Scatter3d(
            x=x[:, 0],
            y=x[:, 1],
            z=predictions,
            mode="markers",
            marker={"color": "blue"},
        ),
    ]
)

# Remove the margins around the plot area.
fig.update_layout(margin={"l": 0, "r": 0, "b": 0, "t": 0})

fig.show()

In [86]:
from JOPLEn.singletask import JOPLEn

# JOPLEn with the LinearBoostPartition partitioner: 100 partitions of
# 8 cells each; part_kwargs carries learning_rate=1.0 for the partitioner.
model = JOPLEn(
    partitioner=LinearBoostPartition,
    n_partitions=100,
    n_cells=8,
    part_kwargs=dict(learning_rate=1.0),
    random_state=0,
)

# Up to 1000 iterations, with a progress line every 50 epochs.
history = model.fit(x, y, print_epochs=50, max_iters=1000)

# In-sample predictions, kept under their own name for the comparison plot.
pred1 = model.predict(x)

[18:17:57]: Epoch     50 | TrL: 0.997395 | FNorm: 0.013881 | PNorm: 2.019686 | WNz:    3 | Obj: 0.917739
[18:17:57]: Epoch    100 | TrL: 0.906447 | FNorm: 0.162853 | PNorm: 6.964565 | WNz:    3 | Obj: 0.759567
[18:17:57]: Epoch    150 | TrL: 0.812215 | FNorm: 0.578224 | PNorm: 13.165838 | WNz:    3 | Obj: 0.614401
[18:17:57]: Epoch    200 | TrL: 0.716236 | FNorm: 1.296422 | PNorm: 19.649427 | WNz:    3 | Obj: 0.486336
[18:17:57]: Epoch    250 | TrL: 0.620998 | FNorm: 2.355776 | PNorm: 26.392048 | WNz:    3 | Obj: 0.379527
[18:17:57]: Epoch    300 | TrL: 0.542131 | FNorm: 3.742152 | PNorm: 33.241283 | WNz:    3 | Obj: 0.308847
[18:17:57]: Epoch    350 | TrL: 0.483960 | FNorm: 5.302052 | PNorm: 39.576351 | WNz:    3 | Obj: 0.269454
[18:17:57]: Epoch    400 | TrL: 0.444694 | FNorm: 6.803008 | PNorm: 44.823143 | WNz:    3 | Obj: 0.250881
[18:17:57]: Epoch    450 | TrL: 0.419611 | FNorm: 8.038969 | PNorm: 48.685387 | WNz:    3 | Obj: 0.243283
[18:17:57]: Epoch    500 | TrL: 0.402754 | FNorm

In [81]:
from JOPLEn.partitioner import GBPartition

# Same JOPLEn configuration as a baseline, but with GBPartition as the
# partitioner and no extra partitioner options.
model = JOPLEn(
    partitioner=GBPartition,
    n_partitions=100,
    n_cells=8,
    random_state=0,
)

# Up to 1000 iterations, with a progress line every 50 epochs.
history = model.fit(x, y, print_epochs=50, max_iters=1000)

# In-sample predictions, kept under their own name for the comparison plot.
pred2 = model.predict(x)

[18:16:28]: Epoch     50 | TrL: 0.983052 | FNorm: 0.018173 | PNorm: 2.305286 | WNz:    3 | Obj: 0.891585
[18:16:28]: Epoch    100 | TrL: 0.838999 | FNorm: 0.233676 | PNorm: 8.265625 | WNz:    3 | Obj: 0.651700
[18:16:28]: Epoch    150 | TrL: 0.660867 | FNorm: 0.905579 | PNorm: 16.263561 | WNz:    3 | Obj: 0.412063
[18:16:28]: Epoch    200 | TrL: 0.493434 | FNorm: 2.067374 | PNorm: 24.550514 | WNz:    3 | Obj: 0.245498
[18:16:28]: Epoch    250 | TrL: 0.354863 | FNorm: 3.497994 | PNorm: 31.906868 | WNz:    3 | Obj: 0.151452
[18:16:28]: Epoch    300 | TrL: 0.252491 | FNorm: 4.904628 | PNorm: 37.770725 | WNz:    3 | Obj: 0.108227
[18:16:28]: Epoch    350 | TrL: 0.197853 | FNorm: 6.051324 | PNorm: 41.960667 | WNz:    3 | Obj: 0.097040
[18:16:28]: Epoch    400 | TrL: 0.179405 | FNorm: 6.795781 | PNorm: 44.467602 | WNz:    3 | Obj: 0.098091
[18:16:28]: Epoch    450 | TrL: 0.165760 | FNorm: 7.111783 | PNorm: 45.474304 | WNz:    3 | Obj: 0.096916
[18:16:28]: Epoch    500 | TrL: 0.150727 | FNorm

In [95]:
# Compare the observed targets with the JOPLEn predictions in one 3-D figure.
fig = go.Figure()

# Observed targets.
fig.add_trace(
    go.Scatter3d(
        x=x[:, 0],
        y=x[:, 1],
        z=y,
        mode="markers",
        marker=dict(color="red"),
        name="Data",
    )
)

# pred1 is produced by the JOPLEn model that uses LinearBoostPartition, so the
# legend entry names that partitioner (it was previously "Linear Forest").
fig.add_trace(
    go.Scatter3d(
        x=x[:, 0],
        y=x[:, 1],
        z=pred1.flatten(),
        mode="markers",
        marker=dict(color="blue"),
        name="JOPLEn (LinearBoostPartition)",
    )
)

# Optional second method: pred2 is produced by the JOPLEn model that uses
# GBPartition. Uncomment to overlay it.
# fig.add_trace(
#     go.Scatter3d(
#         x=x[:, 0],
#         y=x[:, 1],
#         z=pred2.flatten(),
#         mode="markers",
#         marker=dict(color="green"),
#         name="JOPLEn (GBPartition)",
#     )
# )

# Remove the margins around the plot area.
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))

# Show the figure
fig.show()

In [None]:
# Rebuild the synthetic sample with the same seed and expressions as the
# data cell at the top of the notebook.
np.random.seed(0)

x = np.random.randn(1000, 2)
y = np.sin(2 * x[:, 0]) + np.cos(2 * x[:, 1]) + 0.1 * np.random.randn(1000)

# NOTE(review): VarMaxForestPartition is not imported or defined in any cell
# visible in this notebook; confirm where it comes from before relying on a
# fresh-kernel run.
# model = VarMaxForest(n_estimators=10, max_depth=3, min_samples_leaf=100)
model = JOPLEn(
    partitioner=VarMaxForestPartition,
    n_partitions=100,
    n_cells=8,
    part_kwargs=dict(min_samples_leaf=20),
    random_state=0,
)

# Up to 1000 iterations with mu=0.1, with a progress line every 50 epochs.
history = model.fit(x, y, mu=0.1, print_epochs=50, max_iters=1000)

# Flattened in-sample predictions for plotting.
predictions = model.predict(x).flatten()

[17:42:14]: Epoch     50 | TrL: 0.410175 | FNorm: 9.842708 | PNorm: 53.482536 | WNz:    3 | Obj: 0.254147
[17:42:14]: Epoch    100 | TrL: 0.340393 | FNorm: 12.674873 | PNorm: 60.407150 | WNz:    3 | Obj: 0.234227
[17:42:15]: Epoch    150 | TrL: 0.338747 | FNorm: 12.683508 | PNorm: 60.413597 | WNz:    3 | Obj: 0.233282
[17:42:15]: Epoch    200 | TrL: 0.345817 | FNorm: 12.225052 | PNorm: 59.330666 | WNz:    3 | Obj: 0.233151
[17:42:15]: Epoch    250 | TrL: 0.341691 | FNorm: 12.475643 | PNorm: 59.938072 | WNz:    3 | Obj: 0.233046
[17:42:15]: Epoch    300 | TrL: 0.342397 | FNorm: 12.428420 | PNorm: 59.830765 | WNz:    3 | Obj: 0.233019
[17:42:15]: Epoch    350 | TrL: 0.343110 | FNorm: 12.383019 | PNorm: 59.690998 | WNz:    3 | Obj: 0.233014
[17:42:15]: Epoch    400 | TrL: 0.342525 | FNorm: 12.418893 | PNorm: 59.805496 | WNz:    3 | Obj: 0.233003
[17:42:15]: Epoch    450 | TrL: 0.342350 | FNorm: 12.429291 | PNorm: 59.826954 | WNz:    3 | Obj: 0.232997
[17:42:15]: Epoch    500 | TrL: 0.3430

In [None]:
# 3-D scatter of the current `predictions` only (blue markers).
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=x[:, 0],
            y=x[:, 1],
            z=predictions,
            mode="markers",
            marker={"color": "blue"},
        )
    ]
)

# The observed targets are left out of this figure; uncomment to overlay them.
# fig.add_trace(
#     go.Scatter3d(x=x[:, 0], y=x[:, 1], z=y, mode="markers", marker=dict(color="red"))
# )

# Remove the margins around the plot area.
fig.update_layout(margin={"l": 0, "r": 0, "b": 0, "t": 0})

fig.show()

In [None]:
from JOPLEn.partitioner import RFPartition

# Same JOPLEn configuration with RFPartition as the partitioner;
# min_samples_leaf=20 is handed over through part_kwargs.
model = JOPLEn(
    partitioner=RFPartition,
    n_partitions=100,
    n_cells=8,
    part_kwargs=dict(min_samples_leaf=20),
    random_state=0,
)

# Up to 1000 iterations with mu=0.1, with a progress line every 50 epochs.
history = model.fit(x, y, mu=0.1, print_epochs=50, max_iters=1000)

# Flattened in-sample predictions for plotting.
predictions = model.predict(x).flatten()

[17:42:15]: Epoch     50 | TrL: 0.392925 | FNorm: 2.895354 | PNorm: 28.851131 | WNz:    3 | Obj: 0.171649
[17:42:16]: Epoch    100 | TrL: 0.364995 | FNorm: 4.141450 | PNorm: 34.001842 | WNz:    3 | Obj: 0.164635
[17:42:16]: Epoch    150 | TrL: 0.361766 | FNorm: 4.352855 | PNorm: 34.771130 | WNz:    3 | Obj: 0.164592
[17:42:16]: Epoch    200 | TrL: 0.366198 | FNorm: 4.046901 | PNorm: 33.577728 | WNz:    3 | Obj: 0.164497
[17:42:16]: Epoch    250 | TrL: 0.365413 | FNorm: 4.098181 | PNorm: 33.824806 | WNz:    3 | Obj: 0.164482
[17:42:16]: Epoch    300 | TrL: 0.364668 | FNorm: 4.145901 | PNorm: 33.967735 | WNz:    3 | Obj: 0.164460
[17:42:16]: Epoch    350 | TrL: 0.364873 | FNorm: 4.132380 | PNorm: 33.916092 | WNz:    3 | Obj: 0.164461
[17:42:16]: Epoch    400 | TrL: 0.365041 | FNorm: 4.120670 | PNorm: 33.903099 | WNz:    3 | Obj: 0.164458
[17:42:16]: Epoch    450 | TrL: 0.365178 | FNorm: 4.111416 | PNorm: 33.840302 | WNz:    3 | Obj: 0.164456
[17:42:16]: Epoch    500 | TrL: 0.364888 | FNo

In [None]:
# 3-D scatter of the current `predictions` only (blue markers).
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=x[:, 0],
            y=x[:, 1],
            z=predictions,
            mode="markers",
            marker={"color": "blue"},
        )
    ]
)

# The observed targets are left out of this figure; uncomment to overlay them.
# fig.add_trace(
#     go.Scatter3d(x=x[:, 0], y=x[:, 1], z=y, mode="markers", marker=dict(color="red"))
# )

# Remove the margins around the plot area.
fig.update_layout(margin={"l": 0, "r": 0, "b": 0, "t": 0})

fig.show()