# Oblique decision trees and random forests
> This should also be a wrapper, I think

## Import and install packages

In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [2]:
!pip install numba
!pip install scikit-obliquetree



In [3]:
import scikit_obliquetree
import numba

# Record the installed versions, one per line, for reproducibility.
print(scikit_obliquetree.__version__, numba.__version__, sep="\n")

0.1.4
0.61.0


In [4]:
# There are 3 oblique algorithms: BUTIF, HHCART, and CO2
# Let's grab all of them
from scikit_obliquetree.BUTIF import BUTIF

from scikit_obliquetree.HHCART import HouseHolderCART
from scikit_obliquetree.segmentor import MeanSegmentor, MSE

from scikit_obliquetree.CO2 import ContinuouslyOptimizedObliqueRegressionTree

## Inspect HHCart

In [5]:
from hyperdt.toy_data import wrapped_normal_mixture
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Sample a two-class wrapped normal mixture and hold out 20% of it for testing.
data, labels = wrapped_normal_mixture(num_dims=2, num_classes=2, num_points=1000, seed=42, noise_std=2)
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Fit an HHCART tree (scikit-obliquetree's HouseHolderCART) and report its
# accuracy on the held-out split.
hhcart = HouseHolderCART(task="classification", impurity=MSE(), segmentor=MeanSegmentor())
hhcart.fit(X_train, y_train)
print(accuracy_score(y_true=y_test, y_pred=hhcart.predict(X_test)))

0.825


In [6]:
# Show every attribute stored on the fitted tree's root node.
vars(hhcart._root)

{'depth': 0,
 'labels': array([1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1,
        1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,
        1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1,
        1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0,
        1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
        0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
        0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0,
        0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
        1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
        1, 0, 1

The decision rule for HHCart is:

```
X @ weights[:-1] < weights[-1]
```

## Testing our implementation

In [7]:
from hyperdt.oblique import HyperbolicHouseHolderClassifier

# Fit the hyperbolic Householder classifier with default settings on the same
# train/test split and print its held-out accuracy.
h_hhcart = HyperbolicHouseHolderClassifier()
h_hhcart.fit(X_train, y_train)
print(
    accuracy_score(
        y_true=y_test,
        y_pred=h_hhcart.predict(X_test),
    )
)

0.81


## Same thing for CO2

In [8]:
# Fit scikit-obliquetree's CO2 tree on the same split.
co2 = ContinuouslyOptimizedObliqueRegressionTree(
    impurity=MSE(),
    segmentor=MeanSegmentor(),
    task="classification",
)
co2.fit(X_train, y_train)

# The raw predictions are thresholded at 0.5 to obtain 0/1 labels before scoring.
y_pred = (co2.predict(X_test) > 0.5).astype(int)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.845


In [18]:
from hyperdt.oblique import HyperbolicContinuouslyOptimizedClassifier

# Fit the hyperbolic CO2 classifier (naive midpoint method) on the same split;
# its predictions are scored directly, without thresholding.
h_co2 = HyperbolicContinuouslyOptimizedClassifier(midpoint_method="naive")
h_co2.fit(X_train, y_train)
y_pred = h_co2.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.83


In [27]:
# Test the regressor

from hyperdt.oblique import HyperbolicContinuouslyOptimizedRegressor
from sklearn.metrics import mean_squared_error

# Fit the hyperbolic CO2 regressor on the class labels cast to float and
# report the mean squared error on the held-out split.
# NOTE: this rebinds `h_co2`, replacing the classifier fitted earlier.
h_co2 = HyperbolicContinuouslyOptimizedRegressor(midpoint_method="naive")
h_co2.fit(X_train, y_train.astype(float))
y_pred = h_co2.predict(X_test)
print(mean_squared_error(y_true=y_test.astype(float), y_pred=y_pred))

0.11212182968717822


In [35]:
def hyperbolic_data(seed=42):
    """Generate a small hyperbolic dataset with classification and regression targets.

    Parameters
    ----------
    seed : int, default=42
        Seed forwarded to ``wrapped_normal_mixture`` and used for the
        regression noise, so repeated calls return identical data.

    Returns
    -------
    X : array
        Points sampled by ``wrapped_normal_mixture`` (100 points, 3 classes,
        ``num_dims=4``).
    y_class : array
        Class labels returned by ``wrapped_normal_mixture``.
    y_reg : array
        ``sin(X[:, 1]) + cos(X[:, 2])`` plus Gaussian noise scaled by 0.1.
    """
    # Imported locally: no cell in this notebook imports numpy, so relying on
    # a global `np` raises NameError on a fresh kernel.
    import numpy as np

    n_samples = 100
    n_features = 5
    # assumes the sampler returns one more coordinate than `num_dims` — TODO confirm
    manifold_dim = n_features - 1

    # Use the wrapped_normal_mixture function from toy_data
    X, y_class = wrapped_normal_mixture(num_points=n_samples, num_classes=3, num_dims=manifold_dim, seed=seed)

    # Create regression targets; the noise comes from a seeded generator so
    # the targets are reproducible across runs.
    rng = np.random.default_rng(seed)
    noise = 0.1 * rng.standard_normal(len(X))
    y_reg = np.sin(X[:, 1]) + np.cos(X[:, 2]) + noise

    return X, y_class, y_reg


# def test_co2_classifier(hyperbolic_data):
#     """Test that HyperbolicContinuouslyOptimizedClassifier works."""
#     X, y_class, _ = hyperbolic_data

#     # Test CO2 classifier
#     co2_clf = HyperbolicContinuouslyOptimizedClassifier(
#         max_depth=3, curvature=1.0, timelike_dim=0, validate_input_geometry=False
#     )
#     co2_clf.fit(X, y_class)
#     co2_y_pred = co2_clf.predict(X)

#     assert co2_y_pred.shape == y_class.shape

X, y_class, y_reg = hyperbolic_data()

# Collapse the three mixture labels into a binary target:
# classes 0 and 1 map to 1, class 2 maps to 0.
binary_y = (y_class <= 1).astype(int)

# Constructor settings shared by the classifier and the regressor below.
co2_kwargs = dict(max_depth=3, curvature=1.0, timelike_dim=0, validate_input_geometry=False)

# Smoke-test the CO2 classifier: one prediction per training label.
co2_clf = HyperbolicContinuouslyOptimizedClassifier(**co2_kwargs)
co2_clf.fit(X, binary_y)
co2_y_pred = co2_clf.predict(X)

assert co2_y_pred.shape == binary_y.shape, (co2_y_pred.shape, binary_y.shape)

# Smoke-test the CO2 regressor the same way against the continuous targets.
co2_reg = HyperbolicContinuouslyOptimizedRegressor(**co2_kwargs)
co2_reg.fit(X, y_reg)
co2_y_pred_reg = co2_reg.predict(X)

assert co2_y_pred_reg.shape == y_reg.shape, (co2_y_pred_reg.shape, y_reg.shape)