In [20]:
from sklearn.ensemble import GradientBoostingClassifier
from JOPLEn.singletask import JOPLEn, LogisticLoss
from JOPLEn.partitioner import GBPartition
from JOPLEn.enums import CellModel
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.datasets import make_classification

In [21]:
# Generate a reproducible synthetic classification problem:
# 1000 samples with 20 features each.
x, y = make_classification(n_samples=1000, n_features=20, random_state=0)

# Hold out the last 200 rows for testing; the first 800 rows are for training.
x_train, y_train = x[:800], y[:800]
x_test, y_test = x[800:], y[800:]

In [22]:
# Baseline: gradient-boosted depth-1 trees trained with the log-loss.
# `fit` returns the fitted estimator, so construction and training are chained.
clf = GradientBoostingClassifier(
    loss="log_loss",
    n_estimators=100,
    learning_rate=1.0,
    max_depth=1,
    random_state=0,
).fit(x_train, y_train)

# Score the held-out split: second predict_proba column and hard labels.
y_pred_proba = clf.predict_proba(x_test)[:, 1]
y_pred = clf.predict(x_test)

# Metrics computed from the predicted probabilities.
print(f"Log loss: {log_loss(y_test, y_pred_proba):.2f}")
print(f"AUC: {roc_auc_score(y_test, y_pred_proba):.2f}")

# The same metrics computed from the hard label predictions, for comparison.
print(f"Log loss: {log_loss(y_test, y_pred):.2f}")
print(f"AUC: {roc_auc_score(y_test, y_pred):.2f}")

Log loss: 0.24
AUC: 0.97
Log loss: 1.62
AUC: 0.96


In [36]:
# JOPLEn configured with the GBPartition partitioner, a constant cell model
# and the logistic loss.
jp = JOPLEn(
    partitioner=GBPartition,
    n_partitions=10,
    n_cells=10,
    cell_model=CellModel.constant,
    loss_fn=LogisticLoss,
    random_state=0,
)

# NOTE(review): in the recorded run the logged TrL is 0.506250 at every
# checkpoint while Obj keeps falling; confirm the training-loss readout.
jp.fit(
    x_train,
    y_train,
    max_iters=400,
    alpha=0,
    lam=0.01,
    mu=0.01,
    verbose=True,
)

# The output of `predict` is used as a probability here (presumably of the
# positive class; verify against JOPLEn); hard labels threshold it at 0.5.
y_pred_proba = jp.predict(x_test)
y_pred = (y_pred_proba > 0.5).astype(int)

# Metrics computed from the predicted probabilities.
print(f"Log loss: {log_loss(y_test, y_pred_proba):.2f}")
print(f"AUC: {roc_auc_score(y_test, y_pred_proba):.2f}")

# The same metrics computed from the thresholded 0/1 labels, for comparison.
print(f"Log loss: {log_loss(y_test, y_pred):.2f}")
print(f"AUC: {roc_auc_score(y_test, y_pred):.2f}")

[04:04:07]: Epoch    100 | TrL: 0.506250 | FNorm: 0.136372 | PNorm: 1.167785 | WNz:    1 | Obj: 0.585158
[04:04:07]: Epoch    200 | TrL: 0.506250 | FNorm: 1.514515 | PNorm: 3.891678 | WNz:    1 | Obj: 0.389995
[04:04:07]: Epoch    300 | TrL: 0.506250 | FNorm: 4.896377 | PNorm: 6.997412 | WNz:    1 | Obj: 0.250075
[04:04:07]: Epoch    400 | TrL: 0.506250 | FNorm: 9.348858 | PNorm: 9.668949 | WNz:    1 | Obj: 0.180537
Log loss: 0.20
AUC: 0.98
Log loss: 2.52
AUC: 0.93


In [37]:
# One (true label, predicted probability, predicted label) tuple per test sample.
[
    (int(label), float(proba), int(pred))
    for label, proba, pred in zip(y_test, y_pred_proba, y_pred)
]

[(1, 0.8988485336303711, 1),
 (0, 0.1066424548625946, 0),
 (0, 0.1066424548625946, 0),
 (0, 0.1066424548625946, 0),
 (0, 0.1066424548625946, 0),
 (1, 0.8988485336303711, 1),
 (1, 0.7403137683868408, 1),
 (1, 0.8988485336303711, 1),
 (1, 0.8988485336303711, 1),
 (0, 0.1066424548625946, 0),
 (1, 0.8988485336303711, 1),
 (1, 0.8988485336303711, 1),
 (0, 0.1066424548625946, 0),
 (0, 0.1066424548625946, 0),
 (1, 0.8988485336303711, 1),
 (1, 0.8016042113304138, 1),
 (0, 0.1066424548625946, 0),
 (0, 0.1066424548625946, 0),
 (0, 0.15806594491004944, 0),
 (1, 0.8988485336303711, 1),
 (0, 0.1066424548625946, 0),
 (1, 0.8988485336303711, 1),
 (0, 0.1066424548625946, 0),
 (1, 0.8988485336303711, 1),
 (0, 0.2555866539478302, 0),
 (1, 0.8988485336303711, 1),
 (1, 0.8988485336303711, 1),
 (0, 0.15806594491004944, 0),
 (0, 0.1066424548625946, 0),
 (0, 0.1066424548625946, 0),
 (0, 0.1066424548625946, 0),
 (0, 0.1066424548625946, 0),
 (0, 0.1066424548625946, 0),
 (0, 0.1066424548625946, 0),
 (0, 0.10664

In [38]:
from lightgbm import LGBMClassifier
from sklearn.base import ClassifierMixin

# Sanity check: is LightGBM's classifier a subclass of scikit-learn's
# ClassifierMixin? The boolean result is displayed as the cell output.
issubclass(LGBMClassifier, ClassifierMixin)

True

In [39]:
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Check how LabelEncoder treats non-contiguous float labels: in the recorded
# output the distinct values 0.0 and 2.0 come back as the integer codes 0 and 1.
data = np.array([0.0, 2.0, 0.0, 2.0])

encoder = LabelEncoder()
encoder.fit_transform(data)

array([0, 1, 0, 1])

In [42]:
from JOPLEn.singletask import LogisticLoss
import cupy

# Inspect the logistic-loss gradient at the fitted weights on the training set.
w = jp.w
# Private helper of the fitted model; presumably returns the cell assignment
# of each training row (TODO confirm against JOPLEn).
s = jp._get_cells(x_train)

# A single all-ones input column, one row per training sample, allocated
# directly on the GPU instead of being built on the host and copied over.
# It is bound to `x_ones` rather than `x` so that the feature matrix returned
# by make_classification is not silently replaced by this unrelated array.
x_ones = cupy.ones((y_train.shape[0], 1))

# x_ones.shape, w.shape

# NOTE(review): 10 matches both n_cells and n_partitions used to build `jp`;
# confirm which of the two LogisticLoss expects here.
loss = LogisticLoss(10)

# Labels are remapped from {0, 1} to {-1, +1} and shaped as a column vector.
loss.grad(w, x_ones, cupy.array(2 * y_train - 1)[:, None], s)
# loss.predict(w, x_ones, s)

array([[ 1.87499022e-03, -5.06571835e-04, -3.40375662e-04,
         4.95325612e-04,  0.00000000e+00, -3.57401928e-04,
         3.41540129e-04, -3.98488790e-03,  0.00000000e+00,
         0.00000000e+00,  1.52062896e-03, -4.26606912e-04,
        -4.61134471e-04,  8.73241529e-04,  2.78838194e-04,
        -5.98732379e-04, -1.50767912e-04, -3.51284838e-03,
         0.00000000e+00,  0.00000000e+00, -6.61710263e-04,
         1.75573231e-03, -3.40375662e-04,  7.89990464e-04,
         0.00000000e+00, -3.57401928e-04, -1.50767912e-04,
        -3.51284838e-03,  0.00000000e+00,  0.00000000e+00,
         1.87499022e-03, -5.06571835e-04, -9.83118411e-05,
        -1.92789495e-04, -5.94221574e-04,  6.82870931e-04,
         3.41540129e-04, -3.98488790e-03,  0.00000000e+00,
         0.00000000e+00, -8.14196052e-04,  2.11153064e-03,
        -9.83118411e-05, -1.92789495e-04,  0.00000000e+00,
         1.59733150e-04,  3.41540129e-04, -3.98488790e-03,
         0.00000000e+00,  0.00000000e+00, -1.80502168e-0

In [47]:
# Show every element of y_train without numpy's "..." summarization.
# The unlimited threshold applies only inside the `with` block, so array
# display in all other cells keeps its normal settings. The array is printed
# explicitly because a trailing expression would be rendered only after the
# context manager has already restored the previous options.
with np.printoptions(threshold=np.inf):
    print(repr(y_train))

array([0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
       1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1,