In [1]:
import time
import pandas as pd
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
from perpetual import PerpetualBooster

In [2]:
from importlib.metadata import version

print(f"scikit-learn: {version('scikit-learn')}")
print(f"perpetual: {version('perpetual')}")

scikit-learn: 1.5.1
perpetual: 0.5.0


In [3]:
def evaluate(model, X_train, y_train, X_test, y_test, budget=None):
    start = time.time()
    model.fit(X_train, y_train, budget=budget) if budget else model.fit(X_train, y_train)
    duration = time.time() - start
    return duration, accuracy_score(y_test, model.predict(X_test)), log_loss(y_test, model.predict_proba(X_test))

datasets = {"Breast Cancer": load_breast_cancer(return_X_y=True), "Binary Iris": (load_iris(return_X_y=True)[0][load_iris().target!=2], load_iris(return_X_y=True)[1][load_iris().target!=2])}
results = pd.DataFrame(columns=["Dataset", "Model", "Budget", "Time", "Accuracy", "Log Loss"])

for name, (X, y) in datasets.items():
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    pb = PerpetualBooster(objective="LogLoss")
    rf = RandomForestClassifier()
    results = pd.concat([results,
                         pd.DataFrame([[name, "Perpetual", "0.1", *evaluate(pb, X_train, y_train, X_test, y_test, budget=0.1)]], columns=results.columns),
                         pd.DataFrame([[name, "Perpetual", "1.0", *evaluate(pb, X_train, y_train, X_test, y_test, budget=1.0)]], columns=results.columns),
                         pd.DataFrame([[name, "Perpetual", "2.0", *evaluate(pb, X_train, y_train, X_test, y_test, budget=2.0)]], columns=results.columns),
                         pd.DataFrame([[name, "RF", "-", *evaluate(rf, X_train, y_train, X_test, y_test)]], columns=results.columns),
                        ],
                    ignore_index=True)

Reached iteration limit before auto stopping. Try to decrease the budget for the best performance.
Reached iteration limit before auto stopping. Try to decrease the budget for the best performance.
Reached iteration limit before auto stopping. Try to decrease the budget for the best performance.
  results = pd.concat([results,
Reached iteration limit before auto stopping. Try to decrease the budget for the best performance.
Reached iteration limit before auto stopping. Try to decrease the budget for the best performance.
Reached iteration limit before auto stopping. Try to decrease the budget for the best performance.


In [4]:
results

Unnamed: 0,Dataset,Model,Budget,Time,Accuracy,Log Loss
0,Breast Cancer,Perpetual,0.1,149.592308,0.973684,0.158678
1,Breast Cancer,Perpetual,1.0,129.906461,0.973684,0.12322
2,Breast Cancer,Perpetual,2.0,155.879312,0.973684,0.099885
3,Breast Cancer,RF,-,0.522181,0.964912,0.103776
4,Binary Iris,Perpetual,0.1,0.335295,1.0,3.2e-05
5,Binary Iris,Perpetual,1.0,0.378495,1.0,0.000273
6,Binary Iris,Perpetual,2.0,0.334572,1.0,0.004814
7,Binary Iris,RF,-,0.305424,1.0,0.002518


In [5]:
results.to_markdown()

'|    | Dataset       | Model     | Budget   |       Time |   Accuracy |    Log Loss |\n|---:|:--------------|:----------|:---------|-----------:|-----------:|------------:|\n|  0 | Breast Cancer | Perpetual | 0.1      | 149.592    |   0.973684 | 0.158678    |\n|  1 | Breast Cancer | Perpetual | 1.0      | 129.906    |   0.973684 | 0.12322     |\n|  2 | Breast Cancer | Perpetual | 2.0      | 155.879    |   0.973684 | 0.0998845   |\n|  3 | Breast Cancer | RF        | -        |   0.522181 |   0.964912 | 0.103776    |\n|  4 | Binary Iris   | Perpetual | 0.1      |   0.335295 |   1        | 3.21456e-05 |\n|  5 | Binary Iris   | Perpetual | 1.0      |   0.378495 |   1        | 0.000273461 |\n|  6 | Binary Iris   | Perpetual | 2.0      |   0.334572 |   1        | 0.00481403  |\n|  7 | Binary Iris   | RF        | -        |   0.305424 |   1        | 0.00251769  |'

In [6]:
print(results.to_markdown())

|    | Dataset       | Model     | Budget   |       Time |   Accuracy |    Log Loss |
|---:|:--------------|:----------|:---------|-----------:|-----------:|------------:|
|  0 | Breast Cancer | Perpetual | 0.1      | 149.592    |   0.973684 | 0.158678    |
|  1 | Breast Cancer | Perpetual | 1.0      | 129.906    |   0.973684 | 0.12322     |
|  2 | Breast Cancer | Perpetual | 2.0      | 155.879    |   0.973684 | 0.0998845   |
|  3 | Breast Cancer | RF        | -        |   0.522181 |   0.964912 | 0.103776    |
|  4 | Binary Iris   | Perpetual | 0.1      |   0.335295 |   1        | 3.21456e-05 |
|  5 | Binary Iris   | Perpetual | 1.0      |   0.378495 |   1        | 0.000273461 |
|  6 | Binary Iris   | Perpetual | 2.0      |   0.334572 |   1        | 0.00481403  |
|  7 | Binary Iris   | RF        | -        |   0.305424 |   1        | 0.00251769  |
