In [1]:
import optuna
import numpy as np
from lightgbm import LGBMRegressor, LGBMClassifier
from sklearn.metrics import mean_squared_error, log_loss
from sklearn.datasets import fetch_covtype, fetch_california_housing
from sklearn.model_selection import train_test_split, cross_validate
from perpetual import PerpetualBooster

In [2]:
# Report the version of the interpreter that is actually running this kernel.
# A shell escape (`!python --version`) runs whichever `python` is first on PATH,
# which can differ from the kernel's interpreter (other venv / conda env).
import platform

print(f"Python {platform.python_version()}")

Python 3.10.14


In [3]:
from importlib.metadata import version

# Print the installed version of every distribution this notebook relies on,
# one "<name>: <version>" line per package.
for dist_name in ("numpy", "optuna", "lightgbm", "scikit-learn", "perpetual"):
    print(f"{dist_name}: {version(dist_name)}")

numpy: 1.26.4
optuna: 3.6.0
lightgbm: 4.3.0
scikit-learn: 1.4.2
perpetual: 0.1.0


In [4]:
# Task switch read by the setup cell that loads the data: True selects the
# California Housing dataset (LGBMRegressor, mse), False selects the Cover Types
# dataset (LGBMClassifier, log_loss).
task_is_cal_housing = True  # change to False for Cover Types task.

In [5]:
# Run configuration.  The outputs recorded in this notebook come from a single
# Optuna trial (see "Number of finished trials" further down).
# `seed` is passed to the train/test split, the TPE sampler and the LightGBM models.
seed = 4   # average results are reported for 5 seeds -> [0, 1, 2, 3, 4]
# Number of boosting rounds given to every LightGBM model in the search.
# NOTE(review): the LightGBM results table in this notebook lists n_estimators
# 100 and 300 only — confirm where the 200 runs are reported.
n_estimators = 1  # results are reported for 100, 200, 300 n_estimators.
# Number of Optuna trials passed to study.optimize.
n_trials = 1

In [6]:
# Pick the dataset together with everything that depends on the task: the
# scikit-learn scorer name used during CV, the metric function/name used for
# reporting, the LightGBM estimator class and the Perpetual objective name.
if not task_is_cal_housing:
    # Cover Types task, evaluated with log loss.
    data, target = fetch_covtype(return_X_y=True, as_frame=True)
    scoring, metric_function, metric_name = "neg_log_loss", log_loss, "log_loss"
    LGBMBooster, objective_type = LGBMClassifier, "LogLoss"
else:
    # California Housing task, evaluated with mean squared error.
    data, target = fetch_california_housing(return_X_y=True, as_frame=True)
    scoring, metric_function, metric_name = "neg_mean_squared_error", mean_squared_error, "mse"
    LGBMBooster, objective_type = LGBMRegressor, "SquaredLoss"

In [7]:
# Seeded hold-out split; the same train/test partition is used by both the
# LightGBM search and the Perpetual model further down.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.2248,
    random_state=seed,
)

# Report the resulting split sizes.
print("len(X_train):", len(X_train))
print("len(X_test):", len(X_test))

len(X_train): 16000
len(X_test): 4640


In [8]:
# Module-level slots shared between the objective and the Optuna callback.
cv_results = None       # cross-validation output of the most recently evaluated trial
best_cv_results = None  # cross-validation output of the best trial seen so far


def save_best_cv_results(study, trial):
    """Optuna callback: remember the CV results of the best trial.

    When the trial that just finished is the study's current best trial, the
    module-level ``cv_results`` (whatever it currently holds) is copied by
    reference into ``best_cv_results``.  Otherwise nothing changes.

    Args:
        study: object exposing ``best_trial.number``.
        trial: the trial that just finished; only ``number`` is read.
    """
    global best_cv_results
    best_number = study.best_trial.number
    if best_number != trial.number:
        return
    best_cv_results = cv_results

In [9]:
def objective_function(trial):
    """Optuna objective: 5-fold cross-validated loss of one LightGBM configuration.

    Hyper-parameters are drawn from ``trial``, an ``LGBMBooster`` is built from
    them and cross-validated on ``X_train`` / ``y_train`` with the task's
    ``scoring``.  The full ``cross_validate`` output (scores and fitted
    estimators) is stored in the module-level ``cv_results``.

    Args:
        trial: Optuna trial used to suggest hyper-parameters.

    Returns:
        The negated mean of the CV test scores.
    """
    global cv_results

    # Fixed settings first; tunable ones are appended below.  The suggest calls
    # are issued in a fixed order (log-uniform floats, uniform floats, integers).
    params = {'seed': seed, 'verbosity': -1, 'n_estimators': n_estimators}

    # Log-uniform floats: (name, low, high).
    for name, low, high in (
        ('learning_rate', 0.001, 0.5),
        ('min_split_gain', 1e-6, 1.0),
        ('reg_alpha', 1e-6, 1.0),
        ('reg_lambda', 1e-6, 1.0),
    ):
        params[name] = trial.suggest_float(name, low, high, log=True)

    # Uniform fractions in [0.2, 1.0].
    for name in ('colsample_bytree', 'subsample'):
        params[name] = trial.suggest_float(name, 0.2, 1.0)

    # Integers: (name, low, high).
    for name, low, high in (
        ('subsample_freq', 1, 10),
        ('max_depth', 3, 33),
        ('num_leaves', 2, 1024),
        ('min_child_samples', 1, 100),
    ):
        params[name] = trial.suggest_int(name, low, high)

    estimator = LGBMBooster(**params)
    cv_results = cross_validate(
        estimator,
        X_train,
        y_train,
        cv=5,
        scoring=scoring,
        return_train_score=True,
        return_estimator=True,
    )
    # The scorers in use are "neg_*" scores, so flip the sign to get a loss.
    mean_valid_score = np.mean(cv_results['test_score'])
    return -mean_valid_score

In [10]:
# TPE sampler seeded with the run seed; the study minimises the value returned
# by the objective function.
sampler = optuna.samplers.TPESampler(seed=seed)
study = optuna.create_study(
    sampler=sampler,
    direction='minimize',
)

[I 2024-07-10 12:37:35,963] A new study created in memory with name: no-name-4c6a881c-9102-44d9-b018-3af8d37cb2ae


In [11]:
%%time
study.optimize(objective_function, n_trials=n_trials, callbacks=[save_best_cv_results])

[I 2024-07-10 12:37:36,403] Trial 0 finished with value: 1.0644386016870766 and parameters: {'learning_rate': 0.4073657656436648, 'min_split_gain': 0.0019204079494910193, 'reg_alpha': 0.685655809011563, 'reg_lambda': 0.019448941142879615, 'colsample_bytree': 0.7581830596778167, 'subsample': 0.3728715964643011, 'subsample_freq': 10, 'max_depth': 3, 'num_leaves': 260, 'min_child_samples': 44}. Best is trial 0 with value: 1.0644386016870766.


CPU times: total: 422 ms
Wall time: 404 ms


In [12]:
# Summarise the search: trial count, then number, value and parameters of the best trial.
print(f"Number of finished trials: {len(study.trials)}")
print("Best trial:")
best_trial = study.best_trial
print(f"  Number: {best_trial.number}")
print(f"  Value: {best_trial.value}")
print("  Params: ")
for param_name, param_value in best_trial.params.items():
    print(f"    {param_name}: {param_value}")

Number of finished trials: 1
Best trial:
  Number: 0
  Value: 1.0644386016870766
  Params: 
    learning_rate: 0.4073657656436648
    min_split_gain: 0.0019204079494910193
    reg_alpha: 0.685655809011563
    reg_lambda: 0.019448941142879615
    colsample_bytree: 0.7581830596778167
    subsample: 0.3728715964643011
    subsample_freq: 10
    max_depth: 3
    num_leaves: 260
    min_child_samples: 44


In [13]:
# Per-fold and average CV scores of the best trial, sign-flipped back from the
# "neg_*" scorer convention.
for split_label, score_key in (("train", "train_score"), ("valid", "test_score")):
    fold_scores = -1 * best_cv_results[score_key]
    print(f"CV {split_label} scores: {fold_scores}")
    print(f"CV {split_label} scores average : {round(np.mean(fold_scores), 6)}")

CV train scores: [1.05503594 1.06615895 1.0512751  1.0563806  1.0637785 ]
CV train scores average : 1.058526
CV valid scores: [1.08474222 1.06704007 1.08133229 1.06841159 1.02066684]
CV valid scores average : 1.064439


In [14]:
# The fitted estimators (one per CV fold) returned by cross_validate for the best
# trial; they are available because return_estimator=True is set in the objective.
models = best_cv_results["estimator"]

In [15]:
# Score every fold model on the full training set; log loss needs class
# probabilities, any other metric gets plain predictions.
for fold_idx, model in enumerate(models):
    if metric_name == "log_loss":
        y_pred = model.predict_proba(X_train)
    else:
        y_pred = model.predict(X_train)
    train_score = round(metric_function(y_train, y_pred), 6)
    print(f"Model {fold_idx}, train {metric_name}: {train_score}")

Model 0, train mse: 1.060977
Model 1, train mse: 1.066335
Model 2, train mse: 1.057287
Model 3, train mse: 1.058787
Model 4, train mse: 1.055156


In [16]:
# Score every fold model on the held-out test set; log loss needs class
# probabilities, any other metric gets plain predictions.
for fold_idx, model in enumerate(models):
    if metric_name == "log_loss":
        y_pred = model.predict_proba(X_test)
    else:
        y_pred = model.predict(X_test)
    test_score = round(metric_function(y_test, y_pred), 6)
    print(f"Model {fold_idx}, test {metric_name}: {test_score}")

Model 0, test mse: 1.035823
Model 1, test mse: 1.042579
Model 2, test mse: 1.032748
Model 3, test mse: 1.039261
Model 4, test mse: 1.031398


In [17]:
# Ensemble of the fold models on the training set: average their outputs
# element-wise (probabilities for log loss, predictions otherwise), then score.
fold_outputs = [
    model.predict_proba(X_train) if metric_name == "log_loss" else model.predict(X_train)
    for model in models
]
y_pred = np.mean(fold_outputs, axis=0)
print(f"Train {metric_name}: {round(metric_function(y_train, y_pred), 6)}")

Train mse: 1.049704


In [18]:
# Ensemble of the fold models on the test set: average their outputs
# element-wise (probabilities for log loss, predictions otherwise), then score.
fold_outputs = [
    model.predict_proba(X_test) if metric_name == "log_loss" else model.predict(X_test)
    for model in models
]
y_pred = np.mean(fold_outputs, axis=0)
print(f"Test {metric_name}: {round(metric_function(y_test, y_pred), 6)}")

Test mse: 1.02622


<style scoped>
table {
  font-size: 12px;
}
</style>

| LightGBM n_estimators | Seed | LightGBM mse | LightGBM cpu time |
| --------------------- | ---- | ------------ | ----------------- |
| 100                   | 0    | 0.186588     | 729               |
| 100                   | 1    | 0.194348     | 1294              |
| 100                   | 2    | 0.197862     | 990               |
| 100                   | 3    | 0.188629     | 1143              |
| 100                   | 4    | 0.194338     | 860               |
| 100                   | avg  | 0.192353     | 1003              |
| 300                   | 0    | 0.185100     | 2589              |
| 300                   | 1    | 0.192767     | 3650              |
| 300                   | 2    | 0.190481     | 2746              |
| 300                   | 3    | 0.182359     | 2782              |
| 300                   | 4    | 0.191614     | 3871              |
| 300                   | avg  | 0.188464     | 3128              |

In [19]:
# Perpetual model for the selected task; objective_type is "SquaredLoss" or
# "LogLoss" depending on the task switch.
# NOTE: this rebinds `model`, which the LightGBM evaluation loops earlier in the
# notebook also use as their loop variable.
model = PerpetualBooster(objective=objective_type)

In [20]:
%%time
# Fit on the same training split used for LightGBM.  The results tables in this
# notebook show higher cpu time for larger budgets — presumably `budget` controls
# how much fitting effort is spent; confirm against the Perpetual documentation.
model.fit(X_train, y_train, budget=1.5)

CPU times: total: 21.6 s
Wall time: 20.7 s


<perpetual.PerpetualBooster at 0x1163632a260>

In [21]:
# Score the Perpetual model on the held-out test set with the task metric
# (class probabilities for log loss, plain predictions otherwise).
y_pred = model.predict_proba(X_test) if metric_name == "log_loss" else model.predict(X_test)
perpetual_test_score = round(metric_function(y_test, y_pred), 6)
print(f"Test {metric_name}: {perpetual_test_score}")

Test mse: 0.192352


In [22]:
# Display the number of trees in the fitted Perpetual model.
model.number_of_trees

244

### Perpetual results with `target_loss_decrement = (budget / 10.0) * eta * loss_avg;`

<style scoped>
table {
  font-size: 12px;
}
</style>

| Perpetual budget | Seed | Perpetual mse | Perpetual cpu time (s) |
| ---------------- | ---- | ------------- | ---------------------- |
| 1.0              | 0    | 0.187273      | 9.23               |
| 1.0              | 1    | 0.189911      | 10.5               |
| 1.0              | 2    | 0.194937      | 11.0               |
| 1.0              | 3    | 0.182932      | 9.77               |
| 1.0              | 4    | 0.198443      | 9.88               |
| 1.0              | avg  | 0.190699      | 10.1               |
| 1.5              | 0    | 0.185843      | 28.6               |
| 1.5              | 1    | 0.188146      | 26.8               |
| 1.5              | 2    | 0.190484      | 26.6               |
| 1.5              | 3    | 0.178708      | 25.1               |
| 1.5              | 4    | 0.192352      | 21.6               |
| 1.5              | avg  | 0.187107      | 25.7               |

### Perpetual results with `tld = eta * eta * loss_avg`

<style scoped>
table {
  font-size: 12px;
}
</style>

| Perpetual budget | Seed | Perpetual mse | Perpetual cpu time (s) |
| ---------------- | ---- | ------------- | ---------------------- |
| 1.1              | 0    | 0.190265      | 8.27               |
| 1.1              | 1    | 0.190839      | 8.81               |
| 1.1              | 2    | 0.198457      | 11.2               |
| 1.1              | 3    | 0.181992      | 8.94               |
| 1.1              | 4    | 0.199403      | 7.47               |
| 1.1              | avg  | 0.192191      | 8.94               |
| 1.4              | 0    | 0.189875      | 16.8               |
| 1.4              | 1    | 0.186736      | 20.0               |
| 1.4              | 2    | 0.191496      | 21.1               |
| 1.4              | 3    | 0.180240      | 19.2               |
| 1.4              | 4    | 0.197255      | 18.3               |
| 1.4              | avg  | 0.189120      | 19.1               |
| 1.5              | 0    | 0.189845      | 20.5               |
| 1.5              | 1    | 0.188703      | 23.9               |
| 1.5              | 2    | 0.195430      | 31.3               |
| 1.5              | 3    | 0.179527      | 27.6               |
| 1.5              | 4    | 0.196902      | 23.2               |
| 1.5              | avg  | 0.190081      | 25.3               |
| 1.6              | 0    | 0.188318      | 28.4               |
| 1.6              | 1    | 0.187110      | 31.8               |
| 1.6              | 2    | 0.195210      | 37.9               |
| 1.6              | 3    | 0.179427      | 33.9               |
| 1.6              | 4    | 0.197369      | 28.1               |
| 1.6              | avg  | 0.189487      | 32.0               |