In [1]:
from sklearn.datasets import load_iris

# Pull the iris data in as a single DataFrame; the label is stored in the
# 'target' column next to the measurement columns.
df = load_iris(as_frame=True).frame

# Split into labels (y) and features (X = every column except 'target').
y = df['target']
X = df.drop(columns=['target'])

# BaselineModel test

In [2]:
from semiq_ml.baseline_model import BaselineModel

# Settings for the baseline run: classification scored with weighted F1,
# the "gbm" model selection, and a fixed random_state.
# (The recorded run for this selection trained LGBM, XGBoost and CatBoost.)
baseline_settings = {
    "task_type": "classification",
    "metric": "f1_weighted",
    "models": "gbm",
    "random_state": 42,
}
baseline = BaselineModel(**baseline_settings)

# Fit on the feature frame and labels; the recorded log reports that a 20%
# validation set is held out during fitting.
baseline.fit(X, y)

# Keep the results call as the cell's last expression so it is displayed.
baseline.get_results()

  from .autonotebook import tqdm as notebook_tqdm
2025-06-12 10:18:43,571 - INFO - Starting BaselineModel training for classification with metric: f1_weighted (Maximize: True)
2025-06-12 10:18:43,571 - INFO - Validation set size: 20%
2025-06-12 10:18:43,572 - INFO - Preprocessor type: general_ohe
2025-06-12 10:18:43,572 - INFO - Numeric columns: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
2025-06-12 10:18:43,572 - INFO - Categorical columns: []
2025-06-12 10:18:43,609 - INFO -   LGBM f1_weighted: train=1.0000, val=0.8997 (Time: 0.04s)
2025-06-12 10:18:43,610 - INFO -   --> NEW BEST model: LGBM with f1_weighted: 0.8997
2025-06-12 10:19:09,777 - INFO -   XGBoost f1_weighted: train=1.0000, val=0.9333 (Time: 26.17s)
2025-06-12 10:19:09,778 - INFO -   --> NEW BEST model: XGBoost with f1_weighted: 0.9333
2025-06-12 10:19:10,032 - INFO -   CatBoost f1_weighted: train=1.0000, val=0.9666 (Time: 0.25s)
2025-06-12 10:19:10,032 - INFO -   --> NEW BEST model: 

Unnamed: 0,model,train_score,val_score,fit_time,preprocessor_used,status,error_message
0,LGBM,1.0,0.899749,0.037673,general_ohe,Success,
1,XGBoost,1.0,0.933333,26.167119,general_ohe,Success,
2,CatBoost,1.0,0.966583,0.253307,catboost_internal,Success,


# OptunaOptimizer test

In [3]:
from semiq_ml.tuning import OptunaOptimizer
# Configure the tuner: classification scored with weighted F1, a single
# trial (a smoke-test budget, not a real search), with GPU use requested.
tuner = OptunaOptimizer(
    task_type="classification",
    metric="f1_weighted",
    n_trials=1,
    gpu=True,
)

# Tune the LGBM model and bind the value returned by tune_model to a name
# that matches the model actually being tuned.
tuned_lgbm = tuner.tune_model("LGBM", X, y, n_jobs=1)

# Backward-compatible alias: this result used to be bound to `tuned_xgb`,
# which was misleading because the model tuned here is LGBM, not XGBoost.
# The old name is kept so any cell that still refers to it keeps working.
tuned_xgb = tuned_lgbm

# Keep the tuning summary as the cell's last expression so it is displayed.
tuner.get_tuning_results()

2025-06-12 10:19:10,042 - INFO - Setting number of classes to 3
2025-06-12 10:19:10,043 - INFO - Starting hyperparameter optimization for LGBM
2025-06-12 10:19:10,043 - INFO - Number of trials: 1, timeout: None
[I 2025-06-12 10:19:10,043] A new study created in memory with name: no-name-2c2c3ee6-803d-4571-bf71-4db3fa694cc5
  0%|          | 0/1 [00:00<?, ?it/s]2025-06-12 10:19:10,048 - INFO - Preprocessor type: general_ohe
2025-06-12 10:19:10,048 - INFO - Numeric columns: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
2025-06-12 10:19:10,048 - INFO - Categorical columns: []
2025-06-12 10:19:10,305 - INFO - Trial 0 for LGBM - f1_weighted: 0.9666 (Time: 0.26s)
Best trial: 0. Best value: 0.966583: 100%|██████████| 1/1 [00:00<00:00,  3.80it/s]
2025-06-12 10:19:10,309 - INFO - Optimization completed. Best score: 0.9665831244778613
2025-06-12 10:19:10,309 - INFO - Best parameters: {'boosting_type': 'dart', 'learning_rate': 0.07661100707771368, 'num_leaves':

[I 2025-06-12 10:19:10,306] Trial 0 finished with value: 0.9665831244778613 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.07661100707771368, 'num_leaves': 52, 'max_depth': 1, 'min_data_in_leaf': 15, 'min_sum_hessian_in_leaf': 2.9154431891537547, 'feature_fraction': 0.8005575058716043, 'bagging_fraction': 0.8540362888980227, 'bagging_freq': 1, 'lambda_l1': 7.579479953348009, 'lambda_l2': 2.1368329072358767, 'min_gain_to_split': 0.21233911067827616, 'extra_trees': False, 'n_estimators': 339, 'device': 'gpu', 'gpu_platform_id': 0, 'gpu_device_id': 0}. Best is trial 0 with value: 0.9665831244778613.


{'best_params': {'boosting_type': 'dart',
  'learning_rate': 0.07661100707771368,
  'num_leaves': 52,
  'max_depth': 1,
  'min_data_in_leaf': 15,
  'min_sum_hessian_in_leaf': 2.9154431891537547,
  'feature_fraction': 0.8005575058716043,
  'bagging_fraction': 0.8540362888980227,
  'bagging_freq': 1,
  'lambda_l1': 7.579479953348009,
  'lambda_l2': 2.1368329072358767,
  'min_gain_to_split': 0.21233911067827616,
  'extra_trees': False,
  'n_estimators': 339,
  'device': 'gpu',
  'gpu_platform_id': 0,
  'gpu_device_id': 0},
 'best_value': 0.9665831244778613,
 'best_trial': FrozenTrial(number=0, state=1, values=[0.9665831244778613], datetime_start=datetime.datetime(2025, 6, 12, 10, 19, 10, 45323), datetime_complete=datetime.datetime(2025, 6, 12, 10, 19, 10, 306584), params={'boosting_type': 'dart', 'learning_rate': 0.07661100707771368, 'num_leaves': 52, 'max_depth': 1, 'min_data_in_leaf': 15, 'min_sum_hessian_in_leaf': 2.9154431891537547, 'feature_fraction': 0.8005575058716043, 'bagging_fra