[UC12] Tune custom classifier hyperparameters

0. Define custom classifier

In [1]:
from typing import Any, Dict, Union

import mlflow
import numpy as np
import optuna
from lightgbm import LGBMClassifier
from sklearn.pipeline import Pipeline

from mlcb.base_classes.BaseTunableModel import BaseTunableModel

class LGBMTunable(BaseTunableModel):
    """LightGBM classifier plugged into ``BaseTunableModel`` for hyperparameter tuning.

    Provides the training, evaluation, persistence and search-space methods
    for a ``lightgbm.LGBMClassifier``. The train/test splits are read from
    ``self.train_features``, ``self.train_labels`` and ``self.test_features``.
    """

    def _train(self, hyperparameters: Dict[str, Union[int, float, str]]) -> LGBMClassifier:
        """
        Fits a LightGBM classifier on the training split.

        Args:
            hyperparameters (Dict[str, Union[int, float, str]]): Keyword arguments
                forwarded unchanged to ``LGBMClassifier``.

        Returns:
            LGBMClassifier: The fitted classifier.
        """
        model = LGBMClassifier(**hyperparameters)
        model.fit(self.train_features, self.train_labels)
        return model

    def _evaluate(self, model: LGBMClassifier) -> np.ndarray:
        """
        Scores the test split with a trained model.

        Args:
            model (LGBMClassifier): A classifier previously returned by ``_train``.

        Returns:
            np.ndarray: The output of ``model.predict_proba`` on ``self.test_features``.
        """
        predictions = model.predict_proba(self.test_features)
        return predictions

    def _save_model(self, model: LGBMClassifier, signature: Any) -> None:
        """
        Saves the trained model using MLflow's LightGBM flavor.

        Args:
            model (LGBMClassifier): The trained LightGBM model to save.
            signature (Any): The input/output signature for the model.
        """
        mlflow.lightgbm.log_model(model, "trained_best_model", signature=signature)

    def _suggest_hyperparameters(self, trial: optuna.Trial) -> Dict[str, Union[int, float, str]]:
        """
        Samples one LightGBM configuration from the search space.

        Args:
            trial (optuna.Trial): The trial used to draw each hyperparameter value.

        Returns:
            Dict[str, Union[int, float, str]]: Keyword arguments for ``LGBMClassifier``.
        """
        return {
            'num_leaves': trial.suggest_int('num_leaves', 31, 256),
            'max_depth': trial.suggest_int('max_depth', -1, 16),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1e-1, log=True),
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            # `min_data_in_leaf` is a LightGBM alias of `min_child_samples`, so only
            # one of them is sampled; passing both makes LightGBM warn and ignore one.
            'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
            'subsample': trial.suggest_float('subsample', 0.4, 1.0),
            # Row subsampling is only active when the bagging frequency is non-zero;
            # with the default of 0 the tuned `subsample` value would have no effect.
            'subsample_freq': 1,
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 1.0),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
            'min_data_in_bin': trial.suggest_int('min_data_in_bin', 1, 20)
        }

1. Load dataset

In [2]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# Load the wine dataset as a (features, labels) pair.
X, y = load_wine(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

2. Define experiment parameters

In [3]:
# Number of trials passed to `classifier.tune` in the tuning cell.
TUNING_TRIALS = 50
# Experiment name passed to `classifier.tune` in the tuning cell.
EXPERIMENT_NAME = 'UC12_tune_custom_model'

# Build the tunable model from the train/test splits created in the previous cell.
classifier = LGBMTunable(
    X_train,
    y_train,
    X_test,
    y_test,
)

TypeError: Can't instantiate abstract class LGBMTunable without an implementation for abstract methods '_evaluate', '_save_model', '_train'

3. Run tuning

In [None]:
# Start tuning with the trial count and experiment name from the configuration cell.
classifier.tune(
    experiment_name=EXPERIMENT_NAME,
    n_trials=TUNING_TRIALS,
    log_nested=True,
)

4. Analyze tuning process

Tuning statistics are available at http://127.0.0.1:5000/.

5. Close MLFlow for this experiment


In [None]:
# Close MLflow for this experiment through the classifier's logger.
# NOTE(review): `_close_mlflow` is underscore-prefixed (private by convention) —
# confirm whether the logger exposes a public way to do this.
classifier.logger._close_mlflow()