In [None]:
import pandas as pd

# --- Notebook switches -------------------------------------------------------
# quick            : work with sampled data to reduce computing time
# run_gridSearchCV : run or not hyperparameter optimization with GridSearchCV()
# run_optuna       : run or not hyperparameter optimization with Optuna
quick = True
run_gridSearchCV = False
run_optuna = True

# Pick the pickled DataFrame to load according to the `quick` switch.
if quick:
    filename = 'df-light.pkl'
else:
    filename = 'df-full.pkl'

# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle(f'./{filename}')

# Label column and feature matrix.
# NOTE(review): the features are "every column except the first"; this
# presumably assumes 'grav' is column 0 -- confirm, otherwise the label would
# also be present among the features.
target = df['grav']
data = df.iloc[:, 1:]

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# identical from one run to the next.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.2,
    random_state=222,
)

In [None]:
import time
import optuna
from sklearn.model_selection import cross_val_score
from catboost import CatBoostClassifier

if run_optuna:

    start_time = time.time()

    # 1. Define an objective function to be maximized.
    def objective(trial):
        """Return the mean 3-fold cross-validated F1 score for one Optuna trial.

        The trial suggests a classifier name (only 'CatBoostClassifier' is
        offered), a number of boosting iterations in [100, 200] and a learning
        rate in [0.01, 0.1]. A CatBoostClassifier is built with those values,
        declaring every column of ``X_train`` as a categorical feature, and is
        scored with ``cross_val_score`` on ``X_train`` / ``y_train``.

        Args:
            trial: The ``optuna`` trial object used to sample hyperparameters.

        Returns:
            The mean of the per-fold "f1" scores (the value Optuna maximizes).

        Raises:
            ValueError: If the suggested classifier name is not handled here.
        """
        # 2. Suggest values for the hyperparameters using a trial object.
        classifier_name = trial.suggest_categorical('classifier', ['CatBoostClassifier'])
        if classifier_name != 'CatBoostClassifier':
            # Fail loudly if the category list is extended without adding a
            # matching branch, instead of hitting an UnboundLocalError later.
            raise ValueError(f"Unsupported classifier: {classifier_name!r}")

        n_iterations = trial.suggest_int('iterations', 100, 200)
        learning_rate = trial.suggest_float('learning_rate', 0.01, 0.1)

        classifier_obj = CatBoostClassifier(
            iterations=n_iterations,
            learning_rate=learning_rate,
            cat_features=list(X_train.columns),
        )

        # NOTE(review): "f1" is scikit-learn's binary F1 scorer; if `grav` has
        # more than two classes, "f1_macro" or "f1_weighted" is needed -- confirm.
        fold_scores = cross_val_score(
            classifier_obj, X_train, y_train, cv=3, scoring="f1", verbose=1
        )
        return fold_scores.mean()

    # 3. Create a study object and optimize the objective function.
    # The sampler is seeded (same seed as the train/test split) so that the
    # sequence of suggested hyperparameters is reproducible across runs.
    study = optuna.create_study(
        direction='maximize',
        sampler=optuna.samplers.TPESampler(seed=222),
    )
    study.optimize(objective, n_trials=10)

    elapsed = time.time() - start_time
    print(f"--- Optimization with Optuna performed in {elapsed} seconds ---")

    fig = optuna.visualization.plot_param_importances(study)
    fig.show()