In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl

from sklearn.model_selection import StratifiedGroupKFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

import math
from scipy.stats import zscore
from numpy import nanmean, nanstd

from sklearn.metrics import roc_auc_score
from sklearn.ensemble import VotingClassifier

from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

import lightgbm as lgb
import catboost as cb
import xgboost as xgb

import random

import torch

import optuna

In [2]:
LGB = False
CB = False
XGB = False
OPTIMIZE_OPTUNA_LGB = True
OPTIMIZE_OPTUNA_CB = False
OPTIMIZE_OPTUNA_XGB = False

# lgb LIGHTGBM Score: 0.18075

# xgb XGBOOST Score: 0.18181

# CATBOOST Score: 0.17941


In [3]:
def set_seed(seed=42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed(42)

In [4]:
# !python /kaggle/input/script-5-fold-effnetv1b0/main.py
# !mv submission.csv submission_effnetv1b0_oof.csv

In [5]:
# !python /kaggle/input/script-5-fold-resnest101/main.py
# !mv submission.csv submission_resnest101_oof.csv

In [6]:
root = Path('/kaggle/input/isic-2024-challenge')

train_path = root / 'train-metadata.csv'
test_path = root / 'test-metadata.csv'
subm_path = root / 'sample_submission.csv'

id_col = 'isic_id'
target_col = 'target'
group_col = 'patient_id'

err = 1e-5
sampling_ratio = 0.01
seed = 42

do_ud = True


num_cols = [
    'age_approx',                        # Approximate age of patient at time of imaging.
    'clin_size_long_diam_mm',            # Maximum diameter of the lesion (mm).+
    'tbp_lv_A',                          # A inside  lesion.+
    'tbp_lv_Aext',                       # A outside lesion.+
    'tbp_lv_B',                          # B inside  lesion.+
    'tbp_lv_Bext',                       # B outside lesion.+ 
    'tbp_lv_C',                          # Chroma inside  lesion.+
    'tbp_lv_Cext',                       # Chroma outside lesion.+
    'tbp_lv_H',                          # Hue inside the lesion; calculated as the angle of A* and B* in LAB* color space. Typical values range from 25 (red) to 75 (brown).+
    'tbp_lv_Hext',                       # Hue outside lesion.+
    'tbp_lv_L',                          # L inside lesion.+
    'tbp_lv_Lext',                       # L outside lesion.+
    'tbp_lv_areaMM2',                    # Area of lesion (mm^2).+
    'tbp_lv_area_perim_ratio',           # Border jaggedness, the ratio between lesions perimeter and area. Circular lesions will have low values; irregular shaped lesions will have higher values. Values range 0-10.+
    'tbp_lv_color_std_mean',             # Color irregularity, calculated as the variance of colors within the lesion's boundary.
    'tbp_lv_deltaA',                     # Average A contrast (inside vs. outside lesion).+
    'tbp_lv_deltaB',                     # Average B contrast (inside vs. outside lesion).+
    'tbp_lv_deltaL',                     # Average L contrast (inside vs. outside lesion).+
    'tbp_lv_deltaLB',                    #
    'tbp_lv_deltaLBnorm',                # Contrast between the lesion and its immediate surrounding skin. Low contrast lesions tend to be faintly visible such as freckles; high contrast lesions tend to be those with darker pigment. Calculated as the average delta LB of the lesion relative to its immediate background in LAB* color space. Typical values range from 5.5 to 25.+
    'tbp_lv_eccentricity',               # Eccentricity.+
    'tbp_lv_minorAxisMM',                # Smallest lesion diameter (mm).+
    'tbp_lv_nevi_confidence',            # Nevus confidence score (0-100 scale) is a convolutional neural network classifier estimated probability that the lesion is a nevus. The neural network was trained on approximately 57,000 lesions that were classified and labeled by a dermatologist.+,++
    'tbp_lv_norm_border',                # Border irregularity (0-10 scale); the normalized average of border jaggedness and asymmetry.+
    'tbp_lv_norm_color',                 # Color variation (0-10 scale); the normalized average of color asymmetry and color irregularity.+
    'tbp_lv_perimeterMM',                # Perimeter of lesion (mm).+
    'tbp_lv_radial_color_std_max',       # Color asymmetry, a measure of asymmetry of the spatial distribution of color within the lesion. This score is calculated by looking at the average standard deviation in LAB* color space within concentric rings originating from the lesion center. Values range 0-10.+
    'tbp_lv_stdL',                       # Standard deviation of L inside  lesion.+
    'tbp_lv_stdLExt',                    # Standard deviation of L outside lesion.+
    'tbp_lv_symm_2axis',                 # Border asymmetry; a measure of asymmetry of the lesion's contour about an axis perpendicular to the lesion's most symmetric axis. Lesions with two axes of symmetry will therefore have low scores (more symmetric), while lesions with only one or zero axes of symmetry will have higher scores (less symmetric). This score is calculated by comparing opposite halves of the lesion contour over many degrees of rotation. The angle where the halves are most similar identifies the principal axis of symmetry, while the second axis of symmetry is perpendicular to the principal axis. Border asymmetry is reported as the asymmetry value about this second axis. Values range 0-10.+
    'tbp_lv_symm_2axis_angle',           # Lesion border asymmetry angle.+
    'tbp_lv_x',                          # X-coordinate of the lesion on 3D TBP.+
    'tbp_lv_y',                          # Y-coordinate of the lesion on 3D TBP.+
    'tbp_lv_z',                          # Z-coordinate of the lesion on 3D TBP.+
]

new_num_cols = [
    'lesion_size_ratio',             # tbp_lv_minorAxisMM      / clin_size_long_diam_mm
    'lesion_shape_index',            # tbp_lv_areaMM2          / tbp_lv_perimeterMM **2
    'hue_contrast',                  # tbp_lv_H                - tbp_lv_Hext              abs
    'luminance_contrast',            # tbp_lv_L                - tbp_lv_Lext              abs
    'lesion_color_difference',       # tbp_lv_deltaA **2       + tbp_lv_deltaB **2 + tbp_lv_deltaL **2  sqrt  
    'border_complexity',             # tbp_lv_norm_border      + tbp_lv_symm_2axis
    'color_uniformity',              # tbp_lv_color_std_mean   / tbp_lv_radial_color_std_max

    'position_distance_3d',          # tbp_lv_x **2 + tbp_lv_y **2 + tbp_lv_z **2  sqrt
    'perimeter_to_area_ratio',       # tbp_lv_perimeterMM      / tbp_lv_areaMM2
    'area_to_perimeter_ratio',       # tbp_lv_areaMM2          / tbp_lv_perimeterMM
    'lesion_visibility_score',       # tbp_lv_deltaLBnorm      + tbp_lv_norm_color
    'symmetry_border_consistency',   # tbp_lv_symm_2axis       * tbp_lv_norm_border
    'consistency_symmetry_border',   # tbp_lv_symm_2axis       * tbp_lv_norm_border / (tbp_lv_symm_2axis + tbp_lv_norm_border)

    'color_consistency',             # tbp_lv_stdL             / tbp_lv_Lext
    'consistency_color',             # tbp_lv_stdL*tbp_lv_Lext / tbp_lv_stdL + tbp_lv_Lext
    'size_age_interaction',          # clin_size_long_diam_mm  * age_approx
    'hue_color_std_interaction',     # tbp_lv_H                * tbp_lv_color_std_mean
    'lesion_severity_index',         # tbp_lv_norm_border      + tbp_lv_norm_color + tbp_lv_eccentricity / 3
    'shape_complexity_index',        # border_complexity       + lesion_shape_index
    'color_contrast_index',          # tbp_lv_deltaA + tbp_lv_deltaB + tbp_lv_deltaL + tbp_lv_deltaLBnorm

    'log_lesion_area',               # tbp_lv_areaMM2          + 1  np.log
    'normalized_lesion_size',        # clin_size_long_diam_mm  / age_approx
    'mean_hue_difference',           # tbp_lv_H                + tbp_lv_Hext    / 2
    'std_dev_contrast',              # tbp_lv_deltaA **2 + tbp_lv_deltaB **2 + tbp_lv_deltaL **2   / 3  np.sqrt
    'color_shape_composite_index',   # tbp_lv_color_std_mean   + bp_lv_area_perim_ratio + tbp_lv_symm_2axis   / 3
    'lesion_orientation_3d',         # tbp_lv_y                , tbp_lv_x  np.arctan2
    'overall_color_difference',      # tbp_lv_deltaA           + tbp_lv_deltaB + tbp_lv_deltaL   / 3

    'symmetry_perimeter_interaction',# tbp_lv_symm_2axis       * tbp_lv_perimeterMM
    'comprehensive_lesion_index',    # tbp_lv_area_perim_ratio + tbp_lv_eccentricity + bp_lv_norm_color + tbp_lv_symm_2axis   / 4
    'color_variance_ratio',          # tbp_lv_color_std_mean   / tbp_lv_stdLExt
    'border_color_interaction',      # tbp_lv_norm_border      * tbp_lv_norm_color
    'border_color_interaction_2',
    'size_color_contrast_ratio',     # clin_size_long_diam_mm  / tbp_lv_deltaLBnorm
    'age_normalized_nevi_confidence',# tbp_lv_nevi_confidence  / age_approx
    'age_normalized_nevi_confidence_2',
    'color_asymmetry_index',         # tbp_lv_symm_2axis       * tbp_lv_radial_color_std_max

    'volume_approximation_3d',       # tbp_lv_areaMM2          * sqrt(tbp_lv_x**2 + tbp_lv_y**2 + tbp_lv_z**2)
    'color_range',                   # abs(tbp_lv_L - tbp_lv_Lext) + abs(tbp_lv_A - tbp_lv_Aext) + abs(tbp_lv_B - tbp_lv_Bext)
    'shape_color_consistency',       # tbp_lv_eccentricity     * tbp_lv_color_std_mean
    'border_length_ratio',           # tbp_lv_perimeterMM      / pi * sqrt(tbp_lv_areaMM2 / pi)
    'age_size_symmetry_index',       # age_approx              * clin_size_long_diam_mm * tbp_lv_symm_2axis
    'index_age_size_symmetry',       # age_approx              * tbp_lv_areaMM2 * tbp_lv_symm_2axis
    
]

cat_cols = ['sex', 'anatom_site_general', 'tbp_tile_type', 'tbp_lv_location', 'tbp_lv_location_simple', 'attribution']

norm_cols = [f'{col}_patient_norm' for col in num_cols + new_num_cols]
_patient_sum_ratio = [f'{col}_patient_sum_ratio' for col in num_cols + new_num_cols]
_patient_minmax = [f'{col}_patient_minmax' for col in num_cols + new_num_cols]
_patient_rank = [f'{col}_patient_rank' for col in num_cols + new_num_cols]
_patient_quantile_scaled = [f'{col}_patient_quantile_scaled' for col in num_cols + new_num_cols]

special_cols = ['count_per_patient']
feature_cols = num_cols + new_num_cols + cat_cols + norm_cols + special_cols + _patient_rank + _patient_minmax + _patient_sum_ratio
# _patient_minmax + _patient_quantile_scaled + _patient_sum_ratio

In [7]:
def select_features_using_corr_matrix(df, threshold=0.91):
    corr_matrix = df.corr().abs()
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool_))
    to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
    selected_features = df.columns.difference(to_drop)
    return selected_features.tolist()

In [8]:

def read_data(path, oof_path=None, oof_path2=None):
    df = (
        pl.read_csv(path)
        .with_columns(
            pl.col('age_approx').cast(pl.String).replace('NA', np.nan).cast(pl.Float64),
        )
        .with_columns(
            pl.col(pl.Float64).fill_nan(pl.col(pl.Float64).median()), # You may want to impute test data with train
        )
        .with_columns(
            lesion_size_ratio              = pl.col('tbp_lv_minorAxisMM') / pl.col('clin_size_long_diam_mm'),
            lesion_shape_index             = pl.col('tbp_lv_areaMM2') / (pl.col('tbp_lv_perimeterMM') ** 2),
            hue_contrast                   = (pl.col('tbp_lv_H') - pl.col('tbp_lv_Hext')).abs(),
            luminance_contrast             = (pl.col('tbp_lv_L') - pl.col('tbp_lv_Lext')).abs(),
            lesion_color_difference        = (pl.col('tbp_lv_deltaA') ** 2 + pl.col('tbp_lv_deltaB') ** 2 + pl.col('tbp_lv_deltaL') ** 2).sqrt(),
            border_complexity              = pl.col('tbp_lv_norm_border') + pl.col('tbp_lv_symm_2axis'),
            color_uniformity               = pl.col('tbp_lv_color_std_mean') / (pl.col('tbp_lv_radial_color_std_max') + err),
        )
        .with_columns(
            position_distance_3d           = (pl.col('tbp_lv_x') ** 2 + pl.col('tbp_lv_y') ** 2 + pl.col('tbp_lv_z') ** 2).sqrt(),
            perimeter_to_area_ratio        = pl.col('tbp_lv_perimeterMM') / pl.col('tbp_lv_areaMM2'),
            area_to_perimeter_ratio        = pl.col('tbp_lv_areaMM2') / pl.col('tbp_lv_perimeterMM'),
            lesion_visibility_score        = pl.col('tbp_lv_deltaLBnorm') + pl.col('tbp_lv_norm_color'),
            combined_anatomical_site       = pl.col('anatom_site_general') + '_' + pl.col('tbp_lv_location'),
            symmetry_border_consistency    = pl.col('tbp_lv_symm_2axis') * pl.col('tbp_lv_norm_border'),
            consistency_symmetry_border    = pl.col('tbp_lv_symm_2axis') * pl.col('tbp_lv_norm_border') / (pl.col('tbp_lv_symm_2axis') + pl.col('tbp_lv_norm_border')),
        )
        .with_columns(
            color_consistency              = pl.col('tbp_lv_stdL') / pl.col('tbp_lv_Lext'),
            consistency_color              = pl.col('tbp_lv_stdL') * pl.col('tbp_lv_Lext') / (pl.col('tbp_lv_stdL') + pl.col('tbp_lv_Lext')),
            size_age_interaction           = pl.col('clin_size_long_diam_mm') * pl.col('age_approx'),
            hue_color_std_interaction      = pl.col('tbp_lv_H') * pl.col('tbp_lv_color_std_mean'),
            lesion_severity_index          = (pl.col('tbp_lv_norm_border') + pl.col('tbp_lv_norm_color') + pl.col('tbp_lv_eccentricity')) / 3,
            shape_complexity_index         = pl.col('border_complexity') + pl.col('lesion_shape_index'),
            color_contrast_index           = pl.col('tbp_lv_deltaA') + pl.col('tbp_lv_deltaB') + pl.col('tbp_lv_deltaL') + pl.col('tbp_lv_deltaLBnorm'),
        )
        .with_columns(
            log_lesion_area                = (pl.col('tbp_lv_areaMM2') + 1).log(),
            normalized_lesion_size         = pl.col('clin_size_long_diam_mm') / pl.col('age_approx'),
            mean_hue_difference            = (pl.col('tbp_lv_H') + pl.col('tbp_lv_Hext')) / 2,
            std_dev_contrast               = ((pl.col('tbp_lv_deltaA') ** 2 + pl.col('tbp_lv_deltaB') ** 2 + pl.col('tbp_lv_deltaL') ** 2) / 3).sqrt(),
            color_shape_composite_index    = (pl.col('tbp_lv_color_std_mean') + pl.col('tbp_lv_area_perim_ratio') + pl.col('tbp_lv_symm_2axis')) / 3,
            lesion_orientation_3d          = pl.arctan2(pl.col('tbp_lv_y'), pl.col('tbp_lv_x')),
            overall_color_difference       = (pl.col('tbp_lv_deltaA') + pl.col('tbp_lv_deltaB') + pl.col('tbp_lv_deltaL')) / 3,
        )
        .with_columns(
            symmetry_perimeter_interaction = pl.col('tbp_lv_symm_2axis') * pl.col('tbp_lv_perimeterMM'),
            comprehensive_lesion_index     = (pl.col('tbp_lv_area_perim_ratio') + pl.col('tbp_lv_eccentricity') + pl.col('tbp_lv_norm_color') + pl.col('tbp_lv_symm_2axis')) / 4,
            color_variance_ratio           = pl.col('tbp_lv_color_std_mean') / pl.col('tbp_lv_stdLExt'),
            border_color_interaction       = pl.col('tbp_lv_norm_border') * pl.col('tbp_lv_norm_color'),
            border_color_interaction_2     = pl.col('tbp_lv_norm_border') * pl.col('tbp_lv_norm_color') / (pl.col('tbp_lv_norm_border') + pl.col('tbp_lv_norm_color')),
            size_color_contrast_ratio      = pl.col('clin_size_long_diam_mm') / pl.col('tbp_lv_deltaLBnorm'),
            age_normalized_nevi_confidence = pl.col('tbp_lv_nevi_confidence') / pl.col('age_approx'),
            age_normalized_nevi_confidence_2 = (pl.col('clin_size_long_diam_mm')**2 + pl.col('age_approx')**2).sqrt(),
            color_asymmetry_index          = pl.col('tbp_lv_radial_color_std_max') * pl.col('tbp_lv_symm_2axis'),
        )
        .with_columns(
            volume_approximation_3d        = pl.col('tbp_lv_areaMM2') * (pl.col('tbp_lv_x')**2 + pl.col('tbp_lv_y')**2 + pl.col('tbp_lv_z')**2).sqrt(),
            color_range                    = (pl.col('tbp_lv_L') - pl.col('tbp_lv_Lext')).abs() + (pl.col('tbp_lv_A') - pl.col('tbp_lv_Aext')).abs() + (pl.col('tbp_lv_B') - pl.col('tbp_lv_Bext')).abs(),
            shape_color_consistency        = pl.col('tbp_lv_eccentricity') * pl.col('tbp_lv_color_std_mean'),
            border_length_ratio            = pl.col('tbp_lv_perimeterMM') / (2 * np.pi * (pl.col('tbp_lv_areaMM2') / np.pi).sqrt()),
            age_size_symmetry_index        = pl.col('age_approx') * pl.col('clin_size_long_diam_mm') * pl.col('tbp_lv_symm_2axis'),
            index_age_size_symmetry        = pl.col('age_approx') * pl.col('tbp_lv_areaMM2') * pl.col('tbp_lv_symm_2axis'),
        )
        .with_columns(
            ((pl.col(col) - pl.col(col).mean().over('patient_id')) / (pl.col(col).std().over('patient_id') + err)).alias(f'{col}_patient_norm') for col in (num_cols + new_num_cols)
        )
        .with_columns(
            (pl.col(col) / (pl.col(col).sum().over('patient_id') + err)).alias(f'{col}_patient_sum_ratio') for col in (num_cols + new_num_cols)
        )
        .with_columns(
            ((pl.col(col) - pl.col(col).min().over('patient_id')) / (pl.col(col).max().over('patient_id') - pl.col(col).min().over('patient_id') + err)).alias(f'{col}_patient_minmax') for col in (num_cols + new_num_cols)
        )
        .with_columns(
            (pl.col(col).rank('ordinal').over('patient_id')).alias(f'{col}_patient_rank') for col in (num_cols + new_num_cols)
        )
        .with_columns(
            ((pl.col(col) - pl.col(col).quantile(0.25).over('patient_id')) / (pl.col(col).quantile(0.75).over('patient_id') - pl.col(col).quantile(0.25).over('patient_id') + err)).alias(f'{col}_patient_quantile_scaled') for col in (num_cols + new_num_cols)
        )
        .with_columns(
            count_per_patient = pl.col('isic_id').count().over('patient_id'),
        )
        .with_columns(
            pl.col(cat_cols).cast(pl.Categorical),
        )
        .to_pandas()
    )
    
    if oof_path:
        df_effb0_oof = pd.read_csv(oof_path)
        df_effb0_oof = df_effb0_oof[['oof_predictions_effnetb0']].reset_index(drop=True)
        df = df.reset_index(drop=True)
        df['oof_predictions_effnetb0'] = df_effb0_oof['oof_predictions_effnetb0']
        feature_cols.append('oof_predictions_effnetb0')

    
    if oof_path2:
        df_resnet18_oof = pd.read_csv(oof_path2)
        df_resnet18_oof = df_resnet18_oof[['oof_predictions_resnet18']].reset_index(drop=True)
        df = df.reset_index(drop=True)
        df['oof_predictions_resnet18'] = df_resnet18_oof['oof_predictions_resnet18']
        feature_cols.append('oof_predictions_resnet18')
    
    return df

In [9]:
def preprocess(df_train, df_test):
    global cat_cols
    
    encoder = OneHotEncoder(sparse_output=False, dtype=np.int32, handle_unknown='ignore')
    encoder.fit(df_train[cat_cols])
    
    new_cat_cols = [f'onehot_{i}' for i in range(len(encoder.get_feature_names_out()))]

    df_train[new_cat_cols] = encoder.transform(df_train[cat_cols])
    df_train[new_cat_cols] = df_train[new_cat_cols].astype('category')

    df_test[new_cat_cols] = encoder.transform(df_test[cat_cols])
    df_test[new_cat_cols] = df_test[new_cat_cols].astype('category')

    for col in cat_cols:
        feature_cols.remove(col)

    feature_cols.extend(new_cat_cols)
#     lgb_shap_features.extend(new_cat_cols)
#     xgb_shap_features.extend(new_cat_cols)
#     cb_shap_features.extend(new_cat_cols)
    selected_features.extend(new_cat_cols)
    cat_cols = new_cat_cols
    
    return df_train, df_test

In [10]:
def custom_metric(estimator, X, y_true):
    y_hat = estimator.predict_proba(X)[:, 1]
    min_tpr = 0.80
    max_fpr = abs(1 - min_tpr)
    
    v_gt = abs(y_true - 1)
    v_pred = np.array([1.0 - x for x in y_hat])
    
    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    
    return partial_auc

def comp_score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str, min_tpr: float=0.80):
    v_gt = abs(np.asarray(solution.values)-1)
    v_pred = np.array([1.0 - x for x in submission.values])
    max_fpr = abs(1-min_tpr)
    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    # change scale from [0.5, 1.0] to [0.5 * max_fpr**2, max_fpr]
    # https://math.stackexchange.com/questions/914823/shift-numbers-into-a-different-range
    partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    return partial_auc

### Data Read & Feature Engineering

In [11]:
# Usage
# , oof_path2='/kaggle/input/384-5-fold-resnest-oof-predictions/oof_predictions_resnest101.csv'
df_train = read_data(train_path, oof_path='/kaggle/input/384x384-1-50-ratio-5fold-effnetb0-oof-predictions/oof_predictions.csv')
df_test = read_data(test_path)

# hog_df = pd.read_csv('/kaggle/input/hog-featuresv2/hog_features (3).csv')
# hog_df = hog_df.iloc[:, :25]
# hog_df['image_id'] = hog_df['image_id'].str.replace('.jpg', '')

# df_train = pd.merge(df_train, hog_df, how='outer', left_on='isic_id', right_on='image_id')
# df_train = df_train.drop(columns=['image_id'])

# hog_num_cols = hog_df.select_dtypes(include='number').columns.tolist()

def ugly_duckling_processing(df, num_cols):
    ud_columns = num_cols.copy()
    ud_num_cols = []
    
    #if false - only do location-based ugly ducklings
    include_patient_wide_ud = False  
    
    counter = 0
    
    def calc_ugly_duckling_scores(group, grouping):
        nonlocal counter
        counter += 1
        if counter % 10 == 0: print(".", end="", flush=True)
        z_scores = group[ud_columns].apply(lambda x: zscore(x, nan_policy='omit'))
        ud_scores = np.abs(z_scores)
        prefix = 'ud_' if grouping == 'patient' else 'ud_loc_'
        ud_scores.columns = [f'{prefix}{col}' for col in ud_columns]
        return ud_scores

    print("Analyzing ducklings", end="", flush=True)
    ud_location_col = 'tbp_lv_location'
    ud_scores_loc = df.groupby(['patient_id', ud_location_col])[ud_columns + ['patient_id', ud_location_col]].apply(
        lambda x: calc_ugly_duckling_scores(x, 'location')
    ).reset_index(level=[0, 1], drop=True)
    
    print("\nConcat ducklings")
    df = pd.concat([df, ud_scores_loc], axis=1)
    
    if include_patient_wide_ud:
        print("Analyzing ducklings (part 2)", end="", flush=True)
        ud_scores_patient = df.groupby('patient_id')[ud_columns + ['patient_id']].apply(
            lambda x: calc_ugly_duckling_scores(x, 'patient')
        ).reset_index(level=0, drop=True)
        df = pd.concat([df, ud_scores_patient], axis=1)
        print()  # New line after progress indicator

    print("Extending ducklings")
    ud_num_cols.extend([f'ud_loc_{col}' for col in ud_columns])
    if include_patient_wide_ud:
        ud_num_cols.extend([f'ud_{col}' for col in ud_columns])

    print("Enhancing ugly duckling features", end="", flush=True)
    
    # 1. Percentile-based ugly duckling scores
    def calc_percentile_ud_scores(group):
        nonlocal counter
        counter += 1
        if counter % 10 == 0: print(".", end="", flush=True)
        percentiles = group[ud_columns].rank(pct=True)
        return percentiles.add_prefix('ud_percentile_')
    
    counter = 0  # Reset counter for percentile calculation
    ud_percentiles = df.groupby('patient_id')[ud_columns].apply(calc_percentile_ud_scores).reset_index(level=0, drop=True)
    df = pd.concat([df, ud_percentiles], axis=1)
    ud_num_cols.extend([f'ud_percentile_{col}' for col in ud_columns])
    print()  # New line after progress indicator

    # 2. Ugly duckling count features
    threshold = 2.0  # You can adjust this threshold
    if include_patient_wide_ud:
        ud_count = (df[[f'ud_{col}' for col in ud_columns]].abs() > threshold).sum(axis=1)
        df['ud_count_patient'] = ud_count
        ud_num_cols.append('ud_count_patient')
    
    ud_count_loc = (df[[f'ud_loc_{col}' for col in ud_columns]].abs() > threshold).sum(axis=1)
    df['ud_count_location'] = ud_count_loc
    ud_num_cols.append('ud_count_location')

    # 3. Ugly duckling severity features
    if include_patient_wide_ud:
        df['ud_max_severity_patient'] = df[[f'ud_{col}' for col in ud_columns]].abs().max(axis=1)
        ud_num_cols.append('ud_max_severity_patient')
    df['ud_max_severity_location'] = df[[f'ud_loc_{col}' for col in ud_columns]].abs().max(axis=1)
    ud_num_cols.append('ud_max_severity_location')

    # 4. Ugly duckling consistency features
    if include_patient_wide_ud:
        df['ud_consistency_patient'] = df[[f'ud_{col}' for col in ud_columns]].abs().std(axis=1)
        ud_num_cols.append('ud_consistency_patient')
    df['ud_consistency_location'] = df[[f'ud_loc_{col}' for col in ud_columns]].abs().std(axis=1)
    ud_num_cols.append('ud_consistency_location')

    return df, ud_num_cols

if do_ud:
    df_train, ud_num_cols = ugly_duckling_processing(df_train.copy(), num_cols+new_num_cols)
    df_test, _ = ugly_duckling_processing(df_test.copy(), num_cols+new_num_cols)

  df_effb0_oof = pd.read_csv(oof_path)


Analyzing ducklings

  ud_scores_loc = df.groupby(['patient_id', ud_location_col])[ud_columns + ['patient_id', ud_location_col]].apply(


........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

  ud_scores_loc = df.groupby(['patient_id', ud_location_col])[ud_columns + ['patient_id', ud_location_col]].apply(



Concat ducklings
Extending ducklings
Enhancing ugly duckling features


In [12]:
# category_encoder = OrdinalEncoder(
#     categories='auto',
#     dtype=int,
#     handle_unknown='use_encoded_value',
#     unknown_value=-2,
#     encoded_missing_value=-1,
# )

# X_cat = category_encoder.fit_transform(df_train[cat_cols])
# for c, cat_col in enumerate(cat_cols):
#     df_train[cat_col] = X_cat[:, c]
    
# X_cat = category_encoder.transform(df_test[cat_cols])
# for c, cat_col in enumerate(cat_cols):
#     df_test[cat_col] = X_cat[:, c]
    
num_cols = num_cols + new_num_cols + norm_cols + special_cols + _patient_rank + ud_num_cols + _patient_minmax + _patient_sum_ratio
# _patient_quantile_scaled + _patient_minmax + lbp_num_cols + _patient_sum_ratio

selected_features = select_features_using_corr_matrix(df_train[num_cols])
print(len(selected_features))
    
df_train, df_test = preprocess(df_train, df_test)
print(len(selected_features))

191
238


In [13]:
print(len(selected_features))
selected_features += ['oof_predictions_effnetb0']
# lgb_shap_features += ['oof_predictions_effnetb0']
# xgb_shap_features += ['oof_predictions_effnetb0']
# cb_shap_features += ['oof_predictions_effnetb0']
# print(len(lgb_shap_features))
print(len(selected_features))

238
239


In [14]:
# feature_cols = select_features_using_corr_matrix(df_train[feature_cols])

In [15]:
len(selected_features)

239

In [16]:

N_SPLITS = 5
gkf = StratifiedGroupKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

df_train["fold"] = -1
for idx, (train_idx, val_idx) in enumerate(gkf.split(df_train, df_train["target"], groups=df_train["patient_id"])):
    df_train.loc[val_idx, "fold"] = idx
    

# val_score = cross_val_score(
#     estimator=estimator, 
#     X=X, y=y, 
#     cv=cv, 
#     groups=groups,
#     scoring=custom_metric,
# )

# np.mean(val_score), val_score

In [17]:
df_train['fold'].tail()

401054    4
401055    3
401056    1
401057    3
401058    0
Name: fold, dtype: int64

In [18]:
# lgbm_score = comp_score(oof_df["target"], oof_df["pred"], "")
# print(f"ENSEMBLE Score: {lgbm_score:.5f}")

In [19]:
%%time

if LGB:

#     lgb_params = {
#             'objective':        'binary',
#             'verbosity':        -1,
#             'num_iterations':      200,
#             'boosting_type':    'gbdt',
#             'random_state':     seed,
#             'learning_rate': 0.030498241352934777,
#             'num_leaves': 10,
#             'min_child_samples': 97,
#             'bagging_fraction': 0.8312608825135234,
#             'feature_fraction': 0.3469859081594571,
#             'feature_fraction_bynode': 0.5422511364261485,
#             'bagging_freq': 1,
#             'lambda_l1': 1.1680541125573348e-05,
#             'lambda_l2': 3.788411230011983e-05,
#             'max_depth': 5,
#             'scale_pos_weight': 1.6089759994724795
#     }
    
    lgb_params = {
            'objective':        'binary',
            'verbosity':        -1,
            'num_iterations':      200,
            'boosting_type':    'gbdt',
            'random_state':     seed,
            'learning_rate': 0.03991748424169214,
            'num_leaves': 127,
            'min_child_samples': 88,
            'bagging_fraction': 0.9983474875272581,
            'feature_fraction': 0.7307717294255454,
            'feature_fraction_bynode ': 0.34067729415062814,
            'bagging_freq': 6,
            'lambda_l1': 2.7891699382417054,
            'lambda_l2': 0.06488200497563937,
            'max_depth': 4,
            'scale_pos_weight': 2.2020616971404667,
        
        # 0.1820
    }
    


    lgb_model = Pipeline([
    #     ('feature_selection', lgb_transformer),
        ('sampler', RandomUnderSampler(sampling_strategy=sampling_ratio, random_state=seed)),
        ('classifier', lgb.LGBMClassifier(**lgb_params)),
    ])



    lgb_scores = []
    lgb_models = []
    lgb_oof_df = pd.DataFrame()
    for fold in range(N_SPLITS):
        _df_train = df_train[df_train["fold"] != fold].reset_index(drop=True)
        _df_valid = df_train[df_train["fold"] == fold].reset_index(drop=True)

        lgb_model.fit(_df_train[selected_features], _df_train["target"])

        lgb_preds = lgb_model.predict_proba(_df_valid[selected_features])[:, 1]
        lgb_score = comp_score(_df_valid[["target"]], pd.DataFrame(lgb_preds, columns=["prediction"]), "")
        print(f"fold: {fold} - Partial AUC Score: {lgb_score:.5f}")
        lgb_models.append(lgb_model)
        lgb_oof_single = _df_valid[["isic_id", "target"]].copy()
        lgb_oof_single["pred"] = lgb_preds
        lgb_oof_df = pd.concat([lgb_oof_df, lgb_oof_single])

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 11.2 µs


In [20]:
if LGB:
    lgb_score = comp_score(lgb_oof_df["target"], lgb_oof_df["pred"], "")
    print(f"LIGHTGBM Score: {lgb_score:.5f}")

In [21]:
%%time

if XGB:
    
    xgb_params = {
        'enable_categorical': True,
        'tree_method':        'hist',
        'random_state':       seed,
        'n_estimators':       200,
        'learning_rate':      0.042310779751030335, 
        'lambda':             0.0002847007250281572, 
        'alpha':              4.478266669867162e-08, 
        'max_depth':          6, 
        'subsample':          0.811997027872852, 
        'min_child_weight':   6,
    #     'colsample_bytree':   0.8437772277074493, 
    #     'colsample_bylevel':  0.5476090898823716, 
    #     'colsample_bynode':   0.9928601203635129, 
        'scale_pos_weight':   3.884899202547225,
    }
    

    xgb_model = Pipeline([
        #     ('feature_selection', xgb_transformer),
            ('sampler', RandomUnderSampler(sampling_strategy=sampling_ratio, random_state=seed)),
            ('classifier', xgb.XGBClassifier(**xgb_params)),
    ]) 

    xgb_scores = []
    xgb_models = []
    xgb_oof_df = pd.DataFrame()
    for fold in range(N_SPLITS):
        _df_train = df_train[df_train["fold"] != fold].reset_index(drop=True)
        _df_valid = df_train[df_train["fold"] == fold].reset_index(drop=True)


        xgb_model.fit(_df_train[selected_features], _df_train["target"])

        xgb_preds = xgb_model.predict_proba(_df_valid[selected_features])[:, 1]
        xgb_score = comp_score(_df_valid[["target"]], pd.DataFrame(xgb_preds, columns=["prediction"]), "")
        print(f"fold: {fold} - Partial AUC Score: {xgb_score:.5f}")
        xgb_models.append(xgb_model)
        xgb_oof_single = _df_valid[["isic_id", "target"]].copy()
        xgb_oof_single["pred"] = xgb_preds
        xgb_oof_df = pd.concat([xgb_oof_df, xgb_oof_single])

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 10.7 µs


In [22]:
if XGB:
    
    xgb_score = comp_score(xgb_oof_df["target"], xgb_oof_df["pred"], "")
    print(f"XGBOOST Score: {xgb_score:.5f}")

In [23]:
%%time

if CB:

    cb_params = {
        'loss_function':     'Logloss',
        'iterations':        200,
        'verbose':           False,
        'random_state':      seed,
        'cat_features':      cat_cols,
        'depth':             5,
        'learning_rate':     0.0721506164096434,
        'l2_leaf_reg':       7.475812134744556,
        'min_data_in_leaf':  59,
        'scale_pos_weight':  4.254188566545996,
        'subsample':         0.6016652239201599
    }
    
    cb_model = Pipeline([
    #     ('feature_selection', cb_transformer),
        ('sampler', RandomUnderSampler(sampling_strategy=sampling_ratio, random_state=seed)),
        ('classifier', cb.CatBoostClassifier(**cb_params)),
    ])


    cb_scores = []
    cb_models = []
    cb_oof_df = pd.DataFrame()
    for fold in range(N_SPLITS):
        _df_train = df_train[df_train["fold"] != fold].reset_index(drop=True)
        _df_valid = df_train[df_train["fold"] == fold].reset_index(drop=True)


        cb_model.fit(_df_train[selected_features], _df_train["target"])

        cb_preds = cb_model.predict_proba(_df_valid[selected_features])[:, 1]
        cb_score = comp_score(_df_valid[["target"]], pd.DataFrame(cb_preds, columns=["prediction"]), "")
        print(f"fold: {fold} - Partial AUC Score: {cb_score:.5f}")
        cb_models.append(cb_model)
        cb_oof_single = _df_valid[["isic_id", "target"]].copy()
        cb_oof_single["pred"] = cb_preds
        cb_oof_df = pd.concat([cb_oof_df, cb_oof_single])

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 10 µs


In [24]:
if CB:
    cb_score = comp_score(cb_oof_df["target"], cb_oof_df["pred"], "")
    print(f"CATBOOST Score: {cb_score:.5f}")

In [25]:
%%time

# LIGHTGBM Score: LIGHTGBM Score: 0.17950 score before tuning


if OPTIMIZE_OPTUNA_LGB:


    num_seeds = 1 
#     lgb_random_seeds = [random.randint(0, 10000) for _ in range(num_seeds)]
    
    lgb_random_seeds = [42]

    lgb_seed_results = {}

    for seed in lgb_random_seeds:
        print(f"Optimizing for random seed: {seed}")

        def objective(trial):
            lgb_params = {
                "objective": "binary",
                "random_state": seed,  # Use the current seed
                "verbosity": -1,
                "boosting_type": "gbdt",
                "num_iterations": trial.suggest_int('num_iterations', 200, 250),
                'learning_rate': 0.03104273262811841,
                "num_leaves": 111,
                "min_child_samples": 100,
                
                "bagging_fraction": 0.9101945804571369,
                "feature_fraction": 0.34103959543345147,
#                 'feature_fraction_bynode': trial.suggest_float('feature_fraction_bynode', 0.3, 1.0),
                
                "bagging_freq": 1,
                "lambda_l1": 3.211848972246674e-07,
                "lambda_l2": 3.720742147032093e-07,
                "max_depth": 4,
                "scale_pos_weight": 2.785263418574575,
            }

            lgb_scores = []
            lgb_models = []
            oof_df = pd.DataFrame()
            for fold in range(N_SPLITS):
                _df_train = df_train[df_train["fold"] != fold].reset_index(drop=True)
                _df_valid = df_train[df_train["fold"] == fold].reset_index(drop=True)

                lgb_model = Pipeline([
                    ('sampler', RandomUnderSampler(sampling_strategy=sampling_ratio, random_state=seed)),
                    ('classifier', lgb.LGBMClassifier(**lgb_params)),
                ])
                lgb_model.fit(_df_train[selected_features], _df_train["target"])

                preds = lgb_model.predict_proba(_df_valid[selected_features])[:, 1]
                score = comp_score(_df_valid[["target"]], pd.DataFrame(preds, columns=["prediction"]), "")
                lgb_models.append(lgb_model)
                oof_single = _df_valid[["isic_id", "target"]].copy()
                oof_single["pred"] = preds
                oof_df = pd.concat([oof_df, oof_single])

            lgbm_score_oof = comp_score(oof_df["target"], oof_df["pred"], "")

            return lgbm_score_oof

        # Run Optuna optimization
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=50)

        # Store results
        lgb_seed_results[seed] = {
            "best_params": study.best_trial.params,
            "best_value": study.best_value,
            "number_of_trials": len(study.trials)
        }

        print(f"Results for seed {seed}:")
        print("Number of finished trials:", lgb_seed_results[seed]["number_of_trials"])
        print("Best trial parameters:", lgb_seed_results[seed]["best_params"])
        print("Best trial value:", lgb_seed_results[seed]["best_value"])
        print("-" * 40)

    # Print or analyze the overall results for different seeds
    print("Summary of results for different random seeds:")
    for seed, results in lgb_seed_results.items():
        print(f"Seed: {seed}, Best Value: {results['best_value']}, Best Params: {results['best_params']}")


[I 2024-09-06 17:24:26,034] A new study created in memory with name: no-name-c7d8e723-fa4f-4b71-806b-b1a3fc10c28f


Optimizing for random seed: 42


[I 2024-09-06 17:25:27,230] Trial 0 finished with value: 0.18245210655556204 and parameters: {'num_iterations': 227}. Best is trial 0 with value: 0.18245210655556204.
[I 2024-09-06 17:26:31,217] Trial 1 finished with value: 0.18245210655556204 and parameters: {'num_iterations': 227}. Best is trial 0 with value: 0.18245210655556204.
[I 2024-09-06 17:27:37,059] Trial 2 finished with value: 0.18254242818023508 and parameters: {'num_iterations': 243}. Best is trial 2 with value: 0.18254242818023508.
[I 2024-09-06 17:28:36,879] Trial 3 finished with value: 0.18237706356321298 and parameters: {'num_iterations': 202}. Best is trial 2 with value: 0.18254242818023508.
[I 2024-09-06 17:29:38,565] Trial 4 finished with value: 0.18241489116549695 and parameters: {'num_iterations': 212}. Best is trial 2 with value: 0.18254242818023508.
[I 2024-09-06 17:30:39,858] Trial 5 finished with value: 0.18241489116549695 and parameters: {'num_iterations': 212}. Best is trial 2 with value: 0.18254242818023508

Results for seed 42:
Number of finished trials: 50
Best trial parameters: {'num_iterations': 241}
Best trial value: 0.18259668136014093
----------------------------------------
Summary of results for different random seeds:
Seed: 42, Best Value: 0.18259668136014093, Best Params: {'num_iterations': 241}
CPU times: user 47min 49s, sys: 6min 14s, total: 54min 3s
Wall time: 53min 59s


### Optuna HyperParam Tuned Models

In [26]:
%%time

# CATBOOST Score: 0.17946 before my optuna


if OPTIMIZE_OPTUNA_CB:
    
    num_seeds = 1 
#     cb_random_seeds = [random.randint(0, 10000) for _ in range(num_seeds)]
    cb_random_seeds = [42]

    cb_seed_results = {}

    for seed in cb_random_seeds:
        print(f"Optimizing for random seed: {seed}")

        def objective(trial):
            cb_params = {
                "objective": "Logloss",
                "iterations": 200,
                "verbose": False,
                "cat_features": cat_cols,
                "random_state": seed,  # Use the current seed
                "depth": trial.suggest_int('depth', 4, 6),
                "learning_rate": trial.suggest_float('learning_rate', 0.01, 0.1),
                "l2_leaf_reg": trial.suggest_float('l2_leaf_reg', 1e-3, 100),
                "min_data_in_leaf": trial.suggest_int('min_data_in_leaf', 1, 100),
                "scale_pos_weight": trial.suggest_float('scale_pos_weight', 1, 5),
                "subsample": trial.suggest_float('subsample', 0.3, 1.0),
            }

            cb_scores = []
            cb_models = []
            oof_cb = pd.DataFrame()
            for fold in range(N_SPLITS):
                _df_train = df_train[df_train["fold"] != fold].reset_index(drop=True)
                _df_valid = df_train[df_train["fold"] == fold].reset_index(drop=True)

                cb_model = Pipeline([
                    ('sampler', RandomUnderSampler(sampling_strategy=sampling_ratio, random_state=seed)),
                    ('classifier', cb.CatBoostClassifier(**cb_params)),
                ])
                cb_model.fit(_df_train[selected_features], _df_train["target"])

                preds = cb_model.predict_proba(_df_valid[selected_features])[:, 1]
                score = comp_score(_df_valid[["target"]], pd.DataFrame(preds, columns=["prediction"]), "")
                cb_scores.append(score)
                cb_models.append(cb_model)

                oof_single = _df_valid[['isic_id', 'target']].copy()
                oof_single['pred'] = preds
                oof_cb = pd.concat([oof_cb, oof_single])

            cb_score_oof = comp_score(oof_cb["target"], oof_cb["pred"], "")

            return cb_score_oof

        # Run Optuna optimization
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=100)

        # Store results
        cb_seed_results[seed] = {
            "best_params": study.best_trial.params,
            "best_value": study.best_value,
            "number_of_trials": len(study.trials)
        }

        print(f"Results for seed {seed}:")
        print("Number of finished trials:", cb_seed_results[seed]["number_of_trials"])
        print("Best trial parameters:", cb_seed_results[seed]["best_params"])
        print("Best trial value:", cb_seed_results[seed]["best_value"])
        print("-" * 40)

    # Print or analyze the overall results for different seeds
    print("Summary of results for different random seeds:")
    for seed, results in cb_seed_results.items():
        print(f"Seed: {seed}, Best Value: {results['best_value']}, Best Params: {results['best_params']}")


CPU times: user 7 µs, sys: 0 ns, total: 7 µs
Wall time: 12.6 µs


In [27]:
%%time

# XGBOOST Score: 0.17828  before my optuna tuning


if OPTIMIZE_OPTUNA_XGB:
    
    num_seeds = 1
#     xgb_random_seeds = [random.randint(0, 10000) for _ in range(num_seeds)]
    xgb_random_seeds = [42]

    xgb_seed_results = {}

    for seed in xgb_random_seeds:
        print(f"Optimizing for random seed: {seed}")

        def objective(trial):
            xgb_params = {
#                 "objective": "binary:logistic",
                "verbosity": 1,
                "n_estimators": trial.suggest_int('n_estimators', 200, 250),
                'enable_categorical': True,
                'tree_method': 'hist',
                'random_state': seed,  # Use the current seed
                'learning_rate': 0.042310779751030335,
                "subsample": 0.811997027872852,
                
#                 'colsample_bytree': 0.9104030105273816,
#                 'colsample_bylevel': 0.7008826964577788,
#                 'colsample_bynode': 0.8981511201813944,
                
                "reg_alpha": 4.478266669867162e-08,
                "reg_lambda": 0.0002847007250281572,
                "max_depth": 6,
                "scale_pos_weight": 3.884899202547225,
                "min_child_weight": 6,
            }

            xgb_scores = []
            xgb_models = []
            oof_df = pd.DataFrame()
            for fold in range(N_SPLITS):
                _df_train = df_train[df_train["fold"] != fold].reset_index(drop=True)
                _df_valid = df_train[df_train["fold"] == fold].reset_index(drop=True)

                xgb_model = Pipeline([
                    ('sampler', RandomUnderSampler(sampling_strategy=sampling_ratio, random_state=seed)),
                    ('classifier', xgb.XGBClassifier(**xgb_params)),
                ])

                xgb_model.fit(_df_train[selected_features], _df_train["target"])

                preds = xgb_model.predict_proba(_df_valid[selected_features])[:, 1]
                score = comp_score(_df_valid[["target"]], pd.DataFrame(preds, columns=["prediction"]), "")
                xgb_models.append(xgb_model)
                oof_single = _df_valid[["isic_id", "target"]].copy()
                oof_single["pred"] = preds
                oof_df = pd.concat([oof_df, oof_single])
            xgb_score_oof = comp_score(oof_df["target"], oof_df["pred"], "")

            return xgb_score_oof

        # Run Optuna optimization
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=50)

        # Store results
        xgb_seed_results[seed] = {
            "best_params": study.best_trial.params,
            "best_value": study.best_value,
            "number_of_trials": len(study.trials)
        }

        print(f"Results for seed {seed}:")
        print("Number of finished trials:", xgb_seed_results[seed]["number_of_trials"])
        print("Best trial parameters:", xgb_seed_results[seed]["best_params"])
        print("Best trial value:", xgb_seed_results[seed]["best_value"])
        print("-" * 40)

    # Print or analyze the overall results for different seeds
    print("Summary of results for different random seeds:")
    for seed, results in xgb_seed_results.items():
        print(f"Seed: {seed}, Best Value: {results['best_value']}, Best Params: {results['best_params']}")


CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 11.9 µs
