## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Importing Libraries</p>

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from mlxtend.classifier import StackingClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler, OrdinalEncoder
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from scipy.stats import mode
import optuna
from optuna.samplers import TPESampler
from sklearn.metrics import (
    roc_auc_score, roc_curve, auc, classification_report, 
    confusion_matrix, accuracy_score, matthews_corrcoef, f1_score,
    ConfusionMatrixDisplay
)
from sklearn.model_selection import (
    train_test_split, GridSearchCV, StratifiedShuffleSplit,
    StratifiedKFold, train_test_split, cross_val_score, cross_validate
)
import warnings
warnings.filterwarnings('ignore')

  ### <p style="background-color: #fdefff;color:#c12eff;display: inline-block;padding:.6rem;border-radius:.5rem;border: 1px solid #c059ff">Loading data</p>

In [2]:
cd /workspace/data/

/workspace/data


In [3]:
train_data = pd.read_csv(os.path.join(os.getcwd(), "kaggle/playground-series-s4e8/train.csv"))
test_data = pd.read_csv(os.path.join(os.getcwd(), "kaggle/playground-series-s4e8/test.csv"))
sample_submission_data = pd.read_csv(os.path.join(os.getcwd(), "kaggle/playground-series-s4e8/sample_submission.csv"))

print("train_data :", train_data.shape)
print("test_data :", test_data.shape)
print("sample_submission_data :", sample_submission_data.shape)

train_data : (3116945, 22)
test_data : (2077964, 21)
sample_submission_data : (2077964, 2)


## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Basic Info about Data</p> 

In [None]:
train_data.head(2)

In [None]:
test_data.head(2)

In [None]:
sample_submission_data.head(2)

In [None]:
train_data.info()

In [None]:
train_data.describe().T

In [None]:
print(train_data['class'].value_counts())
print(train_data['class'].value_counts() / sum(train_data['class'].value_counts()))
sns.countplot(x='class',data=train_data)
plt.xticks(rotation=60)
plt.show()

In [None]:
pd.DataFrame({
    'column': train_data.columns,
    'null-count': train_data.isna().sum().values,
    '% null-count': np.round(train_data.isna().sum().values*100/len(train_data),6)
}).sort_values(by='null-count', ascending=False).reset_index(drop=True)

## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Correlation Matrics</p> 

In [None]:
# train_data.replace(['NaN', 'None', 'null', ''], np.nan, inplace=True)
df_dropped = train_data.drop('id', axis=1)
df_encoded = df_dropped.apply(lambda x: pd.factorize(x)[0] if x.dtype == 'object' else x)
train_data.isna().sum()

correlation_matrix = df_encoded.corr()

correlation_matrix
plt.figure(figsize=(20, 20))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Checking Unique Categories
</p> 

In [None]:
# cate_col = train_data.select_dtypes(include=['object']).columns

# # Find unique categories and their counts for each categorical column
# unique_categories = {col: train_data[col].value_counts() for col in cate_col}

# # Set the size of the overall figure
# plt.figure(figsize=(15, len(cate_col) * 5))

# # Plot the count of each unique category
# for i, (col, counts) in enumerate(unique_categories.items(), 1):
#     plt.subplot(len(cate_col), 1, i)
#     sns.barplot(x=counts.index, y=counts.values, palette="viridis")
#     plt.title(f"Count of unique categories in column '{col}'")
#     plt.xlabel('Categories')
#     plt.ylabel('Count')
#     plt.xticks(rotation=45, ha='right')
#     plt.tight_layout()

# plt.show()


  ### <p style="background-color: #fdefff;color:#c12eff;display: inline-block;padding:.6rem;border-radius:.5rem;border: 1px solid #c059ff">Percentage of Missing Values by Feature</p>

In [None]:
pd.DataFrame({
    'column': train_data.drop('id', axis=1).columns,
    'null-count': train_data.drop('id', axis=1).isna().sum().values,
    '% null-count': np.round(train_data.drop('id', axis=1).isna().sum().values*100/len(train_data.drop('id', axis=1)),6),
    'test null-count': test_data.isna().sum().values,
    '% test null-count': np.round(test_data.isna().sum().values*100/len(test_data),6)
}).sort_values(by='null-count', ascending=False).reset_index(drop=True)

## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Checking Feature Importance and Dropping Useless Columns
</p> 

In [None]:
# alpha = 0.05
# values = {}

# for col in train_data.columns:
#     if col == "class":
#         continue

#     A, B = train_data[col], train_data["class"]

#     dfObserved = pd.crosstab(A, B) 
#     chi2, p, dof, expected = scipy.stats.chi2_contingency(dfObserved.values) # 카이제곱 독립 검정 수행
#     values[col] = p
#     if p < alpha:
#         # Reject null hypothesis
#         print("{} is important. (p = {})".format(col, p))
#     else:
#         # Accept null hypothesis
#         print("{} is NOT important. (p = {})".format(col, p))

In [None]:
# Index(['id', 'class', 'cap-diameter', 'cap-shape', 'cap-surface', 'cap-color',
#        'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
#        'stem-height', 'stem-width', 'stem-root', 'stem-surface', 'stem-color',
#        'veil-type', 'veil-color', 'has-ring', 'ring-type', 'spore-print-color',
#        'habitat', 'season', 'veil-info'],
#       dtype='object')

# 결측치와 특성 존재 여부를 동시에 다룰 수 있음: 앞에서 0과 1로 veil-type의 존재 여부를 표현하고, 뒤에서 'unknown'을 통해 veil-color의 결측치를 처리함으로써 두 정보를 효과적으로 결합할 수 있습니다.
# 모델 학습에 유용한 정보 제공: 결측치나 특성의 존재 여부를 단순히 무시하지 않고, 이들을 결합하여 모델이 더 많은 패턴을 학습할 수 있게 도와줍니다.
# Index(['w', 'y', 'n', 'u', 'k', 'e', 'g', 'p', 'r', 'o', 's', 'a', 't', 'd',
#        'i', 'h', 'c', 'f', 'l', 'b', 'z', '8.25', '2.49', '3.32'],
#       dtype='object', name='veil-color')
# Index(['u', 'w', 'a', 'f', 'e', 'b', 'c', 'y', 'k', 'g', 'n', 's', 'r', 'd',
#        'p', 'h', 'i', 'l', 'is None', 't', '21.11', '5.94'],
#       dtype='object', name='veil-type')

# Index(['f10', 'tnone', 't1', 't7', 't10', 't4', 't2', 't5', 't3', 't9',
#        'fnone', 'f1', 'f2', 'f7', 'f4', 't8', 'f5', 'r2', 't6', 'f3', 'l4',
#        't0', 'p5', 'z7', 'c10', 'x10', 'f9', 'f6', 's10', 'm9', 'hnone', 's1',
#        'g3', 'g5', 'h7', 'e1', 'f0', 'r4', 'dnone', 's5', 'cnone', 'h1', 'p1',
#        'h5', 'h10', 'w3', 'y2', 'a10', 'ynone', 'e10', 'p7', '10.310', 's2',
#        'o2', 'g10', 'h2', 'g1', 's3', 'p3', 'knone', 'inone', 'nnone', 'rnone',
#        'l5', 'c1', 'n10', 'c3', 'o10', 'e4', 'd10', 'f has-ring10', 'lnone',
#        'c7', 'e3', 'y1', 'k10'],
#       dtype='object', name='has-ring-type')

train_data['veil-info'] = train_data['veil-type'].notna().astype(int).astype(str) + train_data['veil-color'].fillna('unknown')
cap_shape_mapping = {'b': 0, 'c': 1, 'x': 2, 'f': 3, 's': 4, 'p': 5, 'o': 6}
cap_color_mapping = {'n': 0, 'b': 1, 'g': 2, 'r': 3, 'p': 4, 'u': 5, 'e': 6, 'w': 7, 'y': 8, 'l': 9, 'o': 10, 'k': 11}
ring_type_mapping = {'c': 0, 'e': 1, 'r': 2, 'g': 3, 'l': 4, 'p': 5, 's': 6, 'z': 7, 'y': 8, 'm': 9, 'f': 10}
train_data['cap-shape'] = train_data['cap-shape'].map(cap_shape_mapping).astype(str).str.replace('.0', '', regex=False)
train_data['cap-color'] = train_data['cap-color'].map(cap_color_mapping).astype(str).str.replace('.0', '', regex=False)
train_data['has-ring-type'] = (
    train_data['has-ring'] + 
    train_data['ring-type'].fillna('none').map(ring_type_mapping).apply(lambda x: f"{x:.0f}" if pd.notna(x) else 'none').str.replace('.0', '', regex=False)
)

test_data['veil-info'] = test_data['veil-type'].notna().astype(int).astype(str) + test_data['veil-color'].fillna('unknown')
test_data['cap-shape'] = test_data['cap-shape'].map(cap_shape_mapping).astype(str).str.replace('.0', '', regex=False)
test_data['cap-color'] = test_data['cap-color'].map(cap_color_mapping).astype(str).str.replace('.0', '', regex=False)
test_data['has-ring-type'] = (
    test_data['has-ring'] + 
    test_data['ring-type'].fillna('none').map(ring_type_mapping).apply(lambda x: f"{x:.0f}" if pd.notna(x) else 'none').str.replace('.0', '', regex=False)
)

In [None]:
train_data['has-ring-type']

In [None]:
set(train_data['veil-info'])

In [None]:
train_data = train_data.drop(['id', 'veil-color', 'veil-type', 'has-ring', 'ring-type'], axis=1)
test_data = test_data.drop(['id', 'veil-color', 'veil-type', 'has-ring', 'ring-type'], axis=1)

In [None]:
# df_encoded = train_data
# dt_encoded = test_data

## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Handling NaN Values And Less Frequent Categories</p> 

In [None]:
# columns_to_fill_train = train_data.columns[(train_data.isna().sum() > 0) & (train_data.isna().sum() < 60)].tolist()
# columns_to_fill_test = test_data.columns[(test_data.isna().sum() > 0) & (test_data.isna().sum() < 100)].tolist()

# corr_matrix_train = df_encoded.corr()
# corr_matrix_test = dt_encoded.corr()

# def fill_missing_values(data, columns_to_fill, corr_matrix):
#     for column in columns_to_fill:
#         if column in corr_matrix.columns:
#             most_corr_features = corr_matrix[column][corr_matrix[column].abs() > 0.1].index.tolist()
#             if column in most_corr_features:
#                 most_corr_features.remove(column)

#             if most_corr_features:
#                 group_modes = data.groupby(most_corr_features)[column].apply(lambda x: x.mode()[0] if not x.mode().empty else np.nan)

#                 def fill_na(row):
#                     if pd.isna(row[column]):
#                         try:
#                             return group_modes.loc[tuple(row[most_corr_features])]
#                         except KeyError:
#                             return np.nan
#                     else:
#                         return row[column]

#                 data[column] = data.apply(fill_na, axis=1)

# fill_missing_values(df_encoded, columns_to_fill_train, corr_matrix_train)
# fill_missing_values(dt_encoded, columns_to_fill_test, corr_matrix_test)


In [None]:
train_data.isna().sum()

In [None]:
train_data.columns[(train_data.isna().sum() > 0) & (train_data.isna().sum() < 60)].tolist()

In [None]:
def cleaning(df):
    threshold = 100
    
    cat_feats = ['cap-shape', 'cap-surface', 'cap-color',
       'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
       'stem-root', 'stem-surface', 'stem-color', 'spore-print-color',
       'habitat', 'season', 'veil-info', 'has-ring-type']
    
    for feat in cat_feats:
        if df[feat].dtype.name == 'category':
            # Add 'missing' and 'noise' to categories if not present
            if 'missing' not in df[feat].cat.categories:
                df[feat] = df[feat].cat.add_categories('missing')
            if 'noise' not in df[feat].cat.categories:
                df[feat] = df[feat].cat.add_categories('noise')
        else:
            # Convert to category and add new categories
            df[feat] = df[feat].astype('category')
            df[feat] = df[feat].cat.add_categories(['missing', 'noise'])
        
        # Fill missing values with 'missing'
        df[feat] = df[feat].fillna('missing')
        
        # Replace infrequent categories with 'noise'
        counts = df[feat].value_counts(dropna=False)
        infrequent_categories = counts[counts < threshold].index
        df[feat] = df[feat].apply(lambda x: 'missing' if x in infrequent_categories else x)
    
    return df

# Example usage
train_data = cleaning(train_data)
test_data = cleaning(test_data)

In [None]:
train_data.isna().sum()

In [None]:
group_by_features = ['stem-width', 'stem-height']
group_means_train = train_data.groupby(group_by_features)['cap-diameter'].mean()
group_means_train

In [None]:
group_by_features = ['stem-width', 'stem-height']
group_means_train = train_data.groupby(group_by_features)['cap-diameter'].mean()

def fill_na_with_group_mean(row):
    if pd.isna(row['cap-diameter']):
        group = tuple(row[group_by_features])
        return group_means_train.get(group, np.nan) 
    else:
        return row['cap-diameter']

train_data['cap-diameter'] = train_data.apply(fill_na_with_group_mean, axis=1)
test_data['cap-diameter'] = test_data.apply(fill_na_with_group_mean, axis=1)

In [None]:
group_by_features = ['stem-width', 'stem-height']

# Calculate group means for the train data
group_means_train = train_data.groupby(group_by_features)['cap-diameter'].mean()

def fill_na_with_group_mean(row, group_means):
    if pd.isna(row['cap-diameter']):
        group = tuple(row[group_by_features])
        return group_means.get(group, np.nan)
    else:
        return row['cap-diameter']

# Apply to train_data using train group means
train_data['cap-diameter'] = train_data.apply(fill_na_with_group_mean, axis=1, group_means=group_means_train)

# Apply the same group means from train_data to test_data
test_data['cap-diameter'] = test_data.apply(fill_na_with_group_mean, axis=1, group_means=group_means_train)
###
###
# Calculate the mode from the training data
cap_diameter_mode = train_data['cap-diameter'].mode()[0]
stem_height_mode = train_data['stem-height'].mode()[0]

# Fill missing values in the training data using the mode calculated from the training data
train_data['cap-diameter'] = train_data['cap-diameter'].fillna(cap_diameter_mode)
train_data['stem-height'] = train_data['stem-height'].fillna(stem_height_mode)

# Fill missing values in the test data using the mode calculated from the training data
test_data['cap-diameter'] = test_data['cap-diameter'].fillna(cap_diameter_mode)
test_data['stem-height'] = test_data['stem-height'].fillna(stem_height_mode)

In [None]:
# print("Nan Values in Train data")
# print(null_percent(train_data))
# print("Nan Values in Test data")
# print(null_percent(test_data))

In [None]:
cat_feats = ['cap-shape', 'cap-surface', 'cap-color',
       'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
       'stem-root', 'stem-surface', 'stem-color', 'spore-print-color',
       'habitat', 'season', 'veil-info', 'has-ring-type']

for feat in cat_feats:
    train_data[feat] = train_data[feat].astype('category')
for feat in cat_feats:
    test_data[feat] = test_data[feat].astype('category')

## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Splitting Data</p> 

In [4]:
features = train_data.drop('class', axis=1).columns.to_list()
cat_feats = train_data[features].select_dtypes(include='object').columns.to_list()

def cleaner(df):
    for col in cat_feats:
        df[col] = df[col].fillna('missing')
        df.loc[df[col].value_counts(dropna=False)[df[col]].values < 100, col] = "noise"
        df[col] = df[col].astype('category')

    return df
    
train_data = cleaner(train_data)
test_data = cleaner(test_data)

cap_diameter_mean = train_data['cap-diameter'].mean(numeric_only=True)
train_data['cap-diameter'].fillna(cap_diameter_mean, inplace=True)
test_data['cap-diameter'].fillna(cap_diameter_mean, inplace=True)

In [5]:
X = train_data.drop(['class'], axis=1)
y = train_data['class']
print(X.shape, y.shape)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
print(y[:3])

(3116945, 21) (3116945,)
[0 1 0]


## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Using XGBoost with optuna</p> 

In [None]:
def model_report(estimator, X, y, cv=5):
    print("="*80)
    print(f"    Model: {estimator.__class__.__name__}")
    print("="*80)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=1/cv, shuffle=True, stratify=y, random_state=42)
    
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    mcc = matthews_corrcoef(y_test, y_pred)
    print(f"F1 Score : {f1.mean():.6f}")
    print(f"MCC Score: {mcc.mean():.6f}")
    
    ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred)).plot()
    plt.title("Confusion Matrix")
    plt.show()

    print()

In [None]:
# xgb_clf = XGBClassifier(enable_categorical=True, device="cuda", tree_method="hist")

# model_report(xgb_clf, X, y)

# cat_clf = CatBoostClassifier(
#     cat_features=cat_feats,
#     verbose=False,
#     allow_writing_files=False,
#     task_type="GPU"
# )

# model_report(cat_clf, X, y)

# lgb_clf = LGBMClassifier(device='cpu', verbosity=-1)

# model_report(lgb_clf, X, y)

In [7]:
# Callback to print the MCC score for each trial
def print_mcc_callback(study, trial):
    mcc = trial.user_attrs["mcc"]
    print(f"Trial {trial.number}: MCC = {mcc}")

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4885)

# Define the objective function for Optuna
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 2000, 3000),
        "eta": trial.suggest_float("eta", 1e-3, 1e-2),
        "gamma": trial.suggest_float("gamma", 0, 5.0),
        "max_depth": trial.suggest_int("max_depth", 2, 32),
        "min_child_weight": trial.suggest_int("min_child_weight", 40, 100),
        "subsample": trial.suggest_float("subsample", 0.1, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.1, 1.0),
        "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"]),
        "max_leaves": trial.suggest_int("max_leaves", 16, 84)
    }
    params['device'] = 'cuda'
    params['tree_method'] = 'hist'
    params['enable_categorical'] = True

    model = XGBClassifier(**params, use_label_encoder=False, eval_metric='logloss', enable_categorical=True, early_stopping_rounds=30) # , 
    # Fit the model with early stopping
    model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],  # Validation set
        verbose=False                 # Suppress output
    )
    
    y_pred = model.predict(X_test)
    mcc = matthews_corrcoef(y_test, y_pred)
    trial.set_user_attr("mcc", mcc)
    return mcc



storage = "sqlite:///xgb.db"

# Optimize hyperparameters with Optuna
study = optuna.create_study(storage=storage,
                            direction='maximize',
                           )
study.optimize(objective, n_trials=100, callbacks=[print_mcc_callback])

# Get the best parameters
best_params = study.best_params
print(f"Best parameters: {best_params}")

[I 2024-08-29 20:34:44,837] A new study created in RDB with name: no-name-7e7d1661-eab7-4415-9029-a3635be359b3
[W 2024-08-29 20:34:44,877] Trial 0 failed with parameters: {'n_estimators': 2594, 'eta': 0.0025406968406256765, 'gamma': 1.738599598773523, 'max_depth': 21, 'min_child_weight': 61, 'subsample': 0.8579609993964666, 'colsample_bytree': 0.727496200357238, 'grow_policy': 'depthwise', 'max_leaves': 68} because of the following error: TypeError("xgboost.sklearn.XGBClassifier() got multiple values for keyword argument 'enable_categorical'").
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_4091/1659923731.py", line 20, in objective
    model = XGBClassifier(**params, use_label_encoder=False, eval_metric='logloss', enable_categorical=True, early_stopping_rounds=30) # ,
TypeError: xgboost.sklearn.XGBClassifier() got multiple values for keyword a

TypeError: xgboost.sklearn.XGBClassifier() got multiple values for keyword argument 'enable_categorical'

In [None]:
def objective(trial):
    params = {
        "iterations": trial.suggest_int("iterations", 100, 3000),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 1e-1, log=True),
        "depth": trial.suggest_int("depth", 4, 10),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-8, 100.0, log=True),
        "bootstrap_type": trial.suggest_categorical("bootstrap_type", ["Bayesian"]),
        "random_strength": trial.suggest_float("random_strength", 1e-8, 10.0, log=True),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 10.0),
        "od_type": trial.suggest_categorical("od_type", ["IncToDec", "Iter"]),
        "od_wait": trial.suggest_int("od_wait", 10, 50),
        "verbose": False,
        "allow_writing_files": False,
        "task_type": 'GPU',
        "cat_features": cat_feats
    }

    model = CatBoostClassifier(**params)

    model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],  # Validation set # ,
        verbose=False                 # Suppress output
    )
    y_pred = model.predict(X_test)
    mcc = matthews_corrcoef(y_test, y_pred)
    trial.set_user_attr("mcc", mcc)
    return mcc


study_name = "cat"
storage = "sqlite:///cat.db"

study = optuna.create_study(storage=storage,
                            study_name=study_name,
                            direction="maximize",
                            sampler=TPESampler(n_startup_trials=20, multivariate=True),
                            load_if_exists=True)

study.optimize(objective, n_trials=50, callbacks=[print_mcc_callback])

print(study.best_params)

In [9]:
def objective(trial):
    params = {
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "device": 'cpu',
        "verbosity": -1
    }

    model = LGBMClassifier(**params) # , early_stopping_rounds=30

    model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],  # Validation set
        # verbose=False                 # Suppress output
    )
    y_pred = model.predict(X_test)
    mcc = matthews_corrcoef(y_test, y_pred)
    trial.set_user_attr("mcc", mcc)
    return mcc


study_name = "lgb"
storage = "sqlite:///lgb.db"

study = optuna.create_study(storage=storage,
                            study_name=study_name,
                            direction="maximize",
                            sampler=TPESampler(n_startup_trials=20, multivariate=True),
                            load_if_exists=True)

study.optimize(objective, n_trials=100, callbacks=[print_mcc_callback]) # n_trials=100

print(study.best_params)

[I 2024-08-29 20:34:45,700] Using an existing study with name 'lgb' instead of creating a new one.
[I 2024-08-29 20:34:52,678] Trial 340 finished with value: 0.9845473359853243 and parameters: {'lambda_l1': 8.581056151436107e-06, 'lambda_l2': 0.0042026986062033145, 'num_leaves': 256, 'feature_fraction': 0.5166864805789477, 'bagging_fraction': 0.9139811568384277, 'bagging_freq': 2, 'min_child_samples': 77}. Best is trial 76 with value: 0.9848032057376603.


Trial 340: MCC = 0.9845473359853243


[I 2024-08-29 20:34:58,427] Trial 341 finished with value: 0.9845313275394644 and parameters: {'lambda_l1': 2.699104258553944e-08, 'lambda_l2': 0.0003826130404363176, 'num_leaves': 254, 'feature_fraction': 0.51691133545474, 'bagging_fraction': 0.8824545218100632, 'bagging_freq': 7, 'min_child_samples': 64}. Best is trial 76 with value: 0.9848032057376603.


Trial 341: MCC = 0.9845313275394644


[I 2024-08-29 20:35:03,352] Trial 342 finished with value: 0.9844796817833903 and parameters: {'lambda_l1': 1.2945072782857798e-05, 'lambda_l2': 0.005522377790588016, 'num_leaves': 234, 'feature_fraction': 0.43511708585154973, 'bagging_fraction': 0.8935195536812587, 'bagging_freq': 1, 'min_child_samples': 75}. Best is trial 76 with value: 0.9848032057376603.


Trial 342: MCC = 0.9844796817833903


[I 2024-08-29 20:35:07,852] Trial 343 finished with value: 0.9843305404690139 and parameters: {'lambda_l1': 2.703906364972457e-06, 'lambda_l2': 0.09880308435859902, 'num_leaves': 186, 'feature_fraction': 0.5252196427008966, 'bagging_fraction': 0.5786447544612155, 'bagging_freq': 3, 'min_child_samples': 20}. Best is trial 76 with value: 0.9848032057376603.


Trial 343: MCC = 0.9843305404690139


[I 2024-08-29 20:35:12,864] Trial 344 finished with value: 0.9845476508076788 and parameters: {'lambda_l1': 1.743315613661371e-06, 'lambda_l2': 0.006041854300851777, 'num_leaves': 256, 'feature_fraction': 0.4311349849367054, 'bagging_fraction': 0.8280937215070412, 'bagging_freq': 1, 'min_child_samples': 80}. Best is trial 76 with value: 0.9848032057376603.


Trial 344: MCC = 0.9845476508076788


[I 2024-08-29 20:35:15,879] Trial 345 finished with value: 0.9809437543534408 and parameters: {'lambda_l1': 1.6308745826938899e-07, 'lambda_l2': 0.05882320213766302, 'num_leaves': 30, 'feature_fraction': 0.44251156977233047, 'bagging_fraction': 0.44783177844357275, 'bagging_freq': 3, 'min_child_samples': 20}. Best is trial 76 with value: 0.9848032057376603.


Trial 345: MCC = 0.9809437543534408


[I 2024-08-29 20:35:21,408] Trial 346 finished with value: 0.9845736547596162 and parameters: {'lambda_l1': 4.912643084425993e-07, 'lambda_l2': 1.7160407598490521, 'num_leaves': 256, 'feature_fraction': 0.44931552495275623, 'bagging_fraction': 0.8535141437298841, 'bagging_freq': 3, 'min_child_samples': 99}. Best is trial 76 with value: 0.9848032057376603.


Trial 346: MCC = 0.9845736547596162


[I 2024-08-29 20:35:26,418] Trial 347 finished with value: 0.9844763701445519 and parameters: {'lambda_l1': 3.2948306749713626e-05, 'lambda_l2': 1.3834123195863965, 'num_leaves': 253, 'feature_fraction': 0.43373516762837805, 'bagging_fraction': 0.9333797788727547, 'bagging_freq': 1, 'min_child_samples': 74}. Best is trial 76 with value: 0.9848032057376603.


Trial 347: MCC = 0.9844763701445519


[I 2024-08-29 20:35:31,893] Trial 348 finished with value: 0.9846316474968875 and parameters: {'lambda_l1': 1.645412630968679e-07, 'lambda_l2': 2.71189593659815e-05, 'num_leaves': 256, 'feature_fraction': 0.5122160205700329, 'bagging_fraction': 0.7896357960343467, 'bagging_freq': 3, 'min_child_samples': 100}. Best is trial 76 with value: 0.9848032057376603.


Trial 348: MCC = 0.9846316474968875


[I 2024-08-29 20:35:38,026] Trial 349 finished with value: 0.9845894500647441 and parameters: {'lambda_l1': 4.3046305649676485e-06, 'lambda_l2': 0.3688440151073032, 'num_leaves': 251, 'feature_fraction': 0.5067617912835194, 'bagging_fraction': 0.9766000288828561, 'bagging_freq': 5, 'min_child_samples': 85}. Best is trial 76 with value: 0.9848032057376603.


Trial 349: MCC = 0.9845894500647441


[I 2024-08-29 20:35:43,449] Trial 350 finished with value: 0.984544617883224 and parameters: {'lambda_l1': 8.200222502246806e-08, 'lambda_l2': 0.003287480160505079, 'num_leaves': 224, 'feature_fraction': 0.4078397880895625, 'bagging_fraction': 0.908134383955159, 'bagging_freq': 6, 'min_child_samples': 100}. Best is trial 76 with value: 0.9848032057376603.


Trial 350: MCC = 0.984544617883224


[I 2024-08-29 20:35:48,640] Trial 351 finished with value: 0.9845702153482274 and parameters: {'lambda_l1': 3.6311941059874716e-06, 'lambda_l2': 0.0014887372876518515, 'num_leaves': 250, 'feature_fraction': 0.5140384074662567, 'bagging_fraction': 0.9543171133977658, 'bagging_freq': 1, 'min_child_samples': 78}. Best is trial 76 with value: 0.9848032057376603.


Trial 351: MCC = 0.9845702153482274


[I 2024-08-29 20:35:52,057] Trial 352 finished with value: 0.9793816261746023 and parameters: {'lambda_l1': 0.08429254304794603, 'lambda_l2': 4.816180952638725, 'num_leaves': 23, 'feature_fraction': 0.5178324710150769, 'bagging_fraction': 0.8544185963234353, 'bagging_freq': 1, 'min_child_samples': 85}. Best is trial 76 with value: 0.9848032057376603.


Trial 352: MCC = 0.9793816261746023


[I 2024-08-29 20:35:57,835] Trial 353 finished with value: 0.9845088908789122 and parameters: {'lambda_l1': 1.123857241396269e-06, 'lambda_l2': 4.226546127941703, 'num_leaves': 231, 'feature_fraction': 0.4068007977978154, 'bagging_fraction': 0.9707356564011407, 'bagging_freq': 5, 'min_child_samples': 100}. Best is trial 76 with value: 0.9848032057376603.


Trial 353: MCC = 0.9845088908789122


[I 2024-08-29 20:36:02,951] Trial 354 finished with value: 0.9845217964881512 and parameters: {'lambda_l1': 1.6031853443700804e-05, 'lambda_l2': 0.10614516225989071, 'num_leaves': 247, 'feature_fraction': 0.4263339466289675, 'bagging_fraction': 0.9352251372639763, 'bagging_freq': 1, 'min_child_samples': 84}. Best is trial 76 with value: 0.9848032057376603.


Trial 354: MCC = 0.9845217964881512


[I 2024-08-29 20:36:08,927] Trial 355 finished with value: 0.9845866462769545 and parameters: {'lambda_l1': 1.881629917460618e-06, 'lambda_l2': 2.824539552333976e-05, 'num_leaves': 251, 'feature_fraction': 0.4216182663592092, 'bagging_fraction': 0.95037783510973, 'bagging_freq': 2, 'min_child_samples': 92}. Best is trial 76 with value: 0.9848032057376603.


Trial 355: MCC = 0.9845866462769545


[I 2024-08-29 20:36:14,489] Trial 356 finished with value: 0.9845474398953746 and parameters: {'lambda_l1': 0.2346837973870895, 'lambda_l2': 0.12013218308997932, 'num_leaves': 213, 'feature_fraction': 0.5256524156898656, 'bagging_fraction': 0.8502116414461044, 'bagging_freq': 5, 'min_child_samples': 77}. Best is trial 76 with value: 0.9848032057376603.


Trial 356: MCC = 0.9845474398953746


[I 2024-08-29 20:36:20,481] Trial 357 finished with value: 0.9845249802577212 and parameters: {'lambda_l1': 8.036490021245645e-07, 'lambda_l2': 0.6122726405288024, 'num_leaves': 227, 'feature_fraction': 0.42614785966553154, 'bagging_fraction': 0.9761881005649133, 'bagging_freq': 3, 'min_child_samples': 75}. Best is trial 76 with value: 0.9848032057376603.


Trial 357: MCC = 0.9845249802577212


[I 2024-08-29 20:36:26,157] Trial 358 finished with value: 0.9845511355596365 and parameters: {'lambda_l1': 2.5228554270435555e-06, 'lambda_l2': 0.011969397067389807, 'num_leaves': 233, 'feature_fraction': 0.407708389271061, 'bagging_fraction': 0.9454957051968388, 'bagging_freq': 6, 'min_child_samples': 74}. Best is trial 76 with value: 0.9848032057376603.


Trial 358: MCC = 0.9845511355596365


[I 2024-08-29 20:36:31,282] Trial 359 finished with value: 0.9846544019865294 and parameters: {'lambda_l1': 2.8464885177045602e-06, 'lambda_l2': 0.23443445437997562, 'num_leaves': 248, 'feature_fraction': 0.48294793014826076, 'bagging_fraction': 0.9552386382037045, 'bagging_freq': 1, 'min_child_samples': 90}. Best is trial 76 with value: 0.9848032057376603.


Trial 359: MCC = 0.9846544019865294


[I 2024-08-29 20:36:36,706] Trial 360 finished with value: 0.9846057503433846 and parameters: {'lambda_l1': 1.3739539315380856e-08, 'lambda_l2': 0.15667933698494851, 'num_leaves': 256, 'feature_fraction': 0.4725801797789121, 'bagging_fraction': 0.8341243095323019, 'bagging_freq': 6, 'min_child_samples': 88}. Best is trial 76 with value: 0.9848032057376603.


Trial 360: MCC = 0.9846057503433846


[I 2024-08-29 20:36:42,387] Trial 361 finished with value: 0.9845474817480033 and parameters: {'lambda_l1': 3.4235403883617956e-05, 'lambda_l2': 2.830218151988718, 'num_leaves': 227, 'feature_fraction': 0.5313530339482928, 'bagging_fraction': 0.8525174968604199, 'bagging_freq': 3, 'min_child_samples': 79}. Best is trial 76 with value: 0.9848032057376603.


Trial 361: MCC = 0.9845474817480033


[I 2024-08-29 20:36:48,035] Trial 362 finished with value: 0.9846476364641692 and parameters: {'lambda_l1': 6.587597500608521e-07, 'lambda_l2': 0.01897466290476621, 'num_leaves': 255, 'feature_fraction': 0.45728571904852144, 'bagging_fraction': 0.8878648281616442, 'bagging_freq': 5, 'min_child_samples': 99}. Best is trial 76 with value: 0.9848032057376603.


Trial 362: MCC = 0.9846476364641692


[I 2024-08-29 20:36:54,082] Trial 363 finished with value: 0.9838701856827449 and parameters: {'lambda_l1': 1.914265204123846e-06, 'lambda_l2': 3.4377105070114364, 'num_leaves': 123, 'feature_fraction': 0.8950569510155468, 'bagging_fraction': 0.9703201749940811, 'bagging_freq': 4, 'min_child_samples': 56}. Best is trial 76 with value: 0.9848032057376603.


Trial 363: MCC = 0.9838701856827449


[I 2024-08-29 20:36:58,975] Trial 364 finished with value: 0.9844601104320193 and parameters: {'lambda_l1': 0.0019607079817516243, 'lambda_l2': 0.03872802053188707, 'num_leaves': 183, 'feature_fraction': 0.512160371372433, 'bagging_fraction': 0.8995013016564635, 'bagging_freq': 1, 'min_child_samples': 64}. Best is trial 76 with value: 0.9848032057376603.


Trial 364: MCC = 0.9844601104320193


[I 2024-08-29 20:37:04,800] Trial 365 finished with value: 0.9845087203118399 and parameters: {'lambda_l1': 5.2460341195607516e-08, 'lambda_l2': 0.011142395050611785, 'num_leaves': 254, 'feature_fraction': 0.4405636756492749, 'bagging_fraction': 0.9555578860034328, 'bagging_freq': 4, 'min_child_samples': 90}. Best is trial 76 with value: 0.9848032057376603.


Trial 365: MCC = 0.9845087203118399


[I 2024-08-29 20:37:09,923] Trial 366 finished with value: 0.9845184423010852 and parameters: {'lambda_l1': 2.6095277173135463e-05, 'lambda_l2': 0.0019132885845563892, 'num_leaves': 250, 'feature_fraction': 0.5141022928527845, 'bagging_fraction': 0.785948150333632, 'bagging_freq': 1, 'min_child_samples': 48}. Best is trial 76 with value: 0.9848032057376603.


Trial 366: MCC = 0.9845184423010852


[I 2024-08-29 20:37:14,368] Trial 367 finished with value: 0.9839419140479098 and parameters: {'lambda_l1': 1.551878152327308e-08, 'lambda_l2': 6.7030349767882e-06, 'num_leaves': 84, 'feature_fraction': 0.6271105631190615, 'bagging_fraction': 0.5912046045547725, 'bagging_freq': 2, 'min_child_samples': 78}. Best is trial 76 with value: 0.9848032057376603.


Trial 367: MCC = 0.9839419140479098


[I 2024-08-29 20:37:17,729] Trial 368 finished with value: 0.9606818743867975 and parameters: {'lambda_l1': 1.5747951903064878e-07, 'lambda_l2': 1.4230072902405393e-07, 'num_leaves': 11, 'feature_fraction': 0.9223357675833308, 'bagging_fraction': 0.9484151285290346, 'bagging_freq': 4, 'min_child_samples': 7}. Best is trial 76 with value: 0.9848032057376603.


Trial 368: MCC = 0.9606818743867975


[I 2024-08-29 20:37:23,253] Trial 369 finished with value: 0.9844408148910024 and parameters: {'lambda_l1': 4.697751127350763e-07, 'lambda_l2': 1.1041850366397703e-05, 'num_leaves': 195, 'feature_fraction': 0.43591938335052355, 'bagging_fraction': 0.9201766091184349, 'bagging_freq': 4, 'min_child_samples': 75}. Best is trial 76 with value: 0.9848032057376603.


Trial 369: MCC = 0.9844408148910024


[I 2024-08-29 20:37:28,685] Trial 370 finished with value: 0.9846189744216275 and parameters: {'lambda_l1': 1.9147682591579955e-08, 'lambda_l2': 3.019475524072506e-05, 'num_leaves': 254, 'feature_fraction': 0.4065691652572479, 'bagging_fraction': 0.8682903509686162, 'bagging_freq': 6, 'min_child_samples': 73}. Best is trial 76 with value: 0.9848032057376603.


Trial 370: MCC = 0.9846189744216275


[I 2024-08-29 20:37:34,627] Trial 371 finished with value: 0.9845798951682236 and parameters: {'lambda_l1': 1.2389690019435133e-07, 'lambda_l2': 0.010161170330009687, 'num_leaves': 252, 'feature_fraction': 0.48014793043449566, 'bagging_fraction': 0.8982727914174852, 'bagging_freq': 2, 'min_child_samples': 46}. Best is trial 76 with value: 0.9848032057376603.


Trial 371: MCC = 0.9845798951682236


[I 2024-08-29 20:37:40,542] Trial 372 finished with value: 0.9844473105080908 and parameters: {'lambda_l1': 2.243585184246142e-08, 'lambda_l2': 0.02915294346433082, 'num_leaves': 177, 'feature_fraction': 0.44823014004361145, 'bagging_fraction': 0.9999070852661369, 'bagging_freq': 2, 'min_child_samples': 79}. Best is trial 76 with value: 0.9848032057376603.


Trial 372: MCC = 0.9844473105080908


[I 2024-08-29 20:37:46,113] Trial 373 finished with value: 0.9845607709573159 and parameters: {'lambda_l1': 7.433229567557245e-05, 'lambda_l2': 3.9699919060557267, 'num_leaves': 254, 'feature_fraction': 0.43469032482534314, 'bagging_fraction': 0.8194359693399225, 'bagging_freq': 2, 'min_child_samples': 79}. Best is trial 76 with value: 0.9848032057376603.


Trial 373: MCC = 0.9845607709573159


[I 2024-08-29 20:37:51,750] Trial 374 finished with value: 0.9844960503796508 and parameters: {'lambda_l1': 1.1986419165445364e-06, 'lambda_l2': 0.008865727331252392, 'num_leaves': 251, 'feature_fraction': 0.41008224457920384, 'bagging_fraction': 0.889841215623667, 'bagging_freq': 3, 'min_child_samples': 49}. Best is trial 76 with value: 0.9848032057376603.


Trial 374: MCC = 0.9844960503796508


[I 2024-08-29 20:37:55,716] Trial 375 finished with value: 0.9840326173868188 and parameters: {'lambda_l1': 2.6332870051590742e-05, 'lambda_l2': 5.717810177260649e-06, 'num_leaves': 123, 'feature_fraction': 0.5088456019819323, 'bagging_fraction': 0.42783350602503345, 'bagging_freq': 2, 'min_child_samples': 82}. Best is trial 76 with value: 0.9848032057376603.


Trial 375: MCC = 0.9840326173868188


[I 2024-08-29 20:38:00,817] Trial 376 finished with value: 0.9845865817750812 and parameters: {'lambda_l1': 2.3335940418991376e-06, 'lambda_l2': 1.5948535647592994, 'num_leaves': 247, 'feature_fraction': 0.44557894059128317, 'bagging_fraction': 0.9785930856833807, 'bagging_freq': 1, 'min_child_samples': 97}. Best is trial 76 with value: 0.9848032057376603.


Trial 376: MCC = 0.9845865817750812


[I 2024-08-29 20:38:07,020] Trial 377 finished with value: 0.9845607062847559 and parameters: {'lambda_l1': 9.736875261169553e-06, 'lambda_l2': 0.017118261615043928, 'num_leaves': 239, 'feature_fraction': 0.42153069369486423, 'bagging_fraction': 0.9705262904717014, 'bagging_freq': 2, 'min_child_samples': 81}. Best is trial 76 with value: 0.9848032057376603.


Trial 377: MCC = 0.9845607062847559


[I 2024-08-29 20:38:12,342] Trial 378 finished with value: 0.9846021936718649 and parameters: {'lambda_l1': 2.8342318136389503e-05, 'lambda_l2': 0.37105336252893917, 'num_leaves': 252, 'feature_fraction': 0.5757115706343506, 'bagging_fraction': 0.8687187166195729, 'bagging_freq': 1, 'min_child_samples': 76}. Best is trial 76 with value: 0.9848032057376603.


Trial 378: MCC = 0.9846021936718649


[I 2024-08-29 20:38:18,228] Trial 379 finished with value: 0.9845122685153778 and parameters: {'lambda_l1': 4.4152891807540393e-08, 'lambda_l2': 0.0040427682461457925, 'num_leaves': 254, 'feature_fraction': 0.4400811537807242, 'bagging_fraction': 0.9309991308797372, 'bagging_freq': 2, 'min_child_samples': 79}. Best is trial 76 with value: 0.9848032057376603.


Trial 379: MCC = 0.9845122685153778


[I 2024-08-29 20:38:20,518] Trial 380 finished with value: 0.8609580732082158 and parameters: {'lambda_l1': 0.1086027295909026, 'lambda_l2': 0.07199180573854753, 'num_leaves': 5, 'feature_fraction': 0.6803908435159389, 'bagging_fraction': 0.6453388501098557, 'bagging_freq': 7, 'min_child_samples': 26}. Best is trial 76 with value: 0.9848032057376603.


Trial 380: MCC = 0.8609580732082158


[I 2024-08-29 20:38:24,572] Trial 381 finished with value: 0.983951408409959 and parameters: {'lambda_l1': 0.32534356036192635, 'lambda_l2': 8.155584048095734e-07, 'num_leaves': 123, 'feature_fraction': 0.8311662609322046, 'bagging_fraction': 0.5253700881263693, 'bagging_freq': 5, 'min_child_samples': 66}. Best is trial 76 with value: 0.9848032057376603.


Trial 381: MCC = 0.983951408409959


[I 2024-08-29 20:38:31,229] Trial 382 finished with value: 0.9846992685195626 and parameters: {'lambda_l1': 1.4995319248133883e-05, 'lambda_l2': 0.006014194122358537, 'num_leaves': 254, 'feature_fraction': 0.5483541411090429, 'bagging_fraction': 0.9574641762527281, 'bagging_freq': 2, 'min_child_samples': 96}. Best is trial 76 with value: 0.9848032057376603.


Trial 382: MCC = 0.9846992685195626


[I 2024-08-29 20:38:36,125] Trial 383 finished with value: 0.9841256818071884 and parameters: {'lambda_l1': 0.02101526962254776, 'lambda_l2': 9.415896191625593e-05, 'num_leaves': 146, 'feature_fraction': 0.7520550411331534, 'bagging_fraction': 0.7371582111804834, 'bagging_freq': 7, 'min_child_samples': 86}. Best is trial 76 with value: 0.9848032057376603.


Trial 383: MCC = 0.9841256818071884


[I 2024-08-29 20:38:41,342] Trial 384 finished with value: 0.9845674413917114 and parameters: {'lambda_l1': 7.36634861101162e-05, 'lambda_l2': 0.03745710119797788, 'num_leaves': 253, 'feature_fraction': 0.40355995432467767, 'bagging_fraction': 0.7953164476677914, 'bagging_freq': 3, 'min_child_samples': 85}. Best is trial 76 with value: 0.9848032057376603.


Trial 384: MCC = 0.9845674413917114


[I 2024-08-29 20:38:45,702] Trial 385 finished with value: 0.9840683844082009 and parameters: {'lambda_l1': 0.002768021285630397, 'lambda_l2': 0.0104627698261772, 'num_leaves': 102, 'feature_fraction': 0.6297487594333083, 'bagging_fraction': 0.5613597701042454, 'bagging_freq': 2, 'min_child_samples': 47}. Best is trial 76 with value: 0.9848032057376603.


Trial 385: MCC = 0.9840683844082009


[I 2024-08-29 20:38:50,731] Trial 386 finished with value: 0.9845088052655643 and parameters: {'lambda_l1': 1.390183465218871e-05, 'lambda_l2': 0.015808754213108296, 'num_leaves': 243, 'feature_fraction': 0.4057344693303134, 'bagging_fraction': 0.8523118363251634, 'bagging_freq': 1, 'min_child_samples': 87}. Best is trial 76 with value: 0.9848032057376603.


Trial 386: MCC = 0.9845088052655643


[I 2024-08-29 20:38:53,018] Trial 387 finished with value: 0.9366962442326289 and parameters: {'lambda_l1': 0.0028577332325510386, 'lambda_l2': 4.7358258855833675e-08, 'num_leaves': 8, 'feature_fraction': 0.6822559452667996, 'bagging_fraction': 0.4028181596401247, 'bagging_freq': 5, 'min_child_samples': 78}. Best is trial 76 with value: 0.9848032057376603.


Trial 387: MCC = 0.9366962442326289


[I 2024-08-29 20:38:58,642] Trial 388 finished with value: 0.9845148614288138 and parameters: {'lambda_l1': 1.3811272396205082e-08, 'lambda_l2': 0.0036369473644438928, 'num_leaves': 219, 'feature_fraction': 0.5042680109852155, 'bagging_fraction': 0.9043836485423701, 'bagging_freq': 6, 'min_child_samples': 82}. Best is trial 76 with value: 0.9848032057376603.


Trial 388: MCC = 0.9845148614288138


[I 2024-08-29 20:39:05,405] Trial 389 finished with value: 0.9846928548211555 and parameters: {'lambda_l1': 8.300977515205005e-07, 'lambda_l2': 0.8920081395560706, 'num_leaves': 256, 'feature_fraction': 0.554674934315509, 'bagging_fraction': 0.9967848977520313, 'bagging_freq': 3, 'min_child_samples': 88}. Best is trial 76 with value: 0.9848032057376603.


Trial 389: MCC = 0.9846928548211555


[I 2024-08-29 20:39:10,979] Trial 390 finished with value: 0.9840093561803254 and parameters: {'lambda_l1': 2.5971628516508277e-07, 'lambda_l2': 0.00020118333616375825, 'num_leaves': 176, 'feature_fraction': 0.8393823937358962, 'bagging_fraction': 0.88040950634312, 'bagging_freq': 6, 'min_child_samples': 49}. Best is trial 76 with value: 0.9848032057376603.


Trial 390: MCC = 0.9840093561803254


[I 2024-08-29 20:39:14,864] Trial 391 finished with value: 0.9840459717447823 and parameters: {'lambda_l1': 5.011786940374126e-07, 'lambda_l2': 1.0480151033480826e-05, 'num_leaves': 91, 'feature_fraction': 0.5138797375348315, 'bagging_fraction': 0.6102528347407725, 'bagging_freq': 7, 'min_child_samples': 23}. Best is trial 76 with value: 0.9848032057376603.


Trial 391: MCC = 0.9840459717447823


[I 2024-08-29 20:39:20,364] Trial 392 finished with value: 0.9845864327137398 and parameters: {'lambda_l1': 0.15503825851010328, 'lambda_l2': 4.14747608011415, 'num_leaves': 235, 'feature_fraction': 0.48726014624697145, 'bagging_fraction': 0.8023795378249119, 'bagging_freq': 2, 'min_child_samples': 64}. Best is trial 76 with value: 0.9848032057376603.


Trial 392: MCC = 0.9845864327137398


[I 2024-08-29 20:39:26,108] Trial 393 finished with value: 0.9845862025563028 and parameters: {'lambda_l1': 0.21762809154122884, 'lambda_l2': 0.0034358608903730004, 'num_leaves': 222, 'feature_fraction': 0.4940397314193095, 'bagging_fraction': 0.9477977493112981, 'bagging_freq': 6, 'min_child_samples': 81}. Best is trial 76 with value: 0.9848032057376603.


Trial 393: MCC = 0.9845862025563028


[I 2024-08-29 20:39:31,518] Trial 394 finished with value: 0.9841879178393379 and parameters: {'lambda_l1': 9.81739533280264e-07, 'lambda_l2': 0.01755504277990852, 'num_leaves': 245, 'feature_fraction': 0.735737604125774, 'bagging_fraction': 0.4715810239465953, 'bagging_freq': 1, 'min_child_samples': 89}. Best is trial 76 with value: 0.9848032057376603.


Trial 394: MCC = 0.9841879178393379


[I 2024-08-29 20:39:36,501] Trial 395 finished with value: 0.9844636142983496 and parameters: {'lambda_l1': 0.015902664542384884, 'lambda_l2': 0.1389039186216791, 'num_leaves': 234, 'feature_fraction': 0.4474344142734872, 'bagging_fraction': 0.9436760329285294, 'bagging_freq': 1, 'min_child_samples': 66}. Best is trial 76 with value: 0.9848032057376603.


Trial 395: MCC = 0.9844636142983496


[I 2024-08-29 20:39:42,356] Trial 396 finished with value: 0.9845344080610646 and parameters: {'lambda_l1': 1.0620669430184776e-06, 'lambda_l2': 0.02103198484792231, 'num_leaves': 251, 'feature_fraction': 0.4545706809213407, 'bagging_fraction': 0.85440374141884, 'bagging_freq': 2, 'min_child_samples': 82}. Best is trial 76 with value: 0.9848032057376603.


Trial 396: MCC = 0.9845344080610646


[I 2024-08-29 20:39:46,998] Trial 397 finished with value: 0.982343177613625 and parameters: {'lambda_l1': 2.3246699116493267e-06, 'lambda_l2': 0.001280281789618053, 'num_leaves': 42, 'feature_fraction': 0.9359007354203198, 'bagging_fraction': 0.7880164328911932, 'bagging_freq': 2, 'min_child_samples': 10}. Best is trial 76 with value: 0.9848032057376603.


Trial 397: MCC = 0.982343177613625


[I 2024-08-29 20:39:53,233] Trial 398 finished with value: 0.9842338334741865 and parameters: {'lambda_l1': 0.0003152285602769563, 'lambda_l2': 0.0003511915176102852, 'num_leaves': 255, 'feature_fraction': 0.6618890824859188, 'bagging_fraction': 0.9057686292607974, 'bagging_freq': 6, 'min_child_samples': 15}. Best is trial 76 with value: 0.9848032057376603.


Trial 398: MCC = 0.9842338334741865


[I 2024-08-29 20:39:59,060] Trial 399 finished with value: 0.9845178857197482 and parameters: {'lambda_l1': 6.280002795922061e-07, 'lambda_l2': 0.004175012049288304, 'num_leaves': 230, 'feature_fraction': 0.553976542417736, 'bagging_fraction': 0.8081656764494719, 'bagging_freq': 3, 'min_child_samples': 99}. Best is trial 76 with value: 0.9848032057376603.


Trial 399: MCC = 0.9845178857197482


[I 2024-08-29 20:40:05,153] Trial 400 finished with value: 0.9845478658427974 and parameters: {'lambda_l1': 1.6575569508991556e-07, 'lambda_l2': 0.06086428205053765, 'num_leaves': 255, 'feature_fraction': 0.4146933408126594, 'bagging_fraction': 0.9463405574192721, 'bagging_freq': 3, 'min_child_samples': 66}. Best is trial 76 with value: 0.9848032057376603.


Trial 400: MCC = 0.9845478658427974


[I 2024-08-29 20:40:10,983] Trial 401 finished with value: 0.9845250872254319 and parameters: {'lambda_l1': 2.431012613649693e-07, 'lambda_l2': 0.0033376684594362328, 'num_leaves': 253, 'feature_fraction': 0.41503137353705866, 'bagging_fraction': 0.9598956535464279, 'bagging_freq': 5, 'min_child_samples': 91}. Best is trial 76 with value: 0.9848032057376603.


Trial 401: MCC = 0.9845250872254319


[I 2024-08-29 20:40:17,397] Trial 402 finished with value: 0.9844955621896361 and parameters: {'lambda_l1': 4.589175717532012e-05, 'lambda_l2': 1.853475645019834, 'num_leaves': 217, 'feature_fraction': 0.669375513815949, 'bagging_fraction': 0.88019967639218, 'bagging_freq': 4, 'min_child_samples': 21}. Best is trial 76 with value: 0.9848032057376603.


Trial 402: MCC = 0.9844955621896361


[I 2024-08-29 20:40:22,250] Trial 403 finished with value: 0.984476584373813 and parameters: {'lambda_l1': 4.2239713667879337e-07, 'lambda_l2': 4.317470441228411, 'num_leaves': 196, 'feature_fraction': 0.4049788355435358, 'bagging_fraction': 0.908639906971425, 'bagging_freq': 1, 'min_child_samples': 78}. Best is trial 76 with value: 0.9848032057376603.


Trial 403: MCC = 0.984476584373813


[I 2024-08-29 20:40:28,614] Trial 404 finished with value: 0.9844694797066259 and parameters: {'lambda_l1': 7.626201510424145e-07, 'lambda_l2': 2.1828014163677174e-05, 'num_leaves': 228, 'feature_fraction': 0.6081686514893657, 'bagging_fraction': 0.9727880982864546, 'bagging_freq': 7, 'min_child_samples': 98}. Best is trial 76 with value: 0.9848032057376603.


Trial 404: MCC = 0.9844694797066259


[I 2024-08-29 20:40:34,229] Trial 405 finished with value: 0.9846058133256999 and parameters: {'lambda_l1': 7.435116689787826e-06, 'lambda_l2': 0.13713342654749547, 'num_leaves': 201, 'feature_fraction': 0.5092868230785731, 'bagging_fraction': 0.8854624836894346, 'bagging_freq': 3, 'min_child_samples': 98}. Best is trial 76 with value: 0.9848032057376603.


Trial 405: MCC = 0.9846058133256999


[I 2024-08-29 20:40:39,395] Trial 406 finished with value: 0.9830184471903877 and parameters: {'lambda_l1': 0.22893218428141998, 'lambda_l2': 2.2325244786732405e-05, 'num_leaves': 109, 'feature_fraction': 0.961241486013142, 'bagging_fraction': 0.8772556066705095, 'bagging_freq': 5, 'min_child_samples': 5}. Best is trial 76 with value: 0.9848032057376603.


Trial 406: MCC = 0.9830184471903877


[I 2024-08-29 20:40:44,438] Trial 407 finished with value: 0.98468679442963 and parameters: {'lambda_l1': 5.927025218105366e-08, 'lambda_l2': 0.000709284568040612, 'num_leaves': 237, 'feature_fraction': 0.4909652606011864, 'bagging_fraction': 0.8684848207729631, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 76 with value: 0.9848032057376603.


Trial 407: MCC = 0.98468679442963


[I 2024-08-29 20:40:49,473] Trial 408 finished with value: 0.9845637833601234 and parameters: {'lambda_l1': 6.698628410375189e-07, 'lambda_l2': 0.11496671679369572, 'num_leaves': 238, 'feature_fraction': 0.4084504892531202, 'bagging_fraction': 0.9683194285480796, 'bagging_freq': 1, 'min_child_samples': 74}. Best is trial 76 with value: 0.9848032057376603.


Trial 408: MCC = 0.9845637833601234


[I 2024-08-29 20:40:54,608] Trial 409 finished with value: 0.9845086570294307 and parameters: {'lambda_l1': 1.3092313527549884e-07, 'lambda_l2': 0.004881978000517114, 'num_leaves': 245, 'feature_fraction': 0.4404471192205905, 'bagging_fraction': 0.7682083942445092, 'bagging_freq': 6, 'min_child_samples': 97}. Best is trial 76 with value: 0.9848032057376603.


Trial 409: MCC = 0.9845086570294307


[I 2024-08-29 20:40:58,959] Trial 410 finished with value: 0.9838033974554977 and parameters: {'lambda_l1': 5.628493181916979e-08, 'lambda_l2': 6.019829226546359e-06, 'num_leaves': 73, 'feature_fraction': 0.5167864385404715, 'bagging_fraction': 0.7689625349171044, 'bagging_freq': 4, 'min_child_samples': 39}. Best is trial 76 with value: 0.9848032057376603.


Trial 410: MCC = 0.9838033974554977


[I 2024-08-29 20:41:05,038] Trial 411 finished with value: 0.9845253701649369 and parameters: {'lambda_l1': 8.267486686526925e-07, 'lambda_l2': 0.41784653486546885, 'num_leaves': 256, 'feature_fraction': 0.42420676639371335, 'bagging_fraction': 0.9234289278781483, 'bagging_freq': 2, 'min_child_samples': 94}. Best is trial 76 with value: 0.9848032057376603.


Trial 411: MCC = 0.9845253701649369


[I 2024-08-29 20:41:08,543] Trial 412 finished with value: 0.9800352832008856 and parameters: {'lambda_l1': 0.0013852320720361443, 'lambda_l2': 0.09562386741482085, 'num_leaves': 24, 'feature_fraction': 0.8998599351117822, 'bagging_fraction': 0.8952270990560631, 'bagging_freq': 1, 'min_child_samples': 92}. Best is trial 76 with value: 0.9848032057376603.


Trial 412: MCC = 0.9800352832008856


[I 2024-08-29 20:41:13,877] Trial 413 finished with value: 0.9845316911630488 and parameters: {'lambda_l1': 8.406065970872751e-08, 'lambda_l2': 0.29354161762240005, 'num_leaves': 205, 'feature_fraction': 0.4009982263408136, 'bagging_fraction': 0.8642244698810657, 'bagging_freq': 5, 'min_child_samples': 99}. Best is trial 76 with value: 0.9848032057376603.


Trial 413: MCC = 0.9845316911630488


[I 2024-08-29 20:41:17,902] Trial 414 finished with value: 0.9833103213349671 and parameters: {'lambda_l1': 0.037896372646775665, 'lambda_l2': 0.046974495668598425, 'num_leaves': 86, 'feature_fraction': 0.9347844011551509, 'bagging_fraction': 0.5994245652131083, 'bagging_freq': 5, 'min_child_samples': 84}. Best is trial 76 with value: 0.9848032057376603.


Trial 414: MCC = 0.9833103213349671


[I 2024-08-29 20:41:22,004] Trial 415 finished with value: 0.9826953592763197 and parameters: {'lambda_l1': 0.0030160407254932087, 'lambda_l2': 2.172000560544901e-06, 'num_leaves': 52, 'feature_fraction': 0.8208773976147913, 'bagging_fraction': 0.7352628733415404, 'bagging_freq': 7, 'min_child_samples': 5}. Best is trial 76 with value: 0.9848032057376603.


Trial 415: MCC = 0.9826953592763197


[I 2024-08-29 20:41:25,595] Trial 416 finished with value: 0.9764602300707735 and parameters: {'lambda_l1': 8.597424906464077, 'lambda_l2': 5.400108616221039e-08, 'num_leaves': 18, 'feature_fraction': 0.6369330534641172, 'bagging_fraction': 0.49119672399250947, 'bagging_freq': 1, 'min_child_samples': 48}. Best is trial 76 with value: 0.9848032057376603.


Trial 416: MCC = 0.9764602300707735


[I 2024-08-29 20:41:31,383] Trial 417 finished with value: 0.9845931637061112 and parameters: {'lambda_l1': 1.5651174781740292e-06, 'lambda_l2': 0.0018466023450297692, 'num_leaves': 222, 'feature_fraction': 0.4044331391699505, 'bagging_fraction': 0.9769194461713563, 'bagging_freq': 3, 'min_child_samples': 93}. Best is trial 76 with value: 0.9848032057376603.


Trial 417: MCC = 0.9845931637061112


[I 2024-08-29 20:41:37,266] Trial 418 finished with value: 0.9845543835545634 and parameters: {'lambda_l1': 1.2630450879541786e-07, 'lambda_l2': 1.0496686455047417, 'num_leaves': 252, 'feature_fraction': 0.4380757499048364, 'bagging_fraction': 0.9691596942764282, 'bagging_freq': 5, 'min_child_samples': 89}. Best is trial 76 with value: 0.9848032057376603.


Trial 418: MCC = 0.9845543835545634


[I 2024-08-29 20:41:42,408] Trial 419 finished with value: 0.9846347909427968 and parameters: {'lambda_l1': 1.4773799969486728e-06, 'lambda_l2': 0.022150979666025312, 'num_leaves': 236, 'feature_fraction': 0.4746493589858783, 'bagging_fraction': 0.8839268197650328, 'bagging_freq': 1, 'min_child_samples': 99}. Best is trial 76 with value: 0.9848032057376603.


Trial 419: MCC = 0.9846347909427968


[I 2024-08-29 20:41:47,443] Trial 420 finished with value: 0.9845089123853143 and parameters: {'lambda_l1': 5.555313028984458e-05, 'lambda_l2': 0.0020794829576152476, 'num_leaves': 234, 'feature_fraction': 0.42222271323212895, 'bagging_fraction': 0.9991226725230262, 'bagging_freq': 1, 'min_child_samples': 81}. Best is trial 76 with value: 0.9848032057376603.


Trial 420: MCC = 0.9845089123853143


[I 2024-08-29 20:41:52,804] Trial 421 finished with value: 0.9846184100191476 and parameters: {'lambda_l1': 4.269593528111387e-05, 'lambda_l2': 1.755005035421519e-05, 'num_leaves': 254, 'feature_fraction': 0.5872967233582095, 'bagging_fraction': 0.8712294811660276, 'bagging_freq': 1, 'min_child_samples': 73}. Best is trial 76 with value: 0.9848032057376603.


Trial 421: MCC = 0.9846184100191476


[I 2024-08-29 20:41:58,504] Trial 422 finished with value: 0.9846673505280003 and parameters: {'lambda_l1': 1.4329152000391125e-06, 'lambda_l2': 0.0036572828506187764, 'num_leaves': 249, 'feature_fraction': 0.4820998534848437, 'bagging_fraction': 0.8923822860230913, 'bagging_freq': 6, 'min_child_samples': 98}. Best is trial 76 with value: 0.9848032057376603.


Trial 422: MCC = 0.9846673505280003


[I 2024-08-29 20:42:04,235] Trial 423 finished with value: 0.9846351707456727 and parameters: {'lambda_l1': 3.57404110492987e-05, 'lambda_l2': 0.02386188174191729, 'num_leaves': 255, 'feature_fraction': 0.4791567678523664, 'bagging_fraction': 0.9154409941491716, 'bagging_freq': 4, 'min_child_samples': 74}. Best is trial 76 with value: 0.9848032057376603.


Trial 423: MCC = 0.9846351707456727


[I 2024-08-29 20:42:08,702] Trial 424 finished with value: 0.983533555760829 and parameters: {'lambda_l1': 0.0004583999780730102, 'lambda_l2': 4.094409483825382e-06, 'num_leaves': 104, 'feature_fraction': 0.9740662832823652, 'bagging_fraction': 0.7023710647098313, 'bagging_freq': 7, 'min_child_samples': 68}. Best is trial 76 with value: 0.9848032057376603.


Trial 424: MCC = 0.983533555760829


[I 2024-08-29 20:42:13,667] Trial 425 finished with value: 0.9845993390490849 and parameters: {'lambda_l1': 0.011305544692984915, 'lambda_l2': 1.400007777909831, 'num_leaves': 207, 'feature_fraction': 0.49057802324705124, 'bagging_fraction': 0.8965467032129377, 'bagging_freq': 1, 'min_child_samples': 89}. Best is trial 76 with value: 0.9848032057376603.


Trial 425: MCC = 0.9845993390490849


[I 2024-08-29 20:42:19,812] Trial 426 finished with value: 0.9845794424774131 and parameters: {'lambda_l1': 0.05373884164767766, 'lambda_l2': 3.2496727934227186e-08, 'num_leaves': 246, 'feature_fraction': 0.5000641987529559, 'bagging_fraction': 0.9770115013613652, 'bagging_freq': 4, 'min_child_samples': 60}. Best is trial 76 with value: 0.9848032057376603.


Trial 426: MCC = 0.9845794424774131


[I 2024-08-29 20:42:25,845] Trial 427 finished with value: 0.9845959036839401 and parameters: {'lambda_l1': 2.677294500729679e-05, 'lambda_l2': 0.06443972373189961, 'num_leaves': 251, 'feature_fraction': 0.4990536171424929, 'bagging_fraction': 0.9295645832313297, 'bagging_freq': 3, 'min_child_samples': 94}. Best is trial 76 with value: 0.9848032057376603.


Trial 427: MCC = 0.9845959036839401


[I 2024-08-29 20:42:31,406] Trial 428 finished with value: 0.98451859150613 and parameters: {'lambda_l1': 1.9665354822235614e-07, 'lambda_l2': 0.0045547180388873405, 'num_leaves': 186, 'feature_fraction': 0.4521002515999228, 'bagging_fraction': 0.9830090342640062, 'bagging_freq': 5, 'min_child_samples': 72}. Best is trial 76 with value: 0.9848032057376603.


Trial 428: MCC = 0.98451859150613


[I 2024-08-29 20:42:36,947] Trial 429 finished with value: 0.9845802586427215 and parameters: {'lambda_l1': 7.783114515977163e-07, 'lambda_l2': 0.011122011962951156, 'num_leaves': 247, 'feature_fraction': 0.41890775879577963, 'bagging_fraction': 0.9021092216752971, 'bagging_freq': 7, 'min_child_samples': 81}. Best is trial 76 with value: 0.9848032057376603.


Trial 429: MCC = 0.9845802586427215


[I 2024-08-29 20:42:41,789] Trial 430 finished with value: 0.9845411765506598 and parameters: {'lambda_l1': 8.72194700915703e-06, 'lambda_l2': 0.0009953073262520876, 'num_leaves': 201, 'feature_fraction': 0.44275430640810826, 'bagging_fraction': 0.9849881320296976, 'bagging_freq': 1, 'min_child_samples': 99}. Best is trial 76 with value: 0.9848032057376603.


Trial 430: MCC = 0.9845411765506598


[I 2024-08-29 20:42:46,620] Trial 431 finished with value: 0.9845088266070618 and parameters: {'lambda_l1': 4.258270708369216e-08, 'lambda_l2': 1.4112716777000087, 'num_leaves': 254, 'feature_fraction': 0.42683810191463156, 'bagging_fraction': 0.6375420044784392, 'bagging_freq': 5, 'min_child_samples': 87}. Best is trial 76 with value: 0.9848032057376603.


Trial 431: MCC = 0.9845088266070618


[I 2024-08-29 20:42:50,451] Trial 432 finished with value: 0.9843791713403274 and parameters: {'lambda_l1': 2.3613084811205465e-05, 'lambda_l2': 2.0617828385348492e-08, 'num_leaves': 161, 'feature_fraction': 0.4882328442500705, 'bagging_fraction': 0.41860173428317343, 'bagging_freq': 7, 'min_child_samples': 36}. Best is trial 76 with value: 0.9848032057376603.


Trial 432: MCC = 0.9843791713403274


[I 2024-08-29 20:42:56,335] Trial 433 finished with value: 0.9845217751132633 and parameters: {'lambda_l1': 2.731342010529036e-05, 'lambda_l2': 6.32854208386698, 'num_leaves': 252, 'feature_fraction': 0.5095573186662997, 'bagging_fraction': 0.8626768708082863, 'bagging_freq': 4, 'min_child_samples': 93}. Best is trial 76 with value: 0.9848032057376603.


Trial 433: MCC = 0.9845217751132633


[I 2024-08-29 20:43:02,123] Trial 434 finished with value: 0.9846157265768746 and parameters: {'lambda_l1': 1.0029363447046485e-08, 'lambda_l2': 8.147715955678269e-07, 'num_leaves': 249, 'feature_fraction': 0.46723188848423053, 'bagging_fraction': 0.931412751718271, 'bagging_freq': 5, 'min_child_samples': 98}. Best is trial 76 with value: 0.9848032057376603.


Trial 434: MCC = 0.9846157265768746


[I 2024-08-29 20:43:08,092] Trial 435 finished with value: 0.9845833768618993 and parameters: {'lambda_l1': 1.3866564158800301e-06, 'lambda_l2': 0.0860441826851685, 'num_leaves': 254, 'feature_fraction': 0.5004752410680925, 'bagging_fraction': 0.8536816869074577, 'bagging_freq': 2, 'min_child_samples': 91}. Best is trial 76 with value: 0.9848032057376603.


Trial 435: MCC = 0.9845833768618993


[I 2024-08-29 20:43:14,796] Trial 436 finished with value: 0.9844792628460183 and parameters: {'lambda_l1': 0.4594860316648124, 'lambda_l2': 0.0022470608853578347, 'num_leaves': 229, 'feature_fraction': 0.6758332864663675, 'bagging_fraction': 0.9920961102240515, 'bagging_freq': 6, 'min_child_samples': 94}. Best is trial 76 with value: 0.9848032057376603.


Trial 436: MCC = 0.9844792628460183


[I 2024-08-29 20:43:20,011] Trial 437 finished with value: 0.9846156411396481 and parameters: {'lambda_l1': 1.2029360845178124e-08, 'lambda_l2': 0.0001702252834945539, 'num_leaves': 234, 'feature_fraction': 0.47566532451835714, 'bagging_fraction': 0.7773987911304873, 'bagging_freq': 4, 'min_child_samples': 71}. Best is trial 76 with value: 0.9848032057376603.


Trial 437: MCC = 0.9846156411396481


[I 2024-08-29 20:43:25,424] Trial 438 finished with value: 0.9846055225831084 and parameters: {'lambda_l1': 7.160535705238856e-08, 'lambda_l2': 0.6748997233401334, 'num_leaves': 247, 'feature_fraction': 0.5936641734363114, 'bagging_fraction': 0.9276118284863495, 'bagging_freq': 1, 'min_child_samples': 91}. Best is trial 76 with value: 0.9848032057376603.


Trial 438: MCC = 0.9846055225831084


[I 2024-08-29 20:43:31,132] Trial 439 finished with value: 0.9845445099435982 and parameters: {'lambda_l1': 2.073874781938714e-07, 'lambda_l2': 0.020001397104344187, 'num_leaves': 205, 'feature_fraction': 0.4666263153404805, 'bagging_fraction': 0.9874266789970544, 'bagging_freq': 7, 'min_child_samples': 100}. Best is trial 76 with value: 0.9848032057376603.


Trial 439: MCC = 0.9845445099435982
{'lambda_l1': 1.3323938321586053e-07, 'lambda_l2': 0.07774032281247541, 'num_leaves': 248, 'feature_fraction': 0.41087472855265783, 'bagging_fraction': 0.9928696340850532, 'bagging_freq': 4, 'min_child_samples': 97}


In [None]:
#  0.9849877748419223 
xgb_parameters={'n_estimators': 749, 'max_depth': 19, 'learning_rate': 0.08515243216339415, 'subsample': 0.8862800339551276, 'colsample_bytree': 0.6382531671740559, 'gamma': 2.1225041131747255e-07, 'lambda': 0.00018340832131612592, 'alpha': 7.642079671849836e-05, 'scale_pos_weight': 1.0077127113949682, 'eta': 0.008801461652478504, 'min_child_weight': 50, 'grow_policy': 'depthwise', 'max_leaves': 77,
               'device': 'cuda'
               }
# # 0.9848549793202802
# cat_parameters={'iterations': 1430, 'learning_rate': 0.08395155143710026, 'depth': 10, 'l2_leaf_reg': 1.7957745197789983, 'bootstrap_type': 'Bayesian', 'random_strength': 0.0010608689725419575, 'bagging_temperature': 0.40781319449377557, 'od_type': 'IncToDec', 'od_wait': 45,
#                 "task_type": 'GPU',
#                 "cat_features": cat_feats
#                }
# 0.9849163311592771
cat_parameters={'iterations': 2817, 'learning_rate': 0.07430964327633417, 'depth': 9, 'l2_leaf_reg': 4.105660971421866, 'bootstrap_type': 'Bayesian', 'random_strength': 4.279092407465261e-08, 'bagging_temperature': 0.3254799202354527, 'od_type': 'IncToDec', 'od_wait': 29,
                "task_type": 'GPU',
                "cat_features": cat_feats
               }
# 0.9848032057376603
lgb_parameters={'lambda_l1': 1.3323938321586053e-07, 'lambda_l2': 0.07774032281247541, 'num_leaves': 248, 'feature_fraction': 0.41087472855265783, 'bagging_fraction': 0.9928696340850532, 'bagging_freq': 4, 'min_child_samples': 97
               } 

In [None]:
xgb_parameters = {
    'n_estimators': 2407,
    'eta': 0.009462133032592785,
    'gamma': 0.2865859948765318,
    'max_depth': 31,
    'min_child_weight': 47,
    'subsample': 0.6956431754146083,
    'colsample_bytree': 0.3670732604094118,
    'grow_policy': 'lossguide',
    'max_leaves': 73,
    'enable_categorical': True,
    'n_jobs': -1,
    'device': 'cuda',
    'tree_method': 'hist'
} # 0.9844272567086021

cat_parameters = {
    'iterations': 1041,
    'learning_rate': 0.08777255350163136,
    'depth': 10,
    'l2_leaf_reg': 0.1259643500248322,
    'bootstrap_type': 'Bayesian',
    'random_strength': 4.276181166674371e-08,
    'bagging_temperature': 0.35995482350907326,
    'od_type': 'Iter',
    'od_wait': 39,
    "verbose": False,
    "allow_writing_files": False,
    "task_type": 'GPU',
    "cat_features": cat_feats
} # 0.9841773055825763

lgb_parameters = {
    'n_estimators': 2500,
    'random_state':42,
    'max_bin':1024,
    'colsample_bytree':0.6,
    'reg_lambda': 80,
#     'device': 'gpu',
    'verbosity': -1
}

In [None]:
def model_trainer(model, X, y, n_splits=5, random_state=101):
    skfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    
    oof_probs, oof_mccs = [], []
    print("="*80)
    print(f"Training {model.__class__.__name__}")
    print("="*80, end="\n")
    for fold, (train_idx, test_idx) in enumerate(skfold.split(X, y)):
        X_train, y_train = X.iloc[train_idx, :], y[train_idx]
        X_test, y_test = X.iloc[test_idx, :], y[test_idx]
        
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        
        mcc = matthews_corrcoef(y_pred, y_test)
        oof_mccs.append(mcc)
        oof_probs.append(model.predict_proba(test_data))
        print(f"--- Fold {fold+1} MCC Score: {mcc:.6f}")
    print(f"\n---> Mean MCC Score: {np.mean(oof_mccs):.6f} \xb1 {np.std(oof_mccs):.6f}\n\n")
    return oof_probs, oof_mccs

In [None]:
oof_probs = {}
oof_probs['xgb'], _ = model_trainer(XGBClassifier(**xgb_parameters), X, y, random_state=101)
oof_probs['cat'], _ = model_trainer(CatBoostClassifier(**cat_parameters), X, y, random_state=101)
oof_probs['lgb'], _ = model_trainer(LGBMClassifier(**lgb_parameters), X, y, random_state=101)

In [None]:
oof_preds = {}
for model in oof_probs.keys():
    oof_preds[model] = np.argmax(np.mean(oof_probs[model], axis=0), axis=1)

In [None]:
sub = pd.read_csv("./kaggle/playground-series-s4e8/sample_submission.csv")
preds = [pred for model, pred in oof_preds.items()]
md = mode(preds, axis=0)[0] if len(preds)>1 else preds[0]
sub['class'] = label_encoder.inverse_transform(md)
sub.to_csv('./kaggle/playground-series-s4e8/submission_xgb_cat_lgbm_0830_2.csv', index=False)