In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
!pip install rdkit
!pip install duckdb

Collecting rdkit
  Downloading rdkit-2023.9.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Downloading rdkit-2023.9.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.9/34.9 MB[0m [31m49.3 MB/s[0m eta [36m0:00:00[0m
[0mInstalling collected packages: rdkit
Successfully installed rdkit-2023.9.6
Collecting duckdb
  Downloading duckdb-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (762 bytes)
Downloading duckdb-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.5/18.5 MB[0m [31m70.2 MB/s[0m eta [36m0:00:00[0m
[0mInstalling collected packages: duckdb
Successfully installed duckdb-1.0.0


In [3]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors, MACCSkeys, rdMolDescriptors
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import average_precision_score
import numpy as np
import lightgbm as lgb
import os
import optuna
import duckdb
from sklearn.metrics import roc_auc_score

In [4]:
train_path = '/kaggle/input/leash-BELKA/train.parquet'
test_path = '/kaggle/input/leash-BELKA/test.parquet'

In [5]:
con = duckdb.connect()

# 각 단백질에 대해 binds=0,1 데이터를 각각 12000개씩 불러오기
df_brd4_0 = con.query(f"""
    SELECT * 
    FROM parquet_scan('{train_path}') 
    WHERE binds = 0 AND protein_name = 'BRD4'
    ORDER BY random()
    LIMIT 10000
""").df()

df_brd4_1 = con.query(f"""
    SELECT * 
    FROM parquet_scan('{train_path}') 
    WHERE binds = 1 AND protein_name = 'BRD4'
    ORDER BY random()
    LIMIT 10000
""").df()

df_hsa_0 = con.query(f"""
    SELECT * 
    FROM parquet_scan('{train_path}') 
    WHERE binds = 0 AND protein_name = 'HSA'
    ORDER BY random()
    LIMIT 10000
""").df()

df_hsa_1 = con.query(f"""
    SELECT * 
    FROM parquet_scan('{train_path}') 
    WHERE binds = 1 AND protein_name = 'HSA'
    ORDER BY random()
    LIMIT 10000
""").df()

df_seh_0 = con.query(f"""
    SELECT * 
    FROM parquet_scan('{train_path}') 
    WHERE binds = 0 AND protein_name = 'sEH'
    ORDER BY random()
    LIMIT 10000
""").df()

df_seh_1 = con.query(f"""
    SELECT * 
    FROM parquet_scan('{train_path}') 
    WHERE binds = 1 AND protein_name = 'sEH'
    ORDER BY random()
    LIMIT 10000
""").df()

# 데이터프레임 결합
df = pd.concat([df_brd4_0, df_brd4_1, df_hsa_0, df_hsa_1, df_seh_0, df_seh_1], axis=0).reset_index(drop=True)
con.close()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [6]:
# ECFP 생성 함수
def generate_ecfp(molecule, radius=2, bits=2048):
    if molecule is None:
        return None
    return list(AllChem.GetMorganFingerprintAsBitVect(molecule, radius, nBits=bits))

# MACCS keys 생성 함수
def generate_maccs_keys(molecule):
    if molecule is None:
        return None
    return list(MACCSkeys.GenMACCSKeys(molecule))

# Atom pairs fingerprints 생성 함수
def generate_atom_pairs(molecule):
    if molecule is None:
        return None
    fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(molecule, nBits=2048)
    return list(fp)

# Topological fingerprints 생성 함수
def generate_topological_torsion(molecule):
    if molecule is None:
        return None
    fp = rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(molecule, nBits=2048)
    return list(fp)

# PubChem fingerprints 생성 함수
def generate_pubchem_fingerprints(molecule):
    if molecule is None:
        return None
    return list(AllChem.GetMorganFingerprintAsBitVect(molecule, radius=2, nBits=881))  # PubChem-like fingerprints

# 분자의 물리화학적 특성을 추출하는 함수
def generate_physchem_features(molecule):
    if molecule is None:
        return [np.nan] * 8
    return [
        Descriptors.MolWt(molecule),                   # 분자량
        Descriptors.MolLogP(molecule),                 # 로그 P
        Descriptors.NumHDonors(molecule),              # 수소 결합 공여자 수
        Descriptors.NumHAcceptors(molecule),           # 수소 결합 수용체 수
        Descriptors.TPSA(molecule),                    # TPSA (극성 표면적)
        Descriptors.NumRotatableBonds(molecule),       # 회전 가능한 결합 수
        Descriptors.FpDensityMorgan1(molecule),        # 분자 복잡도 지표 예시
        Descriptors.FpDensityMorgan2(molecule)         # 분자 복잡도 지표 예시
    ]

In [7]:
# DataFrame에 새로운 특징 추가
df['molecule'] = df['molecule_smiles'].apply(Chem.MolFromSmiles)
df['physchem'] = df['molecule'].apply(generate_physchem_features)
df['ecfp'] = df['molecule'].apply(generate_ecfp)
df['maccs'] = df['molecule'].apply(generate_maccs_keys)
df['atom_pairs'] = df['molecule'].apply(generate_atom_pairs)
df['topological'] = df['molecule'].apply(generate_topological_torsion)
df['pubchem'] = df['molecule'].apply(generate_pubchem_fingerprints)

In [8]:
# 물리화학적 특성을 정규화
physchem_features = np.array(df['physchem'].tolist())
scaler = StandardScaler()
physchem_features_scaled = scaler.fit_transform(physchem_features)

In [9]:
# 모든 특징 결합
df['features'] = [ecfp + maccs + atom_pairs + topological + pubchem + physchem_scaled.tolist() 
                  for ecfp, maccs, atom_pairs, topological, pubchem, physchem_scaled 
                  in zip(df['ecfp'], df['maccs'], df['atom_pairs'], df['topological'], df['pubchem'], physchem_features_scaled)]

In [10]:
# 단백질 이름을 원-핫 인코딩
onehot_encoder = OneHotEncoder(sparse_output=False)
protein_onehot = onehot_encoder.fit_transform(df['protein_name'].values.reshape(-1, 1))

In [11]:
# 최종 입력 데이터 생성
X = np.array([features + list(protein) for features, protein in zip(df['features'].tolist(), protein_onehot.tolist())])
y = np.array(df['binds'].tolist())

In [12]:
# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Optuna를 사용한 하이퍼파라미터 최적화
def objective(trial):
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 6, 12),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_uniform('subsample', 0.6, 0.9),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.6, 0.9),
        'num_leaves': trial.suggest_int('num_leaves', 31, 255)
    }
    
    model = lgb.LGBMClassifier(**param)
    
    # fit 메서드에서 early_stopping_rounds와 eval_metric을 명시적으로 지정
    model.fit(X_train, y_train, 
              eval_set=[(X_test, y_test)], 
              eval_metric='auc')
    
    preds = model.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, preds)
    return auc

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

# 최적의 파라미터 출력
print("Best parameters found: ", study.best_params)

[I 2024-06-04 08:15:53,310] A new study created in memory with name: no-name-d53a678f-7205-43c1-8822-b228de27842c


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.430203 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:16:31,847] Trial 0 finished with value: 0.9654168902592102 and parameters: {'n_estimators': 124, 'max_depth': 12, 'learning_rate': 0.06760151140781205, 'subsample': 0.6192358022374235, 'colsample_bytree': 0.7188313674688657, 'num_leaves': 145}. Best is trial 0 with value: 0.9654168902592102.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.513802 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:17:54,993] Trial 1 finished with value: 0.9671706280778787 and parameters: {'n_estimators': 208, 'max_depth': 11, 'learning_rate': 0.13939368309989825, 'subsample': 0.7757648758993182, 'colsample_bytree': 0.857570532697177, 'num_leaves': 230}. Best is trial 1 with value: 0.9671706280778787.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.598654 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:19:06,396] Trial 2 finished with value: 0.9661879459679852 and parameters: {'n_estimators': 183, 'max_depth': 10, 'learning_rate': 0.18843429805727616, 'subsample': 0.6735227452066026, 'colsample_bytree': 0.8183947478662785, 'num_leaves': 95}. Best is trial 1 with value: 0.9671706280778787.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.609074 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:21:25,650] Trial 3 finished with value: 0.9671148184900804 and parameters: {'n_estimators': 480, 'max_depth': 10, 'learning_rate': 0.23154547457884464, 'subsample': 0.7977136710693314, 'colsample_bytree': 0.7013738087836972, 'num_leaves': 105}. Best is trial 1 with value: 0.9671706280778787.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.619233 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:23:34,350] Trial 4 finished with value: 0.9417346236598928 and parameters: {'n_estimators': 482, 'max_depth': 6, 'learning_rate': 0.012700417140887666, 'subsample': 0.8172472975380367, 'colsample_bytree': 0.7865157877505795, 'num_leaves': 69}. Best is trial 1 with value: 0.9671706280778787.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.499087 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:25:38,861] Trial 5 finished with value: 0.9688088019914994 and parameters: {'n_estimators': 366, 'max_depth': 11, 'learning_rate': 0.0706541225235527, 'subsample': 0.8586887710030575, 'colsample_bytree': 0.8733175223350311, 'num_leaves': 178}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.565654 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:27:39,649] Trial 6 finished with value: 0.9655964032290222 and parameters: {'n_estimators': 401, 'max_depth': 11, 'learning_rate': 0.0379426938712321, 'subsample': 0.6480834605085533, 'colsample_bytree': 0.6151401386454501, 'num_leaves': 42}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.603787 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:28:42,546] Trial 7 finished with value: 0.9662066695429854 and parameters: {'n_estimators': 131, 'max_depth': 11, 'learning_rate': 0.2118544360049875, 'subsample': 0.6738673228788139, 'colsample_bytree': 0.8493196275676919, 'num_leaves': 104}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.607961 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:30:50,973] Trial 8 finished with value: 0.967423313001031 and parameters: {'n_estimators': 474, 'max_depth': 8, 'learning_rate': 0.05394955542486584, 'subsample': 0.8170583792445112, 'colsample_bytree': 0.8470032303485444, 'num_leaves': 164}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.639003 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:31:55,039] Trial 9 finished with value: 0.9660796325867878 and parameters: {'n_estimators': 117, 'max_depth': 12, 'learning_rate': 0.10090313408852003, 'subsample': 0.7901041293147844, 'colsample_bytree': 0.809235627240455, 'num_leaves': 172}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.650105 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:33:41,185] Trial 10 finished with value: 0.9572236316296076 and parameters: {'n_estimators': 323, 'max_depth': 8, 'learning_rate': 0.024359802159206696, 'subsample': 0.8998753167042715, 'colsample_bytree': 0.6449841460218586, 'num_leaves': 233}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.525956 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:35:30,003] Trial 11 finished with value: 0.9653857769001588 and parameters: {'n_estimators': 369, 'max_depth': 8, 'learning_rate': 0.05255575738180449, 'subsample': 0.872217203507312, 'colsample_bytree': 0.8951738498941092, 'num_leaves': 180}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.643623 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:37:25,022] Trial 12 finished with value: 0.9597969842209988 and parameters: {'n_estimators': 413, 'max_depth': 7, 'learning_rate': 0.031417372319372, 'subsample': 0.8507266166277949, 'colsample_bytree': 0.8998407912297598, 'num_leaves': 195}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.802028 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:38:52,074] Trial 13 finished with value: 0.9658839517821829 and parameters: {'n_estimators': 250, 'max_depth': 9, 'learning_rate': 0.08379929654361994, 'subsample': 0.7282139096906294, 'colsample_bytree': 0.7638405383176832, 'num_leaves': 137}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.644189 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:40:51,408] Trial 14 finished with value: 0.956786405595582 and parameters: {'n_estimators': 321, 'max_depth': 9, 'learning_rate': 0.017656675795707306, 'subsample': 0.7414192371912661, 'colsample_bytree': 0.8524285224303355, 'num_leaves': 212}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.636636 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:42:53,039] Trial 15 finished with value: 0.9657058000218294 and parameters: {'n_estimators': 425, 'max_depth': 8, 'learning_rate': 0.04414640790819963, 'subsample': 0.835911783392816, 'colsample_bytree': 0.8263765094182458, 'num_leaves': 143}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.698607 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:44:54,179] Trial 16 finished with value: 0.9675014853156474 and parameters: {'n_estimators': 499, 'max_depth': 7, 'learning_rate': 0.11478487625367403, 'subsample': 0.8943083205526646, 'colsample_bytree': 0.7374501945003777, 'num_leaves': 163}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.799549 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:46:12,621] Trial 17 finished with value: 0.9641524655711928 and parameters: {'n_estimators': 274, 'max_depth': 6, 'learning_rate': 0.1180195999722745, 'subsample': 0.883456441232407, 'colsample_bytree': 0.7300606225514719, 'num_leaves': 203}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.642698 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:47:49,579] Trial 18 finished with value: 0.965405833904833 and parameters: {'n_estimators': 366, 'max_depth': 7, 'learning_rate': 0.2872607372803476, 'subsample': 0.8605085072042999, 'colsample_bytree': 0.6805211534238808, 'num_leaves': 119}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.629975 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:50:03,507] Trial 19 finished with value: 0.9678212306394691 and parameters: {'n_estimators': 451, 'max_depth': 10, 'learning_rate': 0.14508394762849205, 'subsample': 0.8991617398006627, 'colsample_bytree': 0.7624553631359656, 'num_leaves': 157}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.675084 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:52:15,303] Trial 20 finished with value: 0.9669272771624415 and parameters: {'n_estimators': 435, 'max_depth': 10, 'learning_rate': 0.1609597652573303, 'subsample': 0.8421819478138881, 'colsample_bytree': 0.7723708780793308, 'num_leaves': 249}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.626631 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:54:30,035] Trial 21 finished with value: 0.9686511794921627 and parameters: {'n_estimators': 450, 'max_depth': 10, 'learning_rate': 0.08111677188921138, 'subsample': 0.8902680443001352, 'colsample_bytree': 0.7478868907913466, 'num_leaves': 159}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.663325 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:56:26,589] Trial 22 finished with value: 0.9675726293446925 and parameters: {'n_estimators': 369, 'max_depth': 10, 'learning_rate': 0.07752291286714626, 'subsample': 0.8665984970261675, 'colsample_bytree': 0.6753106165532101, 'num_leaves': 191}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.606594 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 08:58:47,617] Trial 23 finished with value: 0.9685571171406023 and parameters: {'n_estimators': 450, 'max_depth': 11, 'learning_rate': 0.06999239164013568, 'subsample': 0.898400122580647, 'colsample_bytree': 0.7888174321077853, 'num_leaves': 129}. Best is trial 5 with value: 0.9688088019914994.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.619314 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:00:53,986] Trial 24 finished with value: 0.9692845863669205 and parameters: {'n_estimators': 388, 'max_depth': 11, 'learning_rate': 0.07389054616155394, 'subsample': 0.8226861634551236, 'colsample_bytree': 0.7922483375922138, 'num_leaves': 128}. Best is trial 24 with value: 0.9692845863669205.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.649650 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:02:57,724] Trial 25 finished with value: 0.9694776836515325 and parameters: {'n_estimators': 388, 'max_depth': 12, 'learning_rate': 0.09375976736629398, 'subsample': 0.7696611885432421, 'colsample_bytree': 0.7964500437233836, 'num_leaves': 85}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.869725 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:04:47,086] Trial 26 finished with value: 0.9687677990290353 and parameters: {'n_estimators': 340, 'max_depth': 12, 'learning_rate': 0.09870714543758112, 'subsample': 0.7700719836687551, 'colsample_bytree': 0.8777860980717441, 'num_leaves': 71}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.106751 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:06:58,386] Trial 27 finished with value: 0.9671767396305495 and parameters: {'n_estimators': 385, 'max_depth': 12, 'learning_rate': 0.032046176004752826, 'subsample': 0.7564189723009042, 'colsample_bytree': 0.8054812857303367, 'num_leaves': 79}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.716262 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:08:42,530] Trial 28 finished with value: 0.9666973161033107 and parameters: {'n_estimators': 345, 'max_depth': 11, 'learning_rate': 0.06400826568985847, 'subsample': 0.7232736127403376, 'colsample_bytree': 0.8266167760606449, 'num_leaves': 36}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.675459 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:10:23,713] Trial 29 finished with value: 0.9675492665456189 and parameters: {'n_estimators': 298, 'max_depth': 12, 'learning_rate': 0.06027821856644431, 'subsample': 0.8265827674997348, 'colsample_bytree': 0.8712111882830357, 'num_leaves': 54}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.651994 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:12:38,202] Trial 30 finished with value: 0.9686224829743949 and parameters: {'n_estimators': 396, 'max_depth': 12, 'learning_rate': 0.04259148238562871, 'subsample': 0.7947932916338759, 'colsample_bytree': 0.7097801322389479, 'num_leaves': 119}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.889616 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:14:30,170] Trial 31 finished with value: 0.9689616185880541 and parameters: {'n_estimators': 343, 'max_depth': 12, 'learning_rate': 0.10456305974916716, 'subsample': 0.7659572730866453, 'colsample_bytree': 0.8798724414591049, 'num_leaves': 76}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.705196 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:16:09,361] Trial 32 finished with value: 0.9682600956808017 and parameters: {'n_estimators': 290, 'max_depth': 11, 'learning_rate': 0.09433575402603361, 'subsample': 0.7161418460469231, 'colsample_bytree': 0.8728053965691153, 'num_leaves': 82}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.841269 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:17:39,734] Trial 33 finished with value: 0.9678496771391734 and parameters: {'n_estimators': 248, 'max_depth': 12, 'learning_rate': 0.1300213506292353, 'subsample': 0.7016542477606595, 'colsample_bytree': 0.8353859620809403, 'num_leaves': 59}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.665480 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:19:28,636] Trial 34 finished with value: 0.9677992290498544 and parameters: {'n_estimators': 338, 'max_depth': 11, 'learning_rate': 0.1675925148636231, 'subsample': 0.7757957418317907, 'colsample_bytree': 0.7984016542196039, 'num_leaves': 91}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.804230 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:21:35,975] Trial 35 finished with value: 0.9693265060622852 and parameters: {'n_estimators': 373, 'max_depth': 12, 'learning_rate': 0.06645351765994187, 'subsample': 0.8071190500610402, 'colsample_bytree': 0.8898204064262194, 'num_leaves': 113}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.025493 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:23:46,737] Trial 36 finished with value: 0.9685873971061076 and parameters: {'n_estimators': 386, 'max_depth': 12, 'learning_rate': 0.051742141717469485, 'subsample': 0.8072703312801601, 'colsample_bytree': 0.8839166266617849, 'num_leaves': 106}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.703577 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:25:36,160] Trial 37 finished with value: 0.9691729394115391 and parameters: {'n_estimators': 318, 'max_depth': 12, 'learning_rate': 0.10550337194975704, 'subsample': 0.7563939695997296, 'colsample_bytree': 0.7759089339119402, 'num_leaves': 96}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.637477 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:27:00,187] Trial 38 finished with value: 0.9683212945468643 and parameters: {'n_estimators': 203, 'max_depth': 12, 'learning_rate': 0.08730792915808656, 'subsample': 0.7845127604492739, 'colsample_bytree': 0.7783866548016447, 'num_leaves': 120}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.714284 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:28:45,123] Trial 39 finished with value: 0.968566201130254 and parameters: {'n_estimators': 314, 'max_depth': 11, 'learning_rate': 0.12763690743600314, 'subsample': 0.741689339603594, 'colsample_bytree': 0.7518733584687565, 'num_leaves': 96}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.688946 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:30:21,323] Trial 40 finished with value: 0.9667273182709673 and parameters: {'n_estimators': 269, 'max_depth': 11, 'learning_rate': 0.2228361815476312, 'subsample': 0.6140105297895754, 'colsample_bytree': 0.789441657367728, 'num_leaves': 108}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.657236 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:32:15,666] Trial 41 finished with value: 0.968935255572215 and parameters: {'n_estimators': 351, 'max_depth': 12, 'learning_rate': 0.10246903275107191, 'subsample': 0.7617825509345665, 'colsample_bytree': 0.8416092317515649, 'num_leaves': 88}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.724519 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:34:19,520] Trial 42 finished with value: 0.9691879127155826 and parameters: {'n_estimators': 405, 'max_depth': 12, 'learning_rate': 0.0725467060698181, 'subsample': 0.8017425778532598, 'colsample_bytree': 0.8606406699805754, 'num_leaves': 63}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.692884 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:36:21,357] Trial 43 finished with value: 0.9688575277341566 and parameters: {'n_estimators': 406, 'max_depth': 12, 'learning_rate': 0.07185345516620838, 'subsample': 0.808118589460497, 'colsample_bytree': 0.8148675831199382, 'num_leaves': 62}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.870337 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:38:34,407] Trial 44 finished with value: 0.9684569154565861 and parameters: {'n_estimators': 422, 'max_depth': 11, 'learning_rate': 0.059453374157314554, 'subsample': 0.8227659066427232, 'colsample_bytree': 0.862137580307762, 'num_leaves': 98}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.828220 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:40:48,681] Trial 45 finished with value: 0.9688967805701739 and parameters: {'n_estimators': 386, 'max_depth': 12, 'learning_rate': 0.04466067633974953, 'subsample': 0.8032975589268113, 'colsample_bytree': 0.7985839194531634, 'num_leaves': 131}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.735311 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:42:40,854] Trial 46 finished with value: 0.9682097031510527 and parameters: {'n_estimators': 361, 'max_depth': 12, 'learning_rate': 0.06832692001135797, 'subsample': 0.7843152585070108, 'colsample_bytree': 0.8583318229543029, 'num_leaves': 54}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.676624 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:43:55,533] Trial 47 finished with value: 0.9629773528915242 and parameters: {'n_estimators': 155, 'max_depth': 11, 'learning_rate': 0.04936846205376167, 'subsample': 0.8349914069423949, 'colsample_bytree': 0.83295383932127, 'num_leaves': 114}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.696385 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:45:52,995] Trial 48 finished with value: 0.9681228635435576 and parameters: {'n_estimators': 435, 'max_depth': 12, 'learning_rate': 0.08718333314774078, 'subsample': 0.7049961927896942, 'colsample_bytree': 0.8165330088891426, 'num_leaves': 41}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.683062 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:48:33,748] Trial 49 finished with value: 0.9686525129218364 and parameters: {'n_estimators': 472, 'max_depth': 12, 'learning_rate': 0.03641820994058781, 'subsample': 0.7462088333614346, 'colsample_bytree': 0.8900114160054081, 'num_leaves': 146}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.715137 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:50:29,025] Trial 50 finished with value: 0.9513776537021467 and parameters: {'n_estimators': 319, 'max_depth': 9, 'learning_rate': 0.013789005191393682, 'subsample': 0.6794616354919351, 'colsample_bytree': 0.7249120954173816, 'num_leaves': 67}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.781420 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:52:28,427] Trial 51 finished with value: 0.9686112599413086 and parameters: {'n_estimators': 380, 'max_depth': 12, 'learning_rate': 0.1088290918184981, 'subsample': 0.7671159504225991, 'colsample_bytree': 0.8652559275315981, 'num_leaves': 77}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.787846 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:54:33,354] Trial 52 finished with value: 0.968526448258109 and parameters: {'n_estimators': 402, 'max_depth': 11, 'learning_rate': 0.14183738628128084, 'subsample': 0.7766474957843208, 'colsample_bytree': 0.8992663898404158, 'num_leaves': 85}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.963068 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:56:18,823] Trial 53 finished with value: 0.967439841973027 and parameters: {'n_estimators': 335, 'max_depth': 12, 'learning_rate': 0.07854344738596991, 'subsample': 0.813986348512011, 'colsample_bytree': 0.8844808136315274, 'num_leaves': 50}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.007420 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:58:13,541] Trial 54 finished with value: 0.9683452407214199 and parameters: {'n_estimators': 360, 'max_depth': 12, 'learning_rate': 0.1154739504884068, 'subsample': 0.7955539628726022, 'colsample_bytree': 0.848028240439512, 'num_leaves': 74}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.659625 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 09:59:57,205] Trial 55 finished with value: 0.9673520300730616 and parameters: {'n_estimators': 307, 'max_depth': 11, 'learning_rate': 0.18325362657718228, 'subsample': 0.7531470619209191, 'colsample_bytree': 0.7721009071792094, 'num_leaves': 98}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.813199 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:02:17,566] Trial 56 finished with value: 0.9692670851024543 and parameters: {'n_estimators': 418, 'max_depth': 12, 'learning_rate': 0.05730758657146249, 'subsample': 0.733095705067459, 'colsample_bytree': 0.7582836512985316, 'num_leaves': 128}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.849390 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:04:33,907] Trial 57 finished with value: 0.9684273299857025 and parameters: {'n_estimators': 417, 'max_depth': 11, 'learning_rate': 0.056703830170317625, 'subsample': 0.7392617643651155, 'colsample_bytree': 0.7434319882858792, 'num_leaves': 133}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.806991 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:07:17,610] Trial 58 finished with value: 0.9680465524745275 and parameters: {'n_estimators': 464, 'max_depth': 12, 'learning_rate': 0.025714903124423736, 'subsample': 0.6486996741959624, 'colsample_bytree': 0.7615229488809536, 'num_leaves': 150}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.767794 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:09:28,695] Trial 59 finished with value: 0.9685006408379672 and parameters: {'n_estimators': 437, 'max_depth': 10, 'learning_rate': 0.0893983430155324, 'subsample': 0.8437886079077452, 'colsample_bytree': 0.6920487541607121, 'num_leaves': 125}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.921998 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:11:34,799] Trial 60 finished with value: 0.9692591956435519 and parameters: {'n_estimators': 375, 'max_depth': 12, 'learning_rate': 0.07293237339893595, 'subsample': 0.7314710194571042, 'colsample_bytree': 0.7807302930267943, 'num_leaves': 109}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.929549 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:13:40,782] Trial 61 finished with value: 0.9691344366297132 and parameters: {'n_estimators': 375, 'max_depth': 12, 'learning_rate': 0.07456047460813937, 'subsample': 0.7307652680805764, 'colsample_bytree': 0.7804752542482064, 'num_leaves': 111}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 5.938142 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:17:06,248] Trial 62 finished with value: 0.9689486732083058 and parameters: {'n_estimators': 397, 'max_depth': 12, 'learning_rate': 0.0654877947997542, 'subsample': 0.7108327414101067, 'colsample_bytree': 0.6054217024805529, 'num_leaves': 114}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.702093 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:19:26,106] Trial 63 finished with value: 0.9690679318247409 and parameters: {'n_estimators': 407, 'max_depth': 12, 'learning_rate': 0.0499770631128373, 'subsample': 0.7350925195763963, 'colsample_bytree': 0.799874426239385, 'num_leaves': 140}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.796128 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:21:23,866] Trial 64 finished with value: 0.9685581727724273 and parameters: {'n_estimators': 356, 'max_depth': 11, 'learning_rate': 0.06092794301524915, 'subsample': 0.7518213893365314, 'colsample_bytree': 0.7602931177253127, 'num_leaves': 101}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.754266 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:23:43,508] Trial 65 finished with value: 0.9689048089280008 and parameters: {'n_estimators': 427, 'max_depth': 12, 'learning_rate': 0.079011307363364, 'subsample': 0.6840905096002117, 'colsample_bytree': 0.7327809504539544, 'num_leaves': 124}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.774970 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:25:54,973] Trial 66 finished with value: 0.9681139184528305 and parameters: {'n_estimators': 392, 'max_depth': 12, 'learning_rate': 0.04111797466481744, 'subsample': 0.6955936452244352, 'colsample_bytree': 0.7555368289624248, 'num_leaves': 88}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.695430 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:27:43,387] Trial 67 finished with value: 0.9681608385094712 and parameters: {'n_estimators': 330, 'max_depth': 10, 'learning_rate': 0.09476951798605135, 'subsample': 0.7199247572201856, 'colsample_bytree': 0.7875637502327777, 'num_leaves': 153}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.952714 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:29:45,867] Trial 68 finished with value: 0.9686046205727252 and parameters: {'n_estimators': 373, 'max_depth': 11, 'learning_rate': 0.05621789310043352, 'subsample': 0.7852137713442664, 'colsample_bytree': 0.7699242542522173, 'num_leaves': 105}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.788476 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:32:05,278] Trial 69 finished with value: 0.9692021081856498 and parameters: {'n_estimators': 417, 'max_depth': 12, 'learning_rate': 0.06753301636851763, 'subsample': 0.8215963005198595, 'colsample_bytree': 0.7161037402133053, 'num_leaves': 138}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.090765 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:34:40,279] Trial 70 finished with value: 0.9688398875707658 and parameters: {'n_estimators': 493, 'max_depth': 11, 'learning_rate': 0.04654406315759618, 'subsample': 0.824586510411893, 'colsample_bytree': 0.7044219654266686, 'num_leaves': 141}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.730639 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:37:04,872] Trial 71 finished with value: 0.9689078091447663 and parameters: {'n_estimators': 443, 'max_depth': 12, 'learning_rate': 0.06694727613835974, 'subsample': 0.8349235695120519, 'colsample_bytree': 0.6554340367412356, 'num_leaves': 134}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.724359 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:39:28,521] Trial 72 finished with value: 0.9686507072358199 and parameters: {'n_estimators': 424, 'max_depth': 12, 'learning_rate': 0.07356607409778192, 'subsample': 0.815700347270153, 'colsample_bytree': 0.745849549785072, 'num_leaves': 172}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.839683 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:41:56,522] Trial 73 finished with value: 0.9694391530899219 and parameters: {'n_estimators': 459, 'max_depth': 12, 'learning_rate': 0.08435715669863955, 'subsample': 0.8495584608218911, 'colsample_bytree': 0.7776756199363897, 'num_leaves': 120}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.800609 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:44:24,939] Trial 74 finished with value: 0.9694216796052404 and parameters: {'n_estimators': 459, 'max_depth': 12, 'learning_rate': 0.0842772243907033, 'subsample': 0.8480869237176974, 'colsample_bytree': 0.7232183350114268, 'num_leaves': 126}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.803718 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:46:55,518] Trial 75 finished with value: 0.9692174426268965 and parameters: {'n_estimators': 467, 'max_depth': 12, 'learning_rate': 0.08264476570907027, 'subsample': 0.8554808311400033, 'colsample_bytree': 0.7175910428187049, 'num_leaves': 127}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.037385 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:49:24,471] Trial 76 finished with value: 0.9688921691258859 and parameters: {'n_estimators': 463, 'max_depth': 12, 'learning_rate': 0.08634536443160754, 'subsample': 0.8602411910416241, 'colsample_bytree': 0.7389745875320164, 'num_leaves': 123}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.849286 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:51:50,598] Trial 77 finished with value: 0.9685612285487626 and parameters: {'n_estimators': 484, 'max_depth': 11, 'learning_rate': 0.09314974719073904, 'subsample': 0.8768709886044037, 'colsample_bytree': 0.6878394735485439, 'num_leaves': 116}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.091947 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:53:58,634] Trial 78 finished with value: 0.9680099387180724 and parameters: {'n_estimators': 456, 'max_depth': 9, 'learning_rate': 0.12181225908827287, 'subsample': 0.8542249466035091, 'colsample_bytree': 0.7234519767305172, 'num_leaves': 128}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.772148 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:56:36,192] Trial 79 finished with value: 0.9691690502416578 and parameters: {'n_estimators': 481, 'max_depth': 12, 'learning_rate': 0.08223905323264254, 'subsample': 0.8451313972573186, 'colsample_bytree': 0.7913651111698505, 'num_leaves': 149}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.821941 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 10:59:02,201] Trial 80 finished with value: 0.9691119627837556 and parameters: {'n_estimators': 444, 'max_depth': 12, 'learning_rate': 0.053946851688741033, 'subsample': 0.8735060621725602, 'colsample_bytree': 0.8084882354672257, 'num_leaves': 109}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.784589 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:01:33,626] Trial 81 finished with value: 0.9693698147468932 and parameters: {'n_estimators': 467, 'max_depth': 12, 'learning_rate': 0.0655722915852018, 'subsample': 0.8655577086984971, 'colsample_bytree': 0.7135782999323041, 'num_leaves': 138}. Best is trial 25 with value: 0.9694776836515325.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.782918 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:04:08,934] Trial 82 finished with value: 0.9695485776625138 and parameters: {'n_estimators': 490, 'max_depth': 12, 'learning_rate': 0.061746932579634754, 'subsample': 0.8640013668089224, 'colsample_bytree': 0.7126978158148188, 'num_leaves': 118}. Best is trial 82 with value: 0.9695485776625138.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.272474 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:06:45,106] Trial 83 finished with value: 0.9690860164646895 and parameters: {'n_estimators': 494, 'max_depth': 12, 'learning_rate': 0.0633062456801406, 'subsample': 0.8652809060095777, 'colsample_bytree': 0.6675712909217041, 'num_leaves': 119}. Best is trial 82 with value: 0.9695485776625138.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.948246 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:09:15,697] Trial 84 finished with value: 0.9695956921776487 and parameters: {'n_estimators': 457, 'max_depth': 12, 'learning_rate': 0.058774306374244176, 'subsample': 0.8316939723462959, 'colsample_bytree': 0.6928207584965059, 'num_leaves': 135}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.787692 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:11:45,357] Trial 85 finished with value: 0.9688422210726948 and parameters: {'n_estimators': 455, 'max_depth': 11, 'learning_rate': 0.04012446250416031, 'subsample': 0.8805660549120597, 'colsample_bytree': 0.7036154197978044, 'num_leaves': 157}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.737035 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:14:27,049] Trial 86 finished with value: 0.9689665633897604 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.058415419047810335, 'subsample': 0.8501450359419777, 'colsample_bytree': 0.6946789231810343, 'num_leaves': 144}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.144958 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:17:05,853] Trial 87 finished with value: 0.9693699536458175 and parameters: {'n_estimators': 477, 'max_depth': 12, 'learning_rate': 0.05154152173821858, 'subsample': 0.8893230142579845, 'colsample_bytree': 0.7134874882362763, 'num_leaves': 163}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.901965 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:19:06,051] Trial 88 finished with value: 0.9599625517388075 and parameters: {'n_estimators': 486, 'max_depth': 6, 'learning_rate': 0.03684410729551008, 'subsample': 0.8873770837127776, 'colsample_bytree': 0.6737795575219042, 'num_leaves': 162}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.809360 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:21:45,545] Trial 89 finished with value: 0.9692004136187728 and parameters: {'n_estimators': 470, 'max_depth': 12, 'learning_rate': 0.04589489116515632, 'subsample': 0.8696803425467324, 'colsample_bytree': 0.6970745513191057, 'num_leaves': 179}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.031936 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:23:48,427] Trial 90 finished with value: 0.9651586493790844 and parameters: {'n_estimators': 474, 'max_depth': 7, 'learning_rate': 0.050584657650859226, 'subsample': 0.8367391415363683, 'colsample_bytree': 0.7126394635848415, 'num_leaves': 169}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.166621 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:26:19,046] Trial 91 finished with value: 0.9690037883014826 and parameters: {'n_estimators': 458, 'max_depth': 12, 'learning_rate': 0.06260125461812371, 'subsample': 0.8310442113166199, 'colsample_bytree': 0.7271899814983849, 'num_leaves': 136}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.116673 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:28:46,067] Trial 92 finished with value: 0.9694569321522369 and parameters: {'n_estimators': 447, 'max_depth': 12, 'learning_rate': 0.05633725995417331, 'subsample': 0.8648412745318687, 'colsample_bytree': 0.7090791032877366, 'num_leaves': 130}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.776537 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:31:12,748] Trial 93 finished with value: 0.9693575916415516 and parameters: {'n_estimators': 432, 'max_depth': 12, 'learning_rate': 0.047026196338002604, 'subsample': 0.8650710755849303, 'colsample_bytree': 0.6871332419617965, 'num_leaves': 153}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.806076 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:33:52,415] Trial 94 finished with value: 0.9693299785353937 and parameters: {'n_estimators': 445, 'max_depth': 12, 'learning_rate': 0.04715278560220237, 'subsample': 0.8922856104346893, 'colsample_bytree': 0.7082237769899492, 'num_leaves': 152}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.785570 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:36:22,803] Trial 95 finished with value: 0.9690860720242592 and parameters: {'n_estimators': 442, 'max_depth': 12, 'learning_rate': 0.04770772320438627, 'subsample': 0.8922420082624435, 'colsample_bytree': 0.6856713816687323, 'num_leaves': 165}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.841553 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:39:07,418] Trial 96 finished with value: 0.9684365250944936 and parameters: {'n_estimators': 480, 'max_depth': 12, 'learning_rate': 0.034744590203878906, 'subsample': 0.8637532216060524, 'colsample_bytree': 0.7025540977341945, 'num_leaves': 184}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.177973 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:41:37,538] Trial 97 finished with value: 0.9693888161197424 and parameters: {'n_estimators': 448, 'max_depth': 12, 'learning_rate': 0.05301226963486094, 'subsample': 0.885530179453681, 'colsample_bytree': 0.677978171968596, 'num_leaves': 151}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.176549 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:44:04,027] Trial 98 finished with value: 0.9692927536236717 and parameters: {'n_estimators': 432, 'max_depth': 12, 'learning_rate': 0.05391885288129427, 'subsample': 0.8843689322624833, 'colsample_bytree': 0.6571064954134519, 'num_leaves': 147}. Best is trial 84 with value: 0.9695956921776487.


[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.901850 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


[I 2024-06-04 11:46:21,334] Trial 99 finished with value: 0.9637249902416563 and parameters: {'n_estimators': 489, 'max_depth': 8, 'learning_rate': 0.02993988003169846, 'subsample': 0.8789079542316851, 'colsample_bytree': 0.6786682211247747, 'num_leaves': 171}. Best is trial 84 with value: 0.9695956921776487.


Best parameters found:  {'n_estimators': 457, 'max_depth': 12, 'learning_rate': 0.058774306374244176, 'subsample': 0.8316939723462959, 'colsample_bytree': 0.6928207584965059, 'num_leaves': 135}


In [13]:
# 최적의 파라미터로 모델 학습
best_params = study.best_params
best_model = lgb.LGBMClassifier(**best_params)
best_model.fit(X_train, y_train)

[LightGBM] [Info] Number of positive: 23949, number of negative: 24051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.827901 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12597
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 5649
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498937 -> initscore=-0.004250
[LightGBM] [Info] Start training from score -0.004250


In [14]:
# 테스트 데이터 예측 및 평가
y_pred = best_model.predict_proba(X_test)[:, 1]
auc_score = roc_auc_score(y_test, y_pred)
print(f'Test AUC Score: {auc_score:.4f}')

Test AUC Score: 0.9696


In [15]:
# 테스트 데이터 예측 및 저장
test_file = '/kaggle/input/leash-BELKA/test.csv'  
output_file = '/kaggle/working/submission13.csv'  # 출력 파일 경로

In [16]:
for df_test in pd.read_csv(test_file, chunksize=20000):
    df_test['molecule'] = df_test['molecule_smiles'].apply(Chem.MolFromSmiles)
    df_test['ecfp'] = df_test['molecule'].apply(generate_ecfp)
    df_test['maccs'] = df_test['molecule'].apply(generate_maccs_keys)
    df_test['atom_pairs'] = df_test['molecule'].apply(generate_atom_pairs)
    df_test['topological'] = df_test['molecule'].apply(generate_topological_torsion)
    df_test['pubchem'] = df_test['molecule'].apply(generate_pubchem_fingerprints)
    df_test['physchem'] = df_test['molecule'].apply(generate_physchem_features)
    
    # 테스트 데이터의 물리화학적 특성 정규화
    physchem_features_test = np.array(df_test['physchem'].tolist())
    physchem_features_test_scaled = scaler.transform(physchem_features_test)
    
    df_test['features'] = [ecfp + maccs + atom_pairs + topological + pubchem + physchem_scaled.tolist() 
                           for ecfp, maccs, atom_pairs, topological, pubchem, physchem_scaled 
                           in zip(df_test['ecfp'], df_test['maccs'], df_test['atom_pairs'], df_test['topological'], df_test['pubchem'], physchem_features_test_scaled)]
    
    protein_onehot = onehot_encoder.transform(df_test['protein_name'].values.reshape(-1, 1))
    X_test = np.array([features + list(protein) for features, protein in zip(df_test['features'].tolist(), protein_onehot.tolist())])
    
    probabilities = best_model.predict_proba(X_test)[:, 1]
    output_df = pd.DataFrame({'id': df_test['id'], 'binds': probabilities})
    output_df.to_csv(output_file, index=False, mode='a', header=not os.path.exists(output_file))