# models for opcode frequency

## Import Libraries

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import os
import time
import json
import pandas as pd
from pathlib import Path
import joblib

import optuna
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.multiclass import OneVsRestClassifier


## Const

In [3]:
PATH = Path.cwd().parents[1]
DATA_PATH = os.path.join(PATH, 'data/processed')
MODEL_PATH = os.path.join(PATH, 'models')

In [4]:
with open(os.path.join(DATA_PATH, 'feature-opcode-freq_list.json')) as f:
    feature_list = json.load(f)

with open(os.path.join(DATA_PATH, 'labels-opcode-freq.json')) as f:
    labels = json.load(f)

In [5]:
train_df = pd.read_csv(os.path.join(DATA_PATH, 'train-opcode-freq.csv'))
test_df = pd.read_csv(os.path.join(DATA_PATH, 'test-opcode-freq.csv'))

X_train = train_df[feature_list]
y_train = train_df[labels]

X_test = test_df[feature_list]
y_test = test_df[labels]

In [6]:
X_train.head()

Unnamed: 0,PUSH1,MSTORE,CALLDATASIZE,LT,PUSH2,JUMPI,CALLDATALOAD,PUSH29,SWAP1,DIV,...,UNKNOWN_0xc6,UNKNOWN_0xe1,INVALID_0x70,PUSH30,DUP16,UNKNOWN_0x2b,UNKNOWN_0xd8,INVALID_0x7a,UNKNOWN_0xf9,INVALID_0x7f
0,1,0,2,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,16,2,2,0,0,2,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,40,5,5,2,32,10,2,0,7,0,...,0,0,0,0,1,1,0,0,0,0
3,53,7,5,2,43,9,2,0,8,0,...,0,0,0,0,0,0,0,0,0,0
4,178,32,7,7,134,41,6,0,66,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
y_train.head()

Unnamed: 0,mint,leak,limit
0,0,0,0
1,0,0,0
2,0,0,0
3,0,0,0
4,0,0,0


## Traditional Machine Learning Models

In [8]:
models = {
    "Logistic Regression": OneVsRestClassifier(LogisticRegression(max_iter=1000)),
    "Random Forest": OneVsRestClassifier(RandomForestClassifier()),
    "Gradient Boosting": OneVsRestClassifier(GradientBoostingClassifier()),
    "AdaBoost": OneVsRestClassifier(AdaBoostClassifier()),
    "SVM (Linear)": OneVsRestClassifier(SVC(kernel="linear")),
    "KNN": OneVsRestClassifier(KNeighborsClassifier()),
    "Naive Bayes": OneVsRestClassifier(GaussianNB()),
    "MLP Classifier": OneVsRestClassifier(MLPClassifier(max_iter=300)),
    "XGBoost": OneVsRestClassifier(XGBClassifier(use_label_encoder=False, eval_metric='logloss')),
    "LightGBM": OneVsRestClassifier(LGBMClassifier()),
    "DecisionTree": OneVsRestClassifier(DecisionTreeClassifier())
}

In [9]:
results = []

for name, model in models.items():
    start = time.time()
    model.fit(X_train, y_train)
    end = time.time()

    y_pred = model.predict(X_test)

    results.append({
        "Classifier": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average="macro"),  # change to 'macro' if multi-class
        "Recall": recall_score(y_test, y_pred, average="macro"),
        "F1-Score": f1_score(y_test, y_pred, average="macro"),
        "Training Time": round(end - start, 3)
    })

[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000508 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1889
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 94
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000514 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1889
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 94
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of 

### Result

In [10]:
df = pd.DataFrame(results)
df.sort_values(by="F1-Score", ascending=False, inplace=True)
df

Unnamed: 0,Classifier,Accuracy,Precision,Recall,F1-Score,Training Time
9,LightGBM,0.428571,0.888889,0.388889,0.484127,0.126
5,KNN,0.285714,0.571429,0.407407,0.475,0.02
8,XGBoost,0.428571,0.805556,0.388889,0.471306,0.947
0,Logistic Regression,0.285714,0.527778,0.351852,0.422222,0.692
7,MLP Classifier,0.142857,0.555556,0.37037,0.39923,0.486
10,DecisionTree,0.285714,0.654762,0.351852,0.39881,0.032
3,AdaBoost,0.285714,0.477778,0.333333,0.387302,0.503
2,Gradient Boosting,0.285714,0.694444,0.277778,0.360195,1.139
1,Random Forest,0.357143,0.833333,0.240741,0.349784,0.517
4,SVM (Linear),0.285714,0.416667,0.277778,0.318627,0.163


### Tuning

In [11]:
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 10, 500),
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 0.1, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 15),
        "num_leaves": trial.suggest_int("num_leaves", 3, 255),
        "min_child_samples": trial.suggest_int("min_child_samples", 3, 100),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
        "random_state": 42,
        "n_jobs": -1
    }

    model = OneVsRestClassifier(LGBMClassifier(**params))

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return f1_score(y_test, y_pred, average="macro", zero_division=0)

In [12]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

print("Tuned LGBMClassifier (MultiOutput):")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average="macro", zero_division=0))
print("Recall:", recall_score(y_test, y_pred, average="macro", zero_division=0))
print("F1 Score:", f1_score(y_test, y_pred, average="macro", zero_division=0))

[I 2025-07-12 00:44:23,824] A new study created in memory with name: no-name-248f0a73-aa77-4aa5-91f7-bf77a12183a8


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000832 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 637
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000361 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 637
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517


[I 2025-07-12 00:44:24,165] Trial 0 finished with value: 0.0 and parameters: {'n_estimators': 199, 'learning_rate': 0.0001791028471939859, 'max_depth': 6, 'num_leaves': 37, 'min_child_samples': 68, 'subsample': 0.5300659071798055, 'colsample_bytree': 0.6998028561458511, 'reg_alpha': 2.400253729641664e-07, 'reg_lambda': 1.2316177407748897e-08}. Best is trial 0 with value: 0.0.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 637
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406


[I 2025-07-12 00:44:24,374] Trial 1 finished with value: 0.0 and parameters: {'n_estimators': 389, 'learning_rate': 0.00025448509085898905, 'max_depth': 13, 'num_leaves': 16, 'min_child_samples': 78, 'subsample': 0.9060108935821185, 'colsample_bytree': 0.9609102685396812, 'reg_alpha': 2.2413129702390213e-05, 'reg_lambda': 1.1735539775685162e-08}. Best is trial 0 with value: 0.0.


[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002906 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1902
[LightGBM] [Info] Number of data p

[I 2025-07-12 00:44:24,911] Trial 2 finished with value: 0.0 and parameters: {'n_estimators': 313, 'learning_rate': 0.00013044095980254224, 'max_depth': 8, 'num_leaves': 232, 'min_child_samples': 19, 'subsample': 0.5483228950949135, 'colsample_bytree': 0.542351120496154, 'reg_alpha': 2.5976270692948627e-07, 'reg_lambda': 1.4602631938405947e-06}. Best is trial 0 with value: 0.0.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008211 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1902
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 97
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006342 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1913
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 100
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info

[I 2025-07-12 00:44:25,213] Trial 3 finished with value: 0.4386724386724386 and parameters: {'n_estimators': 300, 'learning_rate': 0.006916065348152504, 'max_depth': 6, 'num_leaves': 39, 'min_child_samples': 18, 'subsample': 0.7116147052415915, 'colsample_bytree': 0.9373720698014303, 'reg_alpha': 0.00018226534447703466, 'reg_lambda': 5.064687666528522e-07}. Best is trial 3 with value: 0.4386724386724386.
[I 2025-07-12 00:44:25,327] Trial 4 finished with value: 0.0 and parameters: {'n_estimators': 151, 'learning_rate': 1.5821063373371471e-06, 'max_depth': 3, 'num_leaves': 173, 'min_child_samples': 56, 'subsample': 0.5579269190132099, 'colsample_bytree': 0.530657517809519, 'reg_alpha': 3.297715528041319e-08, 'reg_lambda': 0.0317192548746869}. Best is trial 3 with value: 0.4386724386724386.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000599 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1774
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 73
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000703 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1774
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 73
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of 

[I 2025-07-12 00:44:25,629] Trial 5 finished with value: 0.48412698412698413 and parameters: {'n_estimators': 444, 'learning_rate': 0.01833528589394393, 'max_depth': 9, 'num_leaves': 124, 'min_child_samples': 39, 'subsample': 0.682758886968722, 'colsample_bytree': 0.9925213633419318, 'reg_alpha': 5.744680788816283e-05, 'reg_lambda': 5.285558834606991e-07}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014573 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2161
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 208
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000933 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2161
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 208
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info

[I 2025-07-12 00:44:26,234] Trial 6 finished with value: 0.0 and parameters: {'n_estimators': 178, 'learning_rate': 1.3904909489027593e-06, 'max_depth': 4, 'num_leaves': 159, 'min_child_samples': 3, 'subsample': 0.8340783412042336, 'colsample_bytree': 0.8847748344083566, 'reg_alpha': 0.1087369242608832, 'reg_lambda': 2.1428639927235708e-08}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012184 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1961
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 117
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406


[I 2025-07-12 00:44:26,622] Trial 7 finished with value: 0.42857142857142855 and parameters: {'n_estimators': 17, 'learning_rate': 0.04484706309859885, 'max_depth': 9, 'num_leaves': 13, 'min_child_samples': 9, 'subsample': 0.9523960075936584, 'colsample_bytree': 0.8086780774795361, 'reg_alpha': 5.123559517713087e-08, 'reg_lambda': 1.932353625562532e-07}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000878 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1961
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 117
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000626 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1961
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 117
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224


[I 2025-07-12 00:44:26,845] Trial 8 finished with value: 0.0 and parameters: {'n_estimators': 224, 'learning_rate': 1.4299450981973248e-05, 'max_depth': 15, 'num_leaves': 178, 'min_child_samples': 51, 'subsample': 0.928129921605485, 'colsample_bytree': 0.6121515637509141, 'reg_alpha': 3.537372994923377e-07, 'reg_lambda': 6.002857988645553}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002361 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1806
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 77
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000620 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1806
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 77
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of 

[I 2025-07-12 00:44:27,156] Trial 9 finished with value: 0.0 and parameters: {'n_estimators': 214, 'learning_rate': 0.00043177705214712906, 'max_depth': 14, 'num_leaves': 71, 'min_child_samples': 19, 'subsample': 0.7449523747568303, 'colsample_bytree': 0.533501792311933, 'reg_alpha': 4.999942739515765, 'reg_lambda': 0.0014495469004995611}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001497 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1902
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 97
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data 

[I 2025-07-12 00:44:27,345] Trial 10 finished with value: 0.0 and parameters: {'n_estimators': 472, 'learning_rate': 0.06312547190238402, 'max_depth': 11, 'num_leaves': 107, 'min_child_samples': 98, 'subsample': 0.6559283291456295, 'colsample_bytree': 0.8007169384487723, 'reg_alpha': 0.01563242882095044, 'reg_lambda': 3.6070789454193476e-05}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000532 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1843
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 83
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406


[I 2025-07-12 00:44:27,638] Trial 11 finished with value: 0.4386724386724386 and parameters: {'n_estimators': 463, 'learning_rate': 0.006376172383468349, 'max_depth': 7, 'num_leaves': 97, 'min_child_samples': 41, 'subsample': 0.7020239265468461, 'colsample_bytree': 0.9896152888668943, 'reg_alpha': 0.00016517513665671848, 'reg_lambda': 7.270081011088327e-06}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000435 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1843
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 83
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000412 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1843
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 83
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of

[I 2025-07-12 00:44:28,064] Trial 12 finished with value: 0.2619047619047619 and parameters: {'n_estimators': 345, 'learning_rate': 0.0021282178311503317, 'max_depth': 11, 'num_leaves': 64, 'min_child_samples': 33, 'subsample': 0.6491905372076996, 'colsample_bytree': 0.9181111205562748, 'reg_alpha': 1.7993316488470744e-05, 'reg_lambda': 0.00016296759795001925}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000415 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1856
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 86
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000437 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1856
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 86
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of

[I 2025-07-12 00:44:28,474] Trial 13 finished with value: 0.48412698412698413 and parameters: {'n_estimators': 396, 'learning_rate': 0.00975236137790053, 'max_depth': 5, 'num_leaves': 132, 'min_child_samples': 31, 'subsample': 0.8226786203934318, 'colsample_bytree': 0.8722260070331229, 'reg_alpha': 0.004057275259735065, 'reg_lambda': 6.31877629331141e-07}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000423 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1860
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 87
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001299 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1860
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 87
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info]

[I 2025-07-12 00:44:28,879] Trial 14 finished with value: 0.48412698412698413 and parameters: {'n_estimators': 409, 'learning_rate': 0.013444599658728715, 'max_depth': 10, 'num_leaves': 143, 'min_child_samples': 31, 'subsample': 0.824272922509586, 'colsample_bytree': 0.8634030359902309, 'reg_alpha': 0.007445380516992219, 'reg_lambda': 0.005560625239013526}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000426 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1860
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 87
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000734 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1838
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 82
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of

[I 2025-07-12 00:44:29,224] Trial 15 finished with value: 0.16666666666666666 and parameters: {'n_estimators': 487, 'learning_rate': 0.0015639855905694754, 'max_depth': 5, 'num_leaves': 216, 'min_child_samples': 43, 'subsample': 0.8216517925676818, 'colsample_bytree': 0.7434505125439199, 'reg_alpha': 0.0012294380193372423, 'reg_lambda': 1.126666491591673e-05}. Best is trial 5 with value: 0.48412698412698413.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000329 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1679
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 65
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406


[I 2025-07-12 00:44:29,530] Trial 16 finished with value: 0.49999999999999994 and parameters: {'n_estimators': 404, 'learning_rate': 0.09694940079543767, 'max_depth': 8, 'num_leaves': 117, 'min_child_samples': 63, 'subsample': 0.6193460855023292, 'colsample_bytree': 0.9943021484896313, 'reg_alpha': 0.7650127131179498, 'reg_lambda': 2.7267248379195444e-07}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000324 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1679
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 65
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000319 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1679
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 65
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224


[I 2025-07-12 00:44:29,737] Trial 17 finished with value: 0.0 and parameters: {'n_estimators': 435, 'learning_rate': 0.06794268961602157, 'max_depth': 8, 'num_leaves': 100, 'min_child_samples': 70, 'subsample': 0.6123495647937452, 'colsample_bytree': 0.9961427787882937, 'reg_alpha': 9.950710890756314, 'reg_lambda': 0.2071861281852817}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224


[I 2025-07-12 00:44:29,906] Trial 18 finished with value: 0.0 and parameters: {'n_estimators': 353, 'learning_rate': 0.032944448873239454, 'max_depth': 12, 'num_leaves': 194, 'min_child_samples': 91, 'subsample': 0.6129726589508018, 'colsample_bytree': 0.8128915951620364, 'reg_alpha': 0.48061275166861495, 'reg_lambda': 1.062137279870558e-07}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 3

[I 2025-07-12 00:44:30,218] Trial 19 finished with value: 0.0 and parameters: {'n_estimators': 97, 'learning_rate': 0.001736973141601371, 'max_depth': 9, 'num_leaves': 121, 'min_child_samples': 59, 'subsample': 0.7624220342414695, 'colsample_bytree': 0.9212528344026418, 'reg_alpha': 1.7077440584579304e-05, 'reg_lambda': 0.00024307714196254964}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000420 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1740
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 69
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000635 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1740
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 69
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info]

[I 2025-07-12 00:44:30,410] Trial 20 finished with value: 0.0 and parameters: {'n_estimators': 278, 'learning_rate': 5.295790015341851e-05, 'max_depth': 10, 'num_leaves': 74, 'min_child_samples': 78, 'subsample': 0.6371894534049056, 'colsample_bytree': 0.6839965407182054, 'reg_alpha': 0.21916237248783446, 'reg_lambda': 4.411353412657483e-06}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 3

[I 2025-07-12 00:44:30,939] Trial 21 finished with value: 0.48412698412698413 and parameters: {'n_estimators': 379, 'learning_rate': 0.01626867568024626, 'max_depth': 7, 'num_leaves': 145, 'min_child_samples': 32, 'subsample': 0.9975317945061617, 'colsample_bytree': 0.8644912578191979, 'reg_alpha': 0.0036500084432930778, 'reg_lambda': 1.6872432151370097e-07}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000421 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1856
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 86
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001064 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1833
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info]

[I 2025-07-12 00:44:31,248] Trial 22 finished with value: 0.49523809523809526 and parameters: {'n_estimators': 430, 'learning_rate': 0.09616441664665319, 'max_depth': 5, 'num_leaves': 126, 'min_child_samples': 47, 'subsample': 0.7709850760146283, 'colsample_bytree': 0.9594022131848182, 'reg_alpha': 0.0595478015451909, 'reg_lambda': 1.121136730915408e-06}. Best is trial 16 with value: 0.49999999999999994.
[I 2025-07-12 00:44:31,512] Trial 23 finished with value: 0.41904761904761906 and parameters: {'n_estimators': 440, 'learning_rate': 0.07659792933086693, 'max_depth': 3, 'num_leaves': 118, 'min_child_samples': 44, 'subsample': 0.6882969424067173, 'colsample_bytree': 0.9713825256914013, 'reg_alpha': 0.04486784067502162, 'reg_lambda': 4.1448502914760706e-05}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000403 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1838
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 82
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000640 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1838
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 82
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] 

[I 2025-07-12 00:44:31,752] Trial 24 finished with value: 0.42857142857142855 and parameters: {'n_estimators': 495, 'learning_rate': 0.023891942643296367, 'max_depth': 8, 'num_leaves': 89, 'min_child_samples': 62, 'subsample': 0.7788256701878374, 'colsample_bytree': 0.9291095627144994, 'reg_alpha': 0.9153020033925681, 'reg_lambda': 2.4714260107171005e-06}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000336 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1719
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 67
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000316 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1719
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 67
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of 

[I 2025-07-12 00:44:32,023] Trial 25 finished with value: 0.2619047619047619 and parameters: {'n_estimators': 426, 'learning_rate': 0.0033829773175398804, 'max_depth': 6, 'num_leaves': 152, 'min_child_samples': 48, 'subsample': 0.7333816084070276, 'colsample_bytree': 0.9974529068657619, 'reg_alpha': 0.0005707917265382264, 'reg_lambda': 1.713368455038516e-07}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001518 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1821
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 80
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000418 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1821
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 80
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] 

[I 2025-07-12 00:44:32,491] Trial 26 finished with value: 0.47130647130647124 and parameters: {'n_estimators': 346, 'learning_rate': 0.07796141822331687, 'max_depth': 7, 'num_leaves': 252, 'min_child_samples': 38, 'subsample': 0.6716753051713292, 'colsample_bytree': 0.950400708538484, 'reg_alpha': 1.4538204252745337, 'reg_lambda': 5.745502549158486e-08}. Best is trial 16 with value: 0.49999999999999994.




[I 2025-07-12 00:44:32,745] Trial 27 finished with value: 0.0 and parameters: {'n_estimators': 451, 'learning_rate': 0.0007656154353704398, 'max_depth': 10, 'num_leaves': 192, 'min_child_samples': 66, 'subsample': 0.5008771266084845, 'colsample_bytree': 0.8983696154219255, 'reg_alpha': 4.8207482327325835e-06, 'reg_lambda': 2.8247733086124725e-05}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001029 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1467
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 52
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000248 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1467
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 52
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] 

[I 2025-07-12 00:44:33,087] Trial 28 finished with value: 0.4603174603174603 and parameters: {'n_estimators': 369, 'learning_rate': 0.02841077354012948, 'max_depth': 4, 'num_leaves': 132, 'min_child_samples': 53, 'subsample': 0.8693164765474061, 'colsample_bytree': 0.8330041697425409, 'reg_alpha': 0.03716395226914557, 'reg_lambda': 6.66405353508404e-07}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000591 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1800
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 76
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000365 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1800
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 76
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of 

[I 2025-07-12 00:44:33,312] Trial 29 finished with value: 0.0 and parameters: {'n_estimators': 255, 'learning_rate': 0.003865695461686857, 'max_depth': 5, 'num_leaves': 56, 'min_child_samples': 75, 'subsample': 0.583672001015456, 'colsample_bytree': 0.7078532510500692, 'reg_alpha': 2.369182413436045, 'reg_lambda': 3.1874101388856766e-08}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 3

[I 2025-07-12 00:44:33,471] Trial 30 finished with value: 0.0 and parameters: {'n_estimators': 320, 'learning_rate': 0.0908486779893207, 'max_depth': 8, 'num_leaves': 115, 'min_child_samples': 84, 'subsample': 0.7748430342159063, 'colsample_bytree': 0.9635053450450706, 'reg_alpha': 1.98212434485913e-06, 'reg_lambda': 1.4698348531290614e-06}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001832 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1864
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 88
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ov

[I 2025-07-12 00:44:34,167] Trial 31 finished with value: 0.48412698412698413 and parameters: {'n_estimators': 401, 'learning_rate': 0.010083674607384095, 'max_depth': 5, 'num_leaves': 136, 'min_child_samples': 26, 'subsample': 0.7893495278788043, 'colsample_bytree': 0.8978830435643632, 'reg_alpha': 0.0018751169145963553, 'reg_lambda': 4.857122318546662e-07}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000437 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1864
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 88
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002285 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1833
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info]

[I 2025-07-12 00:44:34,492] Trial 32 finished with value: 0.4603174603174603 and parameters: {'n_estimators': 405, 'learning_rate': 0.032849770149304666, 'max_depth': 4, 'num_leaves': 87, 'min_child_samples': 47, 'subsample': 0.8559812708689475, 'colsample_bytree': 0.9585872422538644, 'reg_alpha': 7.916701816117373e-05, 'reg_lambda': 1.5393222046416547e-08}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001311 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1868
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 89
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000434 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1868
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 89
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] 

[I 2025-07-12 00:44:34,833] Trial 33 finished with value: 0.48412698412698413 and parameters: {'n_estimators': 418, 'learning_rate': 0.017618187098041433, 'max_depth': 6, 'num_leaves': 164, 'min_child_samples': 25, 'subsample': 0.724911404151209, 'colsample_bytree': 0.850070690057359, 'reg_alpha': 0.13737466332143178, 'reg_lambda': 7.005895598440347e-07}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000330 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1679
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 65
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000315 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1679
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 65
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517


[I 2025-07-12 00:44:35,170] Trial 34 finished with value: 0.4386724386724386 and parameters: {'n_estimators': 498, 'learning_rate': 0.00797361476451246, 'max_depth': 9, 'num_leaves': 123, 'min_child_samples': 63, 'subsample': 0.799894039485206, 'colsample_bytree': 0.7711448629388463, 'reg_alpha': 0.02883913533541246, 'reg_lambda': 2.7384065888667346e-06}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000526 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1679
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 65
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224


[I 2025-07-12 00:44:35,519] Trial 35 finished with value: 0.2857142857142857 and parameters: {'n_estimators': 387, 'learning_rate': 0.0046197687907367346, 'max_depth': 7, 'num_leaves': 36, 'min_child_samples': 54, 'subsample': 0.8910592951802326, 'colsample_bytree': 0.9411949284949789, 'reg_alpha': 0.004101167802577576, 'reg_lambda': 3.379949083186258e-07}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000752 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1781
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 74
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000561 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1781
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 74
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of 

[I 2025-07-12 00:44:35,812] Trial 36 finished with value: 0.0 and parameters: {'n_estimators': 302, 'learning_rate': 0.0001190970170051848, 'max_depth': 5, 'num_leaves': 137, 'min_child_samples': 37, 'subsample': 0.6161996800546145, 'colsample_bytree': 0.9827734751493795, 'reg_alpha': 5.8712148075027984e-05, 'reg_lambda': 4.9089626443502995e-08}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001535 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1849
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 84
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000947 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1849
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 84
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] 

[I 2025-07-12 00:44:36,319] Trial 37 finished with value: 0.48412698412698413 and parameters: {'n_estimators': 326, 'learning_rate': 0.041782680814458795, 'max_depth': 3, 'num_leaves': 175, 'min_child_samples': 24, 'subsample': 0.7168071273493087, 'colsample_bytree': 0.8897279653486303, 'reg_alpha': 0.0005593405376377098, 'reg_lambda': 1.0789497553874653e-08}. Best is trial 16 with value: 0.49999999999999994.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001159 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1935
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 107
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000590 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1935
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 107
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517


[I 2025-07-12 00:44:36,815] Trial 38 finished with value: 0.5079365079365079 and parameters: {'n_estimators': 463, 'learning_rate': 0.01272428075621393, 'max_depth': 12, 'num_leaves': 84, 'min_child_samples': 14, 'subsample': 0.5691295119026587, 'colsample_bytree': 0.9175784751474216, 'reg_alpha': 0.504465675976591, 'reg_lambda': 1.4308114027858138e-06}. Best is trial 38 with value: 0.5079365079365079.


[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000593 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1935
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 107
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000655 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1983
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 125
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number 

[I 2025-07-12 00:44:37,617] Trial 39 finished with value: 0.5079365079365079 and parameters: {'n_estimators': 461, 'learning_rate': 0.017154957556918624, 'max_depth': 13, 'num_leaves': 51, 'min_child_samples': 8, 'subsample': 0.5446886181469158, 'colsample_bytree': 0.5905155859995537, 'reg_alpha': 0.4157150199171893, 'reg_lambda': 1.0782771216997046e-05}. Best is trial 38 with value: 0.5079365079365079.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000642 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1953
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 114
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001676 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1953
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 114
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info

[I 2025-07-12 00:44:38,366] Trial 40 finished with value: 0.0 and parameters: {'n_estimators': 465, 'learning_rate': 3.5843585130300706e-06, 'max_depth': 13, 'num_leaves': 35, 'min_child_samples': 10, 'subsample': 0.5458650776882084, 'colsample_bytree': 0.6094735775963416, 'reg_alpha': 0.41250808232881697, 'reg_lambda': 8.172848563089402e-05}. Best is trial 38 with value: 0.5079365079365079.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004690 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1946
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 111
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000805 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1946
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 111
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number o

[I 2025-07-12 00:44:39,734] Trial 41 finished with value: 0.47130647130647124 and parameters: {'n_estimators': 444, 'learning_rate': 0.04563217081091383, 'max_depth': 13, 'num_leaves': 78, 'min_child_samples': 12, 'subsample': 0.5902139277865369, 'colsample_bytree': 0.5707721358029558, 'reg_alpha': 1.7598622213474397, 'reg_lambda': 1.600966129394323e-05}. Best is trial 38 with value: 0.5079365079365079.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000827 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2099
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 177
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000823 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2099
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 177
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number o

[I 2025-07-12 00:44:40,345] Trial 42 finished with value: 0.5079365079365079 and parameters: {'n_estimators': 467, 'learning_rate': 0.02237618496064165, 'max_depth': 12, 'num_leaves': 26, 'min_child_samples': 4, 'subsample': 0.5721599496594998, 'colsample_bytree': 0.6534561451434852, 'reg_alpha': 0.3312448404360243, 'reg_lambda': 1.4833796705568503e-06}. Best is trial 38 with value: 0.5079365079365079.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000633 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1932
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 106
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406


[I 2025-07-12 00:44:40,678] Trial 43 finished with value: 0.5225885225885226 and parameters: {'n_estimators': 472, 'learning_rate': 0.019336263769686366, 'max_depth': 14, 'num_leaves': 4, 'min_child_samples': 15, 'subsample': 0.5163597727646679, 'colsample_bytree': 0.6485590957242615, 'reg_alpha': 0.44350840142614917, 'reg_lambda': 0.001183488177210391}. Best is trial 43 with value: 0.5225885225885226.


[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000727 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1932
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 106
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000792 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1932
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 106
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number 

[I 2025-07-12 00:44:41,045] Trial 44 finished with value: 0.4069264069264069 and parameters: {'n_estimators': 486, 'learning_rate': 0.018275809473968632, 'max_depth': 15, 'num_leaves': 46, 'min_child_samples': 3, 'subsample': 0.5185323405348118, 'colsample_bytree': 0.6636444645639041, 'reg_alpha': 4.303641610722937, 'reg_lambda': 0.01009444124603745}. Best is trial 43 with value: 0.5225885225885226.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003829 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2014
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 137
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406


[I 2025-07-12 00:44:41,416] Trial 45 finished with value: 0.2222222222222222 and parameters: {'n_estimators': 465, 'learning_rate': 0.0009879704409810292, 'max_depth': 14, 'num_leaves': 4, 'min_child_samples': 6, 'subsample': 0.5632237081161995, 'colsample_bytree': 0.6331919431726537, 'reg_alpha': 0.3407618481209477, 'reg_lambda': 0.0008550850915008631}. Best is trial 43 with value: 0.5225885225885226.


[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000712 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2014
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 137
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number of positive: 32, number of negative: 105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000719 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2014
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 137
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.233577 -> initscore=-1.188224
[LightGBM] [Info] Start training from score -1.188224
[LightGBM] [Info] Number 

[I 2025-07-12 00:44:41,917] Trial 46 finished with value: 0.4386724386724386 and parameters: {'n_estimators': 473, 'learning_rate': 0.006314409544911638, 'max_depth': 12, 'num_leaves': 22, 'min_child_samples': 15, 'subsample': 0.5336474856973419, 'colsample_bytree': 0.5028372148932245, 'reg_alpha': 0.1233227736675077, 'reg_lambda': 0.0006301448958497424}. Best is trial 43 with value: 0.5225885225885226.
[I 2025-07-12 00:44:42,212] Trial 47 finished with value: 0.26666666666666666 and parameters: {'n_estimators': 121, 'learning_rate': 0.00291144561075974, 'max_depth': 14, 'num_leaves': 21, 'min_child_samples': 15, 'subsample': 0.574015193139111, 'colsample_bytree': 0.5637421899398538, 'reg_alpha': 0.8007908972242367, 'reg_lambda': 0.002889576129644346}. Best is trial 43 with value: 0.5225885225885226.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000579 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1932
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 106
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000582 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1932
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 106
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number o

[I 2025-07-12 00:44:42,927] Trial 48 finished with value: 0.2619047619047619 and parameters: {'n_estimators': 456, 'learning_rate': 0.012025228775595368, 'max_depth': 12, 'num_leaves': 50, 'min_child_samples': 20, 'subsample': 0.5019117062233864, 'colsample_bytree': 0.6461465327397555, 'reg_alpha': 8.841140067894214, 'reg_lambda': 6.307075927333081e-06}. Best is trial 43 with value: 0.5225885225885226.


[LightGBM] [Info] Number of positive: 39, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000723 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1992
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 128
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.284672 -> initscore=-0.921406
[LightGBM] [Info] Start training from score -0.921406
[LightGBM] [Info] Number of positive: 49, number of negative: 88
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000824 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1992
[LightGBM] [Info] Number of data points in the train set: 137, number of used features: 128
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.357664 -> initscore=-0.585517
[LightGBM] [Info] Start training from score -0.585517
[LightGBM] [Info] Number o

[I 2025-07-12 00:44:43,230] Trial 49 finished with value: 0.0 and parameters: {'n_estimators': 500, 'learning_rate': 0.0003012237688989336, 'max_depth': 13, 'num_leaves': 3, 'min_child_samples': 7, 'subsample': 0.5476764075515902, 'colsample_bytree': 0.6195805736821849, 'reg_alpha': 1.1004242571831873e-08, 'reg_lambda': 9.361686472376085e-05}. Best is trial 43 with value: 0.5225885225885226.


Tuned LGBMClassifier (MultiOutput):
Accuracy: 0.2857142857142857
Precision: 0.6547619047619048
Recall: 0.35185185185185186
F1 Score: 0.3988095238095238


In [13]:
joblib.dump(model, os.path.join(MODEL_PATH, f'best_lgbm-ovr_model_on_crpwarner_opcode_freq.pkl'))

['/Users/napatcholthaipanich/Dev/master/dissertation/workspace/ml/models/best_lgbm-ovr_model_on_crpwarner_opcode_freq.pkl']

### K-Fold (K=3)

In [14]:
NUM_FOLDS = 3
results = []
best_model = OneVsRestClassifier(LGBMClassifier(**study.best_params, random_state=42))
best_f1 = 0
best_fold = 0

In [15]:
for fold in range(NUM_FOLDS):
    print(f"=========== Fold-{fold} ===========")
    train_path = os.path.join(DATA_PATH, f'train_fold_{fold}-opcode-freq.csv')
    val_path = os.path.join(DATA_PATH, f'val_fold_{fold}-opcode-freq.csv')

    train_df = pd.read_csv(train_path)
    val_df   = pd.read_csv(val_path)

    X_train = train_df[feature_list]
    y_train = train_df[labels]

    X_val = val_df[feature_list]
    y_val = val_df[labels]

    # Train model
    model = OneVsRestClassifier(LGBMClassifier(**study.best_params, random_state=42))
    model.fit(X_train, y_train)

    # Evaluate
    y_pred = model.predict(X_val)
    report = classification_report(y_val, y_pred, target_names=labels, output_dict=True)
    acc = accuracy_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred, average="macro", zero_division=0)

    results.append({'fold': fold, 'accuracy': acc, 'report': report})
    print(f"Accuracy: {acc}")
    print("Precision:", precision_score(y_val, y_pred, average="macro", zero_division=0))
    print("Recall:", recall_score(y_val, y_pred, average="macro", zero_division=0))
    print("F1 Score:", f1)

    if best_f1 < f1:
        best_model = model
        best_fold = fold
## Step 6: Average Performance Summary
print("\n===== Overall Summary =====")
avg_acc = sum([r['accuracy'] for r in results]) / NUM_FOLDS
print(f"Average Accuracy: {avg_acc:.4f}")

# Save model
joblib.dump(best_model, os.path.join(MODEL_PATH, f'best_ada-ovr_model_on_crpwarner_opcode_freq_from_fold{best_fold}.pkl'))

[LightGBM] [Info] Number of positive: 13, number of negative: 33
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000523 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 861
[LightGBM] [Info] Number of data points in the train set: 46, number of used features: 82
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.282609 -> initscore=-0.931558
[LightGBM] [Info] Start training from score -0.931558
[LightGBM] [Info] Number of positive: 6, number of negative: 40
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000397 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 861
[LightGBM] [Info] Number of data points in the train set: 46, number of used features: 82
[LightGBM] [Info] [binary:BoostFromScor

['/Users/napatcholthaipanich/Dev/master/dissertation/workspace/ml/models/best_ada-ovr_model_on_crpwarner_opcode_freq_from_fold2.pkl']