In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
dataset = pd.read_csv('/content/reddit_preprocessed.csv')

# Drop rows with NaN values in 'clean_comment'
cleaned_dataset = dataset.dropna()

In [4]:
# Separate features and target
X_cleaned = cleaned_dataset['clean_comment']
y_cleaned = cleaned_dataset['category']

In [5]:
# Split the cleaned data into train and test sets (80-20 split)
X_train_cleaned, X_test_cleaned, y_train_cleaned, y_test_cleaned = train_test_split(X_cleaned, y_cleaned, test_size=0.2, random_state=42)

In [6]:
# Apply TfidfVectorizer with trigram setting and max_features=1000
tfidf_cleaned = TfidfVectorizer(ngram_range=(1, 3), max_features=10000)

In [7]:
# Fit the vectorizer on the training data and transform both train and test sets
X_train_tfidf_cleaned = tfidf_cleaned.fit_transform(X_train_cleaned)
X_test_tfidf_cleaned = tfidf_cleaned.transform(X_test_cleaned)

In [8]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.8-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [9]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
import optuna

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [10]:
# Function to optimize LightGBM hyperparameters
def objective(trial):
    # Define hyperparameters to be tuned
    param = {
        "objective": "multiclass",
        "num_class": 3,  # Assuming 3 categories (-1, 0, 1)
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 1e-1),
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
        "max_depth": trial.suggest_int("max_depth", 3, 20),
        "metric": "multi_logloss",
        "is_unbalance": True,
        "class_weight": "balanced",
    }

    # Define the LightGBM model with the trial parameters
    model = lgb.LGBMClassifier(**param)

    # Perform cross-validation
    scores = cross_val_score(model, X_train_tfidf_cleaned, y_train_cleaned, cv=3, scoring='accuracy')

    # Return the average score across folds
    return scores.mean()

In [11]:
# Create an Optuna study to optimize the hyperparameters
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2025-01-17 14:10:45,494] A new study created in memory with name: no-name-e2ddc35b-f91d-48eb-b2b4-3c5cbd0b3ad9


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.112554 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106621 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.179044 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:11:01,756] Trial 0 finished with value: 0.7909428357615683 and parameters: {'learning_rate': 0.06464006251973473, 'n_estimators': 110, 'max_depth': 17}. Best is trial 0 with value: 0.7909428357615683.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.102934 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.109971 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.232039 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:11:13,357] Trial 1 finished with value: 0.747594852024856 and parameters: {'learning_rate': 0.06563608343465051, 'n_estimators': 52, 'max_depth': 15}. Best is trial 0 with value: 0.7909428357615683.






[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.110992 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.111769 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.111786 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:11:20,372] Trial 2 finished with value: 0.7478881450032557 and parameters: {'learning_rate': 0.08611922159905228, 'n_estimators': 190, 'max_depth': 3}. Best is trial 0 with value: 0.7909428357615683.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.175928 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106353 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.111826 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:11:27,606] Trial 3 finished with value: 0.7202018535348215 and parameters: {'learning_rate': 0.05834568586975043, 'n_estimators': 66, 'max_depth': 9}. Best is trial 0 with value: 0.7909428357615683.






[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.114970 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105171 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.110524 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:12:00,095] Trial 4 finished with value: 0.8070152022013118 and parameters: {'learning_rate': 0.0652262287173908, 'n_estimators': 215, 'max_depth': 19}. Best is trial 4 with value: 0.8070152022013118.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.177761 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.111087 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.116779 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:12:54,011] Trial 5 finished with value: 0.8112387057376848 and parameters: {'learning_rate': 0.076086251139309, 'n_estimators': 451, 'max_depth': 14}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.108624 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.175758 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.188262 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:13:28,775] Trial 6 finished with value: 0.8097136173477161 and parameters: {'learning_rate': 0.08542285142999426, 'n_estimators': 277, 'max_depth': 17}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.117415 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.102649 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.173537 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:13:50,653] Trial 7 finished with value: 0.8046688480512585 and parameters: {'learning_rate': 0.08462435939467725, 'n_estimators': 197, 'max_depth': 13}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.115630 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.107738 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.183989 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:14:02,128] Trial 8 finished with value: 0.7949316182032331 and parameters: {'learning_rate': 0.09525597373622562, 'n_estimators': 71, 'max_depth': 20}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.104377 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.175713 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.109377 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:14:28,297] Trial 9 finished with value: 0.6597837580558793 and parameters: {'learning_rate': 0.006620999741467658, 'n_estimators': 332, 'max_depth': 6}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106700 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.102951 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.115677 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:15:12,693] Trial 10 finished with value: 0.7931132471577204 and parameters: {'learning_rate': 0.02755391368280943, 'n_estimators': 492, 'max_depth': 10}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.116620 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.104196 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.201574 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:15:59,479] Trial 11 finished with value: 0.8056660462423886 and parameters: {'learning_rate': 0.04223331024507092, 'n_estimators': 406, 'max_depth': 14}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.117762 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.104003 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.121259 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:16:42,559] Trial 12 finished with value: 0.8108281347947773 and parameters: {'learning_rate': 0.07813463351831708, 'n_estimators': 350, 'max_depth': 16}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.129141 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.101308 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.118089 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:17:22,942] Trial 13 finished with value: 0.8103587235739279 and parameters: {'learning_rate': 0.07618026930544348, 'n_estimators': 430, 'max_depth': 11}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.124051 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103009 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.107660 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:18:11,004] Trial 14 finished with value: 0.8071911346323574 and parameters: {'learning_rate': 0.043480987360645325, 'n_estimators': 375, 'max_depth': 16}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.111992 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105057 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.121723 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:18:58,717] Trial 15 finished with value: 0.8108865724813435 and parameters: {'learning_rate': 0.09689110433429249, 'n_estimators': 454, 'max_depth': 13}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.116927 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.176567 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.111485 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:19:48,405] Trial 16 finished with value: 0.8104760634755706 and parameters: {'learning_rate': 0.09804073548303018, 'n_estimators': 499, 'max_depth': 12}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.114613 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103480 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.183005 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:20:20,399] Trial 17 finished with value: 0.8102999762016884 and parameters: {'learning_rate': 0.09992515384176337, 'n_estimators': 447, 'max_depth': 8}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.125825 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.121205 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.110972 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:20:41,366] Trial 18 finished with value: 0.7959287028429496 and parameters: {'learning_rate': 0.07461476712349913, 'n_estimators': 286, 'max_depth': 7}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.108277 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.119638 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.119066 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:21:28,570] Trial 19 finished with value: 0.8085401357484439 and parameters: {'learning_rate': 0.05282656096462814, 'n_estimators': 453, 'max_depth': 12}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.122786 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.183084 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.112738 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:22:17,747] Trial 20 finished with value: 0.7966327319299493 and parameters: {'learning_rate': 0.027544648806294092, 'n_estimators': 390, 'max_depth': 14}. Best is trial 5 with value: 0.8112387057376848.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.118370 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.111888 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.116321 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:23:06,862] Trial 21 finished with value: 0.8118839564838775 and parameters: {'learning_rate': 0.0778074194702338, 'n_estimators': 342, 'max_depth': 18}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.173580 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.113558 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.131208 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:23:53,837] Trial 22 finished with value: 0.8115907151197567 and parameters: {'learning_rate': 0.09172924300494212, 'n_estimators': 327, 'max_depth': 18}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.117235 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103110 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.108082 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:24:39,405] Trial 23 finished with value: 0.8117080034071203 and parameters: {'learning_rate': 0.08841419087272619, 'n_estimators': 315, 'max_depth': 19}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.166838 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.214494 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.119196 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:25:24,727] Trial 24 finished with value: 0.8110627939523507 and parameters: {'learning_rate': 0.08947967048066208, 'n_estimators': 310, 'max_depth': 19}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.118911 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.111490 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.114372 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:26:09,982] Trial 25 finished with value: 0.81100409819439 and parameters: {'learning_rate': 0.08989055144150251, 'n_estimators': 328, 'max_depth': 18}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.119460 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.118710 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.113919 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:26:46,886] Trial 26 finished with value: 0.810182718882892 and parameters: {'learning_rate': 0.06919732029286126, 'n_estimators': 241, 'max_depth': 20}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.120529 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.107161 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.188369 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:27:20,115] Trial 27 finished with value: 0.8094789272215751 and parameters: {'learning_rate': 0.08351358831811448, 'n_estimators': 239, 'max_depth': 18}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.189219 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.102913 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.201917 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:27:42,285] Trial 28 finished with value: 0.7956354614788288 and parameters: {'learning_rate': 0.05533522088481684, 'n_estimators': 148, 'max_depth': 18}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.104254 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103213 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.107096 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:28:27,225] Trial 29 finished with value: 0.8110627836294949 and parameters: {'learning_rate': 0.09275584333257567, 'n_estimators': 352, 'max_depth': 16}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.120841 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.182607 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.191004 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:29:12,755] Trial 30 finished with value: 0.8098896013930404 and parameters: {'learning_rate': 0.07145563039128024, 'n_estimators': 298, 'max_depth': 20}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.125411 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103446 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.108108 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:30:01,787] Trial 31 finished with value: 0.8103002342730828 and parameters: {'learning_rate': 0.079946612090614, 'n_estimators': 369, 'max_depth': 17}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.137893 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.123744 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.114646 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:30:35,046] Trial 32 finished with value: 0.8060179627187587 and parameters: {'learning_rate': 0.05971072136826598, 'n_estimators': 254, 'max_depth': 15}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.123204 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.101679 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.131434 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:31:30,251] Trial 33 finished with value: 0.8106522436551548 and parameters: {'learning_rate': 0.08062725227085148, 'n_estimators': 411, 'max_depth': 17}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.122771 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103585 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.128119 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:32:16,527] Trial 34 finished with value: 0.8116493695862944 and parameters: {'learning_rate': 0.07112262103131192, 'n_estimators': 318, 'max_depth': 19}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.127946 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103573 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.196697 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:33:04,328] Trial 35 finished with value: 0.8108281451176329 and parameters: {'learning_rate': 0.0682954834253512, 'n_estimators': 322, 'max_depth': 19}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.123538 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.175612 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.192188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:33:41,988] Trial 36 finished with value: 0.8110041911000921 and parameters: {'learning_rate': 0.08906427583487996, 'n_estimators': 255, 'max_depth': 19}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.173076 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.123337 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.110303 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:33:58,660] Trial 37 finished with value: 0.7803258302069015 and parameters: {'learning_rate': 0.0653636502655014, 'n_estimators': 349, 'max_depth': 4}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.125338 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105776 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.112857 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:34:43,289] Trial 38 finished with value: 0.8106520062294719 and parameters: {'learning_rate': 0.06044678492819386, 'n_estimators': 298, 'max_depth': 18}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.137671 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.175776 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.120741 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:35:07,614] Trial 39 finished with value: 0.7949902520240592 and parameters: {'learning_rate': 0.04696711726626321, 'n_estimators': 149, 'max_depth': 20}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.110902 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.187363 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.178849 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:35:43,021] Trial 40 finished with value: 0.8088923309419199 and parameters: {'learning_rate': 0.0839121522988039, 'n_estimators': 275, 'max_depth': 15}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.128920 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.126650 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.117718 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:36:36,713] Trial 41 finished with value: 0.8112387676748195 and parameters: {'learning_rate': 0.07259023762621147, 'n_estimators': 379, 'max_depth': 17}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.137212 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.181900 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.111593 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:37:27,553] Trial 42 finished with value: 0.811825394923042 and parameters: {'learning_rate': 0.07317827621575525, 'n_estimators': 371, 'max_depth': 17}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.130167 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105142 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.114182 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:38:15,666] Trial 43 finished with value: 0.8105347489106755 and parameters: {'learning_rate': 0.09258901015149987, 'n_estimators': 336, 'max_depth': 19}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.166504 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.192845 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.115707 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:39:01,593] Trial 44 finished with value: 0.8107695112968069 and parameters: {'learning_rate': 0.08209427314027053, 'n_estimators': 312, 'max_depth': 18}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.122329 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.110334 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.199074 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:39:56,715] Trial 45 finished with value: 0.8094203553378837 and parameters: {'learning_rate': 0.06240779710783803, 'n_estimators': 403, 'max_depth': 16}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.133335 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105665 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.140240 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:40:51,796] Trial 46 finished with value: 0.8114146691372977 and parameters: {'learning_rate': 0.0877847347506968, 'n_estimators': 362, 'max_depth': 20}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.132301 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106462 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.220479 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:41:27,207] Trial 47 finished with value: 0.8097720860028496 and parameters: {'learning_rate': 0.07677595045713388, 'n_estimators': 274, 'max_depth': 15}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.190677 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.133021 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.123401 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:42:00,462] Trial 48 finished with value: 0.8105349450449353 and parameters: {'learning_rate': 0.09414282950432179, 'n_estimators': 213, 'max_depth': 19}. Best is trial 21 with value: 0.8118839564838775.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.119439 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47506
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1901
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.109874 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 47463
[LightGBM] [Info] Number of data points in the train set: 11365, number of used features: 1897
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.112103 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48986
[LightGBM] [Info] Number of data points in the train set: 11366, number of used features: 1989
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-01-17 14:42:48,396] Trial 49 finished with value: 0.8118839874524449 and parameters: {'learning_rate': 0.08646073908592253, 'n_estimators': 340, 'max_depth': 17}. Best is trial 49 with value: 0.8118839874524449.


In [12]:
# Extract the best hyperparameters
best_params = study.best_params
best_params

{'learning_rate': 0.08646073908592253, 'n_estimators': 340, 'max_depth': 17}

In [13]:
best_model = lgb.LGBMClassifier(

    objective='multiclass',
    num_class=3,
    metric="multi_logloss",
    is_unbalance= True,
    class_weight= "balanced",
    reg_alpha= 0.1,  # L1 regularization
    reg_lambda= 0.1,  # L2 regularization
    learning_rate= 0.08,
    max_depth= 20,
    n_estimators=367
)

In [14]:
# Fit the model on the resampled training data
best_model.fit(X_train_tfidf_cleaned, y_train_cleaned)



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.272758 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77162
[LightGBM] [Info] Number of data points in the train set: 17048, number of used features: 2869
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




In [15]:
# Predict on the train set
y_train_pred = best_model.predict(X_train_tfidf_cleaned)



In [16]:
# Calculate accuracy on the test set
accuracy_train = accuracy_score(y_train_cleaned, y_train_pred)
accuracy_train

0.9292585640544345

In [17]:
# Generate classification report
report_train = classification_report(y_train_cleaned, y_train_pred)
print(report_train)

              precision    recall  f1-score   support

        -1.0       0.93      0.91      0.92      3974
         0.0       0.86      0.99      0.92      5523
         1.0       0.99      0.90      0.94      7551

    accuracy                           0.93     17048
   macro avg       0.93      0.93      0.93     17048
weighted avg       0.93      0.93      0.93     17048



In [18]:
# Predict on the test set
y_pred = best_model.predict(X_test_tfidf_cleaned)



In [19]:
# Calculate accuracy on the test set
accuracy = accuracy_score(y_test_cleaned, y_pred)
accuracy

0.836931018301267

In [20]:
# Generate classification report
report = classification_report(y_test_cleaned, y_pred)
print(report)

              precision    recall  f1-score   support

        -1.0       0.77      0.74      0.75       975
         0.0       0.82      0.94      0.88      1384
         1.0       0.89      0.81      0.85      1903

    accuracy                           0.84      4262
   macro avg       0.83      0.83      0.83      4262
weighted avg       0.84      0.84      0.84      4262



In [21]:
import re
import numpy as np

# Assuming you have pre-trained tfidf_vectorizer and lgbm_model loaded
# tfidf_vectorizer: Your trained TF-IDF vectorizer
# lgbm_model: Your trained LightGBM model

# Function to clean and preprocess a YouTube comment (same as used during training)
def preprocess_comment(comment):
    # Lowercasing
    comment = comment.lower()

    # Remove special characters, URLs, punctuation, and extra spaces
    comment = re.sub(r"http\S+|www\S+|https\S+", '', comment, flags=re.MULTILINE)  # Remove URLs
    comment = re.sub(r'\W', ' ', comment)  # Remove special characters
    comment = re.sub(r'\s+', ' ', comment).strip()  # Remove extra spaces and newlines

    return comment

# Prediction function
def predict_sentiment(comment, tfidf_vectorizer, lgbm_model):
    # Step 1: Preprocess the YouTube comment
    cleaned_comment = preprocess_comment(comment)

    # Step 2: Transform the comment using the trained TF-IDF vectorizer
    comment_tfidf = tfidf_vectorizer.transform([cleaned_comment])

    # Step 3: Use the trained LightGBM model to predict the sentiment
    prediction = lgbm_model.predict(comment_tfidf)
    prediction_proba = lgbm_model.predict_proba(comment_tfidf)

    # Step 4: Get the predicted sentiment (label) and probability
    sentiment_class = np.argmax(prediction_proba)
    sentiment_proba = np.max(prediction_proba)

    # Step 5: Return the sentiment label and confidence
    return {
        'sentiment_class': int(prediction[0]),  # -1, 0, or 1 depending on your labels
        'confidence': sentiment_proba
    }

# Example usage:
comment1 = "I absolutely hate this video!"
comment2 = "The explanations were confusing and the video quality was poor."
comment3 = "I didn’t learn anything useful. Really disappointed."
comment4 = "Wow, the explanation was so clear and helpful. Definitely subscribing!"
comment5 = "This is the worst video I’ve seen on this topic, very misleading"
comment6 = "Not much to say about this, just a standard video."
comment7 = "The video is okay, but I expected more depth in the content."
comment8 = "Superb content! Mazaa aa gaya dekh ke. Best video on this topic!"
comment9 = "Poor video quality aur explanation bhi weak tha."
comment10 = "Yeh video theek tha, but I was expecting more depth."
result = predict_sentiment(comment10, tfidf_cleaned, best_model)
print(f"Predicted Sentiment: {result['sentiment_class']}, Confidence: {result['confidence']}")

Predicted Sentiment: 0, Confidence: 0.6834544827177101


