In [35]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import spacy
import optuna
from sklearn.model_selection import cross_val_score
import pickle
import lightgbm as lgb


In [36]:
import mlflow
import dagshub
dagshub.init(repo_owner='satyajeetrai007', repo_name='Youtube-Comment-Sentiment-Analysis', mlflow=True)

In [58]:
mlflow.set_experiment("lightGBM HPT on Custom Features")

2025/08/28 23:28:45 INFO mlflow.tracking.fluent: Experiment with name 'lightGBM HPT on Custom Features' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/2f1342ca992445ed9a0d602008d1373c', creation_time=1756403926717, experiment_id='11', last_update_time=1756403926717, lifecycle_stage='active', name='lightGBM HPT on Custom Features', tags={}>

In [38]:
# Load dataset
dataset = pd.read_csv('data_preprocessed.csv')

# Drop rows with NaN values in 'clean_comment'
cleaned_dataset = dataset.dropna().drop_duplicates()

In [39]:
# Separate features and target
X_cleaned = cleaned_dataset['clean_comment']
y_cleaned = cleaned_dataset['category']

# Split the cleaned data into train and test sets (80-20 split)
X_train_cleaned, X_test_cleaned, y_train_cleaned, y_test_cleaned = train_test_split(X_cleaned, y_cleaned, test_size=0.2, random_state=42)


In [40]:
# Load spacy language model for POS tagging
nlp = spacy.load('en_core_web_sm')

In [41]:
# All POS tags we care about
ALL_POS_TAGS = ['NOUN', 'VERB', 'ADJ', 'ADV', 'PRON', 'PROPN', 'INTJ', 'NUM', 'DET', 'ADP', 'CCONJ', 'PART', 'SCONJ', 'SYM', 'X', 'PUNCT', 'SPACE']

def extract_custom_features(text):
    doc = nlp(text)
    word_list = [token.text for token in doc]

    comment_length = len(text)
    word_count = len(word_list)
    avg_word_length = sum(len(word) for word in word_list) / word_count if word_count > 0 else 0
    unique_word_count = len(set(word_list))
    lexical_diversity = unique_word_count / word_count if word_count > 0 else 0

    pos_tags = [token.pos_ for token in doc]
    pos_proportion = {tag: pos_tags.count(tag) / word_count for tag in ALL_POS_TAGS}
    
    return {
        'comment_length': comment_length,
        'word_count': word_count,
        'avg_word_length': avg_word_length,
        'unique_word_count': unique_word_count,
        'lexical_diversity': lexical_diversity,
        **pos_proportion
    }


In [42]:
# Apply the custom feature extraction
train_custom_features = pd.DataFrame([extract_custom_features(text) for text in X_train_cleaned])
test_custom_features = pd.DataFrame([extract_custom_features(text) for text in X_test_cleaned])

In [43]:
train_custom_features.shape, test_custom_features.shape

((28994, 22), (7249, 22))

In [44]:
# Replace NaN values in POS tag proportions with 0
train_custom_features.fillna(0, inplace=True)
test_custom_features.fillna(0, inplace=True)

In [45]:
# Apply TfidfVectorizer with trigram setting and max_features=1000
tfidf = TfidfVectorizer(ngram_range=(1, 3), max_features=2000)
X_train_tfidf = tfidf.fit_transform(X_train_cleaned)
X_test_tfidf = tfidf.transform(X_test_cleaned)

In [46]:
# Convert TF-IDF to DataFrame
X_train_tfidf_df = pd.DataFrame(X_train_tfidf.toarray(), columns=tfidf.get_feature_names_out())
X_test_tfidf_df = pd.DataFrame(X_test_tfidf.toarray(), columns=tfidf.get_feature_names_out())

In [47]:
# Combine TF-IDF and custom features
X_train_combined = pd.concat([X_train_tfidf_df.reset_index(drop=True), train_custom_features.reset_index(drop=True)], axis=1)
X_test_combined = pd.concat([X_test_tfidf_df.reset_index(drop=True), test_custom_features.reset_index(drop=True)], axis=1)

In [48]:
X_train_combined.shape, X_test_combined.shape

((28994, 2022), (7249, 2022))

In [59]:
# Function to log results in MLflow
from sklearn.metrics import classification_report, accuracy_score
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    with mlflow.start_run():
        # Log model type and trial number
        run_name = f"Trial_{trial_number}_{model_name}_class_weight"
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Log hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        train_cv_accuracy  =  cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy').mean()
        mlflow.log_metric("train_cv_accuracy ", train_cv_accuracy )
        accuracy = float(accuracy_score(y_test, y_pred))
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
     
        with open(f"{model_name}_model", "wb") as f:
            pickle.dump(model, f)
        mlflow.log_artifact(f"{model_name}_model", "model")
        

        return accuracy




In [60]:
# Function to optimize LightGBM hyperparameters
import lightgbm as lgb
def objective(trial):
    # Define hyperparameters to be tuned
    param = {
        "objective": "multiclass",
        "num_class": 3,  # Assuming 3 categories (-1, 0, 1)
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 1e-1),
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
        "max_depth": trial.suggest_int("max_depth", 3, 20),
        "metric": "multi_logloss",
        "is_unbalance": True,
        "class_weight": "balanced",
    }

    # Define the LightGBM model with the trial parameters
    model = lgb.LGBMClassifier(**param)

    # Perform cross-validation
    accuracy = log_mlflow("LightGBM", model, X_train_combined, X_test_combined, y_train_cleaned, y_test_cleaned, param, trial.number)

    # Return the average score across folds
    return accuracy

In [61]:
# Create an Optuna study to optimize the hyperparameters
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2025-08-28 23:28:58,529] A new study created in memory with name: no-name-03533f37-5143-465d-ba8e-e0a0287cbb20


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.074095 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.102791 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-28 23:31:39,600] Trial 0 finished with value: 0.8370809766864395 and parameters: {'learning_rate': 0.08740308671893958, 'n_estimators': 397, 'max_depth': 15}. Best is trial 0 with value: 0.8370809766864395.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073706 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073918 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]

[I 2025-08-28 23:33:22,288] Trial 1 finished with value: 0.6889226100151745 and parameters: {'learning_rate': 0.00900008708096594, 'n_estimators': 103, 'max_depth': 12}. Best is trial 0 with value: 0.8370809766864395.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.094318 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.048596 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.047

[I 2025-08-28 23:35:19,615] Trial 2 finished with value: 0.799834459925507 and parameters: {'learning_rate': 0.0351757301162409, 'n_estimators': 210, 'max_depth': 17}. Best is trial 0 with value: 0.8370809766864395.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.095396 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053333 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050

[I 2025-08-28 23:36:45,936] Trial 3 finished with value: 0.7174782728652228 and parameters: {'learning_rate': 0.04990623091769481, 'n_estimators': 59, 'max_depth': 9}. Best is trial 0 with value: 0.8370809766864395.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.418221 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.149980 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-28 23:39:57,794] Trial 4 finished with value: 0.8277003724651676 and parameters: {'learning_rate': 0.046325333068302436, 'n_estimators': 381, 'max_depth': 14}. Best is trial 0 with value: 0.8370809766864395.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.350571 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.135611 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-28 23:41:42,973] Trial 5 finished with value: 0.7544488895020003 and parameters: {'learning_rate': 0.0618668950321913, 'n_estimators': 163, 'max_depth': 5}. Best is trial 0 with value: 0.8370809766864395.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.263941 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.206190 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-28 23:45:39,328] Trial 6 finished with value: 0.8370809766864395 and parameters: {'learning_rate': 0.06957716262067284, 'n_estimators': 434, 'max_depth': 18}. Best is trial 0 with value: 0.8370809766864395.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.595958 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.295487 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-28 23:48:14,095] Trial 7 finished with value: 0.7886605048972272 and parameters: {'learning_rate': 0.07927877557711314, 'n_estimators': 147, 'max_depth': 8}. Best is trial 0 with value: 0.8370809766864395.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.579357 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.325938 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 03:15:41,064] Trial 8 finished with value: 0.7372051317423093 and parameters: {'learning_rate': 0.046174482882176106, 'n_estimators': 162, 'max_depth': 5}. Best is trial 0 with value: 0.8370809766864395.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.135960 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.084729 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.059

[I 2025-08-29 03:17:21,411] Trial 9 finished with value: 0.7762449993102497 and parameters: {'learning_rate': 0.06792628236210357, 'n_estimators': 213, 'max_depth': 5}. Best is trial 0 with value: 0.8370809766864395.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.079834 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.092816 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]

[I 2025-08-29 03:19:41,704] Trial 10 finished with value: 0.83928817767968 and parameters: {'learning_rate': 0.09746055096589865, 'n_estimators': 327, 'max_depth': 20}. Best is trial 10 with value: 0.83928817767968.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.120305 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058709 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 03:22:06,503] Trial 11 finished with value: 0.8405297282383777 and parameters: {'learning_rate': 0.09649770159140739, 'n_estimators': 332, 'max_depth': 20}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076774 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053462 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 03:24:43,871] Trial 12 finished with value: 0.8388743274934474 and parameters: {'learning_rate': 0.09965988990483571, 'n_estimators': 315, 'max_depth': 20}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.167447 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.048301 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 03:28:09,558] Trial 13 finished with value: 0.8399779279900675 and parameters: {'learning_rate': 0.09563106205932007, 'n_estimators': 494, 'max_depth': 20}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.130926 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.056334 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]

[I 2025-08-29 03:31:03,024] Trial 14 finished with value: 0.8377707269968272 and parameters: {'learning_rate': 0.08692343949475653, 'n_estimators': 469, 'max_depth': 17}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.146149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.063513 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.071

[I 2025-08-29 03:34:05,131] Trial 15 finished with value: 0.8267347220306249 and parameters: {'learning_rate': 0.02383696205041247, 'n_estimators': 491, 'max_depth': 20}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.089159 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054677 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 03:36:05,504] Trial 16 finished with value: 0.8337701751965788 and parameters: {'learning_rate': 0.08032732382188758, 'n_estimators': 278, 'max_depth': 15}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.146566 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062404 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 03:38:19,078] Trial 17 finished with value: 0.8337701751965788 and parameters: {'learning_rate': 0.09152118597678763, 'n_estimators': 363, 'max_depth': 12}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.140049 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070519 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.100

[I 2025-08-29 03:41:12,029] Trial 18 finished with value: 0.8369430266243619 and parameters: {'learning_rate': 0.060053302872018814, 'n_estimators': 444, 'max_depth': 18}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.142678 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075813 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.085

[I 2025-08-29 03:43:01,020] Trial 19 finished with value: 0.8155607670023451 and parameters: {'learning_rate': 0.07798994169681558, 'n_estimators': 232, 'max_depth': 9}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.148331 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066561 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.090

[I 2025-08-29 03:45:12,825] Trial 20 finished with value: 0.8363912263760519 and parameters: {'learning_rate': 0.09991916345060411, 'n_estimators': 278, 'max_depth': 14}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098735 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075006 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 03:47:33,308] Trial 21 finished with value: 0.8377707269968272 and parameters: {'learning_rate': 0.09147053121691671, 'n_estimators': 328, 'max_depth': 20}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.145956 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.088205 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061

[I 2025-08-29 03:49:51,872] Trial 22 finished with value: 0.8394261277417575 and parameters: {'learning_rate': 0.09726958862872527, 'n_estimators': 326, 'max_depth': 18}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.155063 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.093853 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.094

[I 2025-08-29 03:52:23,294] Trial 23 finished with value: 0.8380466271209822 and parameters: {'learning_rate': 0.08234388022269665, 'n_estimators': 389, 'max_depth': 17}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098155 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.094390 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 03:55:06,497] Trial 24 finished with value: 0.8383225272451372 and parameters: {'learning_rate': 0.07165404679363198, 'n_estimators': 431, 'max_depth': 18}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.091992 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073826 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 03:57:09,072] Trial 25 finished with value: 0.8340460753207339 and parameters: {'learning_rate': 0.09115579754460142, 'n_estimators': 256, 'max_depth': 16}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.127084 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070906 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.102

[I 2025-08-29 03:59:32,966] Trial 26 finished with value: 0.8376327769347496 and parameters: {'learning_rate': 0.0947125754352327, 'n_estimators': 350, 'max_depth': 19}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.129440 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058567 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.087

[I 2025-08-29 04:01:46,014] Trial 27 finished with value: 0.8357014760656643 and parameters: {'learning_rate': 0.0594045526424295, 'n_estimators': 297, 'max_depth': 19}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.101994 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.081922 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056

[I 2025-08-29 04:04:35,101] Trial 28 finished with value: 0.8355635260035867 and parameters: {'learning_rate': 0.08389324871391228, 'n_estimators': 498, 'max_depth': 16}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.101882 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.100732 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 04:06:59,528] Trial 29 finished with value: 0.8354255759415091 and parameters: {'learning_rate': 0.07429211823313801, 'n_estimators': 406, 'max_depth': 14}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.165582 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073431 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 04:09:33,361] Trial 30 finished with value: 0.8399779279900675 and parameters: {'learning_rate': 0.08667283354687552, 'n_estimators': 404, 'max_depth': 19}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.123505 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075476 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.083

[I 2025-08-29 04:11:57,657] Trial 31 finished with value: 0.8391502276176024 and parameters: {'learning_rate': 0.08765108945338056, 'n_estimators': 353, 'max_depth': 19}. Best is trial 11 with value: 0.8405297282383777.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.155397 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076516 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 04:14:48,339] Trial 32 finished with value: 0.8413574286108428 and parameters: {'learning_rate': 0.0935676560006899, 'n_estimators': 464, 'max_depth': 19}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.110298 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.092551 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]

[I 2025-08-29 04:16:36,338] Trial 33 finished with value: 0.6405021382259622 and parameters: {'learning_rate': 0.0068593898196372335, 'n_estimators': 464, 'max_depth': 3}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.119187 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.082716 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.077

[I 2025-08-29 04:19:13,885] Trial 34 finished with value: 0.8368050765622844 and parameters: {'learning_rate': 0.08709205153933776, 'n_estimators': 419, 'max_depth': 16}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.121576 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.064658 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 04:22:12,402] Trial 35 finished with value: 0.8130776658849497 and parameters: {'learning_rate': 0.01855632796796093, 'n_estimators': 466, 'max_depth': 19}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.134995 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.081406 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.082

[I 2025-08-29 04:24:32,113] Trial 36 finished with value: 0.8337701751965788 and parameters: {'learning_rate': 0.09207109857881102, 'n_estimators': 386, 'max_depth': 12}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.111287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068111 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.063

[I 2025-08-29 04:27:13,708] Trial 37 finished with value: 0.8374948268726721 and parameters: {'learning_rate': 0.07545457129826047, 'n_estimators': 452, 'max_depth': 17}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106013 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.065238 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 04:29:52,011] Trial 38 finished with value: 0.8300455235204856 and parameters: {'learning_rate': 0.03487279892214049, 'n_estimators': 407, 'max_depth': 19}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.132578 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.063991 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 04:32:39,704] Trial 39 finished with value: 0.8380466271209822 and parameters: {'learning_rate': 0.08387292419449924, 'n_estimators': 477, 'max_depth': 15}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.079361 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.090650 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]

[I 2025-08-29 04:34:51,535] Trial 40 finished with value: 0.8214926196716789 and parameters: {'learning_rate': 0.055708039381918616, 'n_estimators': 368, 'max_depth': 10}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.123407 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.067789 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.078

[I 2025-08-29 04:37:26,733] Trial 41 finished with value: 0.839564077803835 and parameters: {'learning_rate': 0.09576486010785676, 'n_estimators': 421, 'max_depth': 18}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.108835 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058093 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.097

[I 2025-08-29 04:40:10,118] Trial 42 finished with value: 0.8391502276176024 and parameters: {'learning_rate': 0.09335343903696207, 'n_estimators': 437, 'max_depth': 20}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097431 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097302 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]

[I 2025-08-29 04:43:22,170] Trial 43 finished with value: 0.8408056283625328 and parameters: {'learning_rate': 0.0666528846630408, 'n_estimators': 409, 'max_depth': 18}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.104674 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.094635 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]

[I 2025-08-29 04:46:29,325] Trial 44 finished with value: 0.8399779279900675 and parameters: {'learning_rate': 0.06744348162077736, 'n_estimators': 496, 'max_depth': 20}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.149393 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073341 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 04:49:08,678] Trial 45 finished with value: 0.8325286246378811 and parameters: {'learning_rate': 0.04297086631389606, 'n_estimators': 400, 'max_depth': 17}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.129665 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074044 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.101

[I 2025-08-29 04:51:57,224] Trial 46 finished with value: 0.8390122775555249 and parameters: {'learning_rate': 0.08782192946595158, 'n_estimators': 455, 'max_depth': 19}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.110546 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.069114 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]

[I 2025-08-29 04:54:35,203] Trial 47 finished with value: 0.8368050765622844 and parameters: {'learning_rate': 0.07938445534209333, 'n_estimators': 483, 'max_depth': 13}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.151043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070

[I 2025-08-29 04:56:06,960] Trial 48 finished with value: 0.8075596634018485 and parameters: {'learning_rate': 0.0999852618280191, 'n_estimators': 80, 'max_depth': 18}. Best is trial 32 with value: 0.8413574286108428.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.094379 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.098881 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo

[I 2025-08-29 04:58:07,556] Trial 49 finished with value: 0.8232859704786867 and parameters: {'learning_rate': 0.08480752988325226, 'n_estimators': 374, 'max_depth': 7}. Best is trial 32 with value: 0.8413574286108428.


In [63]:
model = lgb.LGBMClassifier(

    objective='multiclass',
    num_class=3,
    metric="multi_logloss",
    is_unbalance= True,
    class_weight= "balanced",
    reg_alpha= 0.1,  # L1 regularization
    reg_lambda= 0.1,  # L2 regularization
    learning_rate= 0.06631684007924099,
    max_depth= 18,
    n_estimators=449
)

In [64]:
# Fit the model on the resampled training data
model.fit(X_train_combined, y_train_cleaned)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.185734 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


0,1,2
,boosting_type,'gbdt'
,num_leaves,31
,max_depth,18
,learning_rate,0.06631684007924099
,n_estimators,449
,subsample_for_bin,200000
,objective,'multiclass'
,class_weight,'balanced'
,min_split_gain,0.0
,min_child_weight,0.001


In [65]:
# Predict on the test set
from sklearn.metrics import accuracy_score
y_pred = model.predict(X_test_combined)
accuracy = accuracy_score(y_test_cleaned, y_pred)
accuracy

0.8391502276176024

In [None]:
from sklearn.metrics import classification_report
# Generate classification report
report = classification_report(y_test_cleaned, y_pred)
print(report)

              precision    recall  f1-score   support

          -1       0.75      0.74      0.74      1657
           0       0.83      0.94      0.88      2393
           1       0.90      0.81      0.86      3199

    accuracy                           0.84      7249
   macro avg       0.83      0.83      0.83      7249
weighted avg       0.84      0.84      0.84      7249

