In [37]:
import mlflow
mlflow.set_tracking_uri("http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/")

In [38]:
mlflow.set_experiment("Exp 8 - improving lightGBM")

<Experiment: artifact_location='s3://ms-mlflow-bucket/339975072325781963', creation_time=1751110981025, experiment_id='339975072325781963', last_update_time=1751110981025, lifecycle_stage='active', name='Exp 8 - improving lightGBM', tags={}>

In [39]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

In [40]:
dataset = pd.read_csv('data_preprocessed.csv')

In [41]:
cleaned_dataset = dataset.dropna().drop_duplicates()

In [42]:
# Separate features and target
X_cleaned = cleaned_dataset['clean_comment']
y_cleaned = cleaned_dataset['category']

In [43]:
# Split the cleaned data into train and test sets (80-20 split)
X_train_cleaned, X_test_cleaned, y_train_cleaned, y_test_cleaned = train_test_split(X_cleaned, y_cleaned, test_size=0.2, random_state=42)

In [44]:
# Apply TfidfVectorizer with trigram setting and max_features=1000
tfidf_cleaned = TfidfVectorizer(ngram_range=(1, 3), max_features=2000)

In [45]:
# Fit the vectorizer on the training data and transform both train and test sets
X_train_tfidf_cleaned = tfidf_cleaned.fit_transform(X_train_cleaned)
X_test_tfidf_cleaned = tfidf_cleaned.transform(X_test_cleaned)

In [46]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, accuracy_score
import optuna

In [47]:
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    with mlflow.start_run():
        # Log model type and trial number
        mlflow.set_tag("mlflow.runName", f"Trial_{trial_number}_{model_name}_class_weight_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Log hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy =  cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy').mean()
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

        return accuracy




In [None]:
# Function to optimize LightGBM hyperparameters
def objective(trial):
    # Define hyperparameters to be tuned
    param = {
        "objective": "multiclass",
        "num_class": 3,  # Assuming 3 categories (-1, 0, 1)
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 1e-1),
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
        "max_depth": trial.suggest_int("max_depth", 3, 20),
        "metric": "multi_logloss",
        "is_unbalance": True,
        "class_weight": "balanced",
    }

    # Define the LightGBM model with the trial parameters
    model = lgb.LGBMClassifier(**param)

    # Perform cross-validation
    accuracy = log_mlflow("LightGBM", model, X_train_tfidf_cleaned, X_test_tfidf_cleaned, y_train_cleaned, y_test_cleaned, param, trial.number)

    # Return the average score across folds
    return accuracy

In [49]:
# Create an Optuna study to optimize the hyperparameters
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2025-06-28 18:07:30,138] A new study created in memory with name: no-name-8bcac5f3-06a8-434c-b06d-54d288be479a


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.109164 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.074472 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.067853 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054912 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_0_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/a30edfe6f6bf46a98cfde53a12c9d828
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:09:27,596] Trial 0 finished with value: 0.8271711324971536 and parameters: {'learning_rate': 0.08962976706654908, 'n_estimators': 374, 'max_depth': 11}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.099626 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062880 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.087439 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.065964 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_1_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/da2dffb60b7d4b46bed1046e77352914
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:11:00,069] Trial 1 finished with value: 0.7510864013434606 and parameters: {'learning_rate': 0.07234392714673286, 'n_estimators': 72, 'max_depth': 9}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.100506 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073263 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.071098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.080933 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_2_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/c4d3ddd4ec3845279a0bde47b836bd6a
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:12:37,720] Trial 2 finished with value: 0.7579844706611896 and parameters: {'learning_rate': 0.0232667062799753, 'n_estimators': 165, 'max_depth': 14}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.104435 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062983 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066974 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_3_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/92d436a2ae344630a2d4849b4f2aeaae
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:14:02,919] Trial 3 finished with value: 0.7899564637200521 and parameters: {'learning_rate': 0.06266292622246189, 'n_estimators': 135, 'max_depth': 12}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.112622 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066348 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.065750 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.077996 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_4_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/1a1c1cdca2f34c9cbbb36f32512c7cb5
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:15:21,481] Trial 4 finished with value: 0.7251844167868632 and parameters: {'learning_rate': 0.038834489234110446, 'n_estimators': 133, 'max_depth': 6}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.097224 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072374 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.077554 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.085829 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_5_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/7acb46a605934deda95144b3b08a0283
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:16:35,320] Trial 5 finished with value: 0.761605906019421 and parameters: {'learning_rate': 0.07880264029460061, 'n_estimators': 52, 'max_depth': 14}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.101714 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072476 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061051 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.088719 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_6_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/e19197cc83db4b3896d14b0d2300436e
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:17:52,110] Trial 6 finished with value: 0.6333376444214519 and parameters: {'learning_rate': 0.0029046105814595474, 'n_estimators': 72, 'max_depth': 11}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.100446 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.080576 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062285 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073446 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_7_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/c2b2a5eebe654d34b57c439246e8d573
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:20:12,491] Trial 7 finished with value: 0.8260329909587062 and parameters: {'learning_rate': 0.07433126216258033, 'n_estimators': 400, 'max_depth': 11}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.107257 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071343 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061768 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.087328 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_8_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/6de2a89e082e49679ba7465affdc855e
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:21:44,314] Trial 8 finished with value: 0.7788852468283524 and parameters: {'learning_rate': 0.05022027734074156, 'n_estimators': 162, 'max_depth': 10}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.131504 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073371 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.081049 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.192838 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_9_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/73f4be451f3c4cd8a21a88f4355de7d4
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:23:10,229] Trial 9 finished with value: 0.7718839148163248 and parameters: {'learning_rate': 0.057879328522191666, 'n_estimators': 70, 'max_depth': 18}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.107613 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076890 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068516 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074111 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_10_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/3e32d2db7fbe4ddf88e291b5d1072f2c
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:24:32,806] Trial 10 finished with value: 0.8060976808344439 and parameters: {'learning_rate': 0.09899068264711176, 'n_estimators': 486, 'max_depth': 3}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.120173 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.085795 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.071149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072999 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_11_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/f85bce24269a41a3ae0dfaca6c38c2db
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:27:16,350] Trial 11 finished with value: 0.8256536615984258 and parameters: {'learning_rate': 0.09604002231116768, 'n_estimators': 382, 'max_depth': 18}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.101631 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079608 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073753 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.082119 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_12_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/9ca2ca2918e14cb4b3be0cb99c9942c6
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:29:00,711] Trial 12 finished with value: 0.8192729335612786 and parameters: {'learning_rate': 0.08404746241011077, 'n_estimators': 335, 'max_depth': 8}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.157449 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.082308 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.076831 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071032 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_13_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/eed6d0646c8244ce824a3e49bf9ab064
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:31:08,923] Trial 13 finished with value: 0.826515872084591 and parameters: {'learning_rate': 0.08451840701529735, 'n_estimators': 422, 'max_depth': 14}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.181215 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067470 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071937 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075952 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_14_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/30c96f78d7714596892539fff1043df3
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:33:43,284] Trial 14 finished with value: 0.8251707769037594 and parameters: {'learning_rate': 0.08800779159231314, 'n_estimators': 475, 'max_depth': 15}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.141460 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.080106 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076236 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075221 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_15_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/9abc1eec30204fdd8f0efa9e3d93d437
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:35:39,815] Trial 15 finished with value: 0.8175828977992324 and parameters: {'learning_rate': 0.0403771256364729, 'n_estimators': 255, 'max_depth': 20}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.102267 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.090667 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.061781 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073252 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_16_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/e9358285ba8f4f4b80e9d9cc942954fa
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:37:23,054] Trial 16 finished with value: 0.8260675082139076 and parameters: {'learning_rate': 0.06897710943840177, 'n_estimators': 286, 'max_depth': 16}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.089327 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066055 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079218 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066905 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_17_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/1f135aad6b7d4eb98c15547acc1d5753
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:38:54,060] Trial 17 finished with value: 0.8257570527688606 and parameters: {'learning_rate': 0.09958566056381585, 'n_estimators': 425, 'max_depth': 7}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.093925 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.071475 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.071396 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.059614 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_18_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/783cd4acba6a49d7be0a8e13fd930de6
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:40:33,907] Trial 18 finished with value: 0.8262399838576874 and parameters: {'learning_rate': 0.08529487191019267, 'n_estimators': 339, 'max_depth': 12}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.101746 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.061835 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.064499 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074034 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_19_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/6e629adf1a3d4a6f857725d2bb2af833
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:41:53,489] Trial 19 finished with value: 0.7868178845062347 and parameters: {'learning_rate': 0.05354855838344853, 'n_estimators': 439, 'max_depth': 4}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.081079 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059803 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.070533 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_20_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/a0364d746207479b87d33a248474ef17
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:43:24,279] Trial 20 finished with value: 0.8255156425405614 and parameters: {'learning_rate': 0.09130130855115973, 'n_estimators': 265, 'max_depth': 13}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.046720 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062485 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062243 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_21_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/e797937430ae4383a682fda5c65bfdaf
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:45:08,801] Trial 21 finished with value: 0.8269297508191067 and parameters: {'learning_rate': 0.08112845220351532, 'n_estimators': 340, 'max_depth': 16}. Best is trial 0 with value: 0.8271711324971536.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.088472 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.064371 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072861 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_22_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/b20866c5703b4213b299e02d917dadaa
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:46:56,714] Trial 22 finished with value: 0.8282058543149139 and parameters: {'learning_rate': 0.06565656693749489, 'n_estimators': 345, 'max_depth': 17}. Best is trial 22 with value: 0.8282058543149139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.092252 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.077025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056015 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061311 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_23_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/d34f1544bc924988998f5bdafcff5354
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:48:44,569] Trial 23 finished with value: 0.8276195498995603 and parameters: {'learning_rate': 0.0651443259538295, 'n_estimators': 344, 'max_depth': 17}. Best is trial 22 with value: 0.8282058543149139.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.082785 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.069252 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060175 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_24_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/cf7002beef55444fb7aa96cf584c88e1
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:50:32,269] Trial 24 finished with value: 0.8282748317248121 and parameters: {'learning_rate': 0.06664610736235796, 'n_estimators': 301, 'max_depth': 20}. Best is trial 24 with value: 0.8282748317248121.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.083810 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068964 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067989 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073998 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_25_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/b3947878733f48c99eec2dc1066b2648
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:52:07,488] Trial 25 finished with value: 0.82506721443181 and parameters: {'learning_rate': 0.06317437231007397, 'n_estimators': 223, 'max_depth': 20}. Best is trial 24 with value: 0.8282748317248121.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.078033 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061030 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.057220 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059535 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_26_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/fda529f76e5a4cdbb7880c94ad089bfc
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:53:52,099] Trial 26 finished with value: 0.820514551920918 and parameters: {'learning_rate': 0.04235005112709441, 'n_estimators': 309, 'max_depth': 18}. Best is trial 24 with value: 0.8282748317248121.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.096849 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.068986 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062300 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061292 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_27_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/6e7ebc6290704661b5f3d0c84ec15eb2
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:55:26,340] Trial 27 finished with value: 0.8247568267936125 and parameters: {'learning_rate': 0.06492369049598361, 'n_estimators': 230, 'max_depth': 19}. Best is trial 24 with value: 0.8282748317248121.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.084358 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072657 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066811 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.072278 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_28_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/2dd045d09b484c1aa7c5800f2f8b762d
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:57:11,735] Trial 28 finished with value: 0.8226529337097398 and parameters: {'learning_rate': 0.048295843810812276, 'n_estimators': 306, 'max_depth': 17}. Best is trial 24 with value: 0.8282748317248121.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.104234 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.070490 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.091967 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.083322 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_29_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/9d91c9fee8104c5c9ae872c47a1c73cc
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 18:59:30,335] Trial 29 finished with value: 0.8186521154595048 and parameters: {'learning_rate': 0.029296699253551085, 'n_estimators': 364, 'max_depth': 20}. Best is trial 24 with value: 0.8282748317248121.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.109240 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.142332 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.071100 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.112563 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_30_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/5417cd40ecbf4270b28114a0161926cb
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:02:04,538] Trial 30 finished with value: 0.8270676735198693 and parameters: {'learning_rate': 0.05633213523308705, 'n_estimators': 365, 'max_depth': 16}. Best is trial 24 with value: 0.8282748317248121.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098292 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.088768 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.077391 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.080146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_31_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/67fbc08abb6f48f0b5909dfa638922e0
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:03:40,669] Trial 31 finished with value: 0.815099646804827 and parameters: {'learning_rate': 0.07233743343763442, 'n_estimators': 301, 'max_depth': 9}. Best is trial 24 with value: 0.8282748317248121.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.123617 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073703 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.100699 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.082251 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_32_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/9da5f28ed4e24307a129b4a74682dd9d
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:06:03,014] Trial 32 finished with value: 0.8264813833796417 and parameters: {'learning_rate': 0.07563339113620039, 'n_estimators': 398, 'max_depth': 19}. Best is trial 24 with value: 0.8282748317248121.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.107171 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.069183 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073895 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.071994 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_33_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/be48c69064f0493ea7560d28ea0d4d0a
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:08:05,104] Trial 33 finished with value: 0.8285507556395313 and parameters: {'learning_rate': 0.0679484162034985, 'n_estimators': 333, 'max_depth': 17}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.100213 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.072977 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.077240 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072250 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_34_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/09b34c629fac4ffd852a503971089404
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:09:48,873] Trial 34 finished with value: 0.8232737839305475 and parameters: {'learning_rate': 0.06740623323849047, 'n_estimators': 230, 'max_depth': 17}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.129258 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.117121 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.069826 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074340 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_35_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/85a2f6bbc68648f8af2bcab62b3e7e1f
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:12:14,787] Trial 35 finished with value: 0.8266537519599749 and parameters: {'learning_rate': 0.05745960881628599, 'n_estimators': 332, 'max_depth': 17}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.106994 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.077721 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.072472 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.090337 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_36_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/5bfa5aa3e8af494d8baddcc67366a022
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:14:08,933] Trial 36 finished with value: 0.8250672429820624 and parameters: {'learning_rate': 0.06987963804996439, 'n_estimators': 282, 'max_depth': 15}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.118432 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079838 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066178 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.069056 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_37_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/c6069a8fdb7c4c80a39fe8c4bea09296
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:16:34,960] Trial 37 finished with value: 0.8282403216071738 and parameters: {'learning_rate': 0.06020859537934641, 'n_estimators': 358, 'max_depth': 19}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.111940 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066775 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067386 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068127 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_38_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/ee9f2fab7081428b9ee08379d1c1d7bd
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:19:09,193] Trial 38 finished with value: 0.8275505974711329 and parameters: {'learning_rate': 0.061458129411875326, 'n_estimators': 452, 'max_depth': 19}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.115775 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076561 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.064831 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073073 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_39_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/5f1e86105333445280f4db13da909e56
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:21:38,926] Trial 39 finished with value: 0.8271366616361121 and parameters: {'learning_rate': 0.04738232971215263, 'n_estimators': 379, 'max_depth': 19}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.114113 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.078827 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.105772 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.081232 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_40_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/09b42343bea04cf39bc76f70acdd73f8
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:23:25,558] Trial 40 finished with value: 0.7866109665516662 and parameters: {'learning_rate': 0.03180593660488657, 'n_estimators': 200, 'max_depth': 15}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.098303 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079335 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.106674 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074441 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_41_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/2d590cd164cf4c819b6ec832d79b523a
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:25:39,294] Trial 41 finished with value: 0.8266193239243123 and parameters: {'learning_rate': 0.07753317271292204, 'n_estimators': 357, 'max_depth': 18}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.094162 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.069090 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.086967 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.067812 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_42_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/3fd7dcc866d4406bb6bdabc58cc25b8a
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:27:40,503] Trial 42 finished with value: 0.8269642038362403 and parameters: {'learning_rate': 0.0617980079707027, 'n_estimators': 319, 'max_depth': 17}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.119207 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.156189 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072234 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060538 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_43_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/a2607d6ed3b94fc19c6ec69123405bf6
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:30:19,551] Trial 43 finished with value: 0.8285507413644052 and parameters: {'learning_rate': 0.05346529918587592, 'n_estimators': 397, 'max_depth': 20}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.128300 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.094777 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.162024 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073670 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_44_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/34c9d0c4e4cb411680cf724f3f970375
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:33:38,247] Trial 44 finished with value: 0.7733324796807853 and parameters: {'learning_rate': 0.009642155776005669, 'n_estimators': 401, 'max_depth': 20}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.142666 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.079030 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076654 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.084194 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_45_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/851636ee5e9d4290af49a47a0b30abd0
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:36:12,294] Trial 45 finished with value: 0.8276885201718954 and parameters: {'learning_rate': 0.05243618912563732, 'n_estimators': 404, 'max_depth': 19}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.139529 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.108006 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.149715 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.071097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_46_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/56a88322897e40b7bae902eb88465010
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:38:46,673] Trial 46 finished with value: 0.8263088898919545 and parameters: {'learning_rate': 0.04617684798613082, 'n_estimators': 383, 'max_depth': 18}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.120248 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.074969 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074961 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.069167 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_47_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/3c65e72a92284e86a2f8be1d6ce5cec9
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:40:55,504] Trial 47 finished with value: 0.8276540064854753 and parameters: {'learning_rate': 0.058893503827966445, 'n_estimators': 320, 'max_depth': 20}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.160811 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.092220 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062752 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.151692 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_48_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/d9c3e22b39544c8aadc8ed0e12f61197
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:43:01,998] Trial 48 finished with value: 0.812409399342659 and parameters: {'learning_rate': 0.035293804651865573, 'n_estimators': 259, 'max_depth': 19}. Best is trial 33 with value: 0.8285507556395313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.094570 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.079455 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 72695
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1914
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73130
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073593 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72472
[LightGBM] [Info] Number of data points in the train set: 19330, number of used features: 1915
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_49_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963/runs/ee5010d61c224e3dbc97a69d910dc896
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/339975072325781963


[I 2025-06-28 19:45:18,310] Trial 49 finished with value: 0.8282058578836954 and parameters: {'learning_rate': 0.05437135485116905, 'n_estimators': 462, 'max_depth': 18}. Best is trial 33 with value: 0.8285507556395313.


In [50]:
# Extract the best hyperparameters
best_params = study.best_params
best_params

{'learning_rate': 0.0679484162034985, 'n_estimators': 333, 'max_depth': 17}

In [60]:
best_model = lgb.LGBMClassifier(

    objective='multiclass',
    num_class=3,
    metric="multi_logloss",
    is_unbalance= True,
    class_weight= "balanced",
    reg_alpha= 0.1,  # L1 regularization
    reg_lambda= 0.1,  # L2 regularization
    learning_rate= 0.0679484162034985,
    max_depth= 17,
    n_estimators=333
)

In [61]:
# Fit the model on the resampled training data
best_model.fit(X_train_tfidf_cleaned, y_train_cleaned)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.146897 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 102919
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


In [62]:
# Predict on the train set
y_train_pred = best_model.predict(X_train_tfidf_cleaned)



In [63]:
# Calculate accuracy on the test set
accuracy_train = accuracy_score(y_train_cleaned, y_train_pred)
accuracy_train

0.8915982617093192

In [64]:
# Generate classification report
report_train = classification_report(y_train_cleaned, y_train_pred)
print(report_train)

              precision    recall  f1-score   support

          -1       0.87      0.84      0.85      6543
           0       0.83      0.98      0.90      9984
           1       0.97      0.85      0.91     12467

    accuracy                           0.89     28994
   macro avg       0.89      0.89      0.89     28994
weighted avg       0.90      0.89      0.89     28994



In [65]:
# Predict on the test set
y_pred = best_model.predict(X_test_tfidf_cleaned)



In [66]:
# Calculate accuracy on the test set
accuracy = accuracy_score(y_test_cleaned, y_pred)
accuracy

0.8379086770589047

In [67]:
# Generate classification report
report = classification_report(y_test_cleaned, y_pred)
print(report)

              precision    recall  f1-score   support

          -1       0.76      0.74      0.75      1657
           0       0.81      0.96      0.88      2393
           1       0.91      0.80      0.85      3199

    accuracy                           0.84      7249
   macro avg       0.83      0.83      0.83      7249
weighted avg       0.84      0.84      0.84      7249



In [59]:
import re
import numpy as np

# Assuming you have pre-trained tfidf_vectorizer and lgbm_model loaded
# tfidf_vectorizer: Your trained TF-IDF vectorizer
# lgbm_model: Your trained LightGBM model

# Function to clean and preprocess a YouTube comment (same as used during training)
def preprocess_comment(comment):
    # Lowercasing
    comment = comment.lower()

    # Remove special characters, URLs, punctuation, and extra spaces
    comment = re.sub(r"http\S+|www\S+|https\S+", '', comment, flags=re.MULTILINE)  # Remove URLs
    comment = re.sub(r'\W', ' ', comment)  # Remove special characters
    comment = re.sub(r'\s+', ' ', comment).strip()  # Remove extra spaces and newlines

    return comment

# Prediction function
def predict_sentiment(comment, tfidf_vectorizer, lgbm_model):
    # Step 1: Preprocess the YouTube comment
    cleaned_comment = preprocess_comment(comment)

    # Step 2: Transform the comment using the trained TF-IDF vectorizer
    comment_tfidf = tfidf_vectorizer.transform([cleaned_comment])

    # Step 3: Use the trained LightGBM model to predict the sentiment
    prediction = lgbm_model.predict(comment_tfidf)
    prediction_proba = lgbm_model.predict_proba(comment_tfidf)

    # Step 4: Get the predicted sentiment (label) and probability
    sentiment_class = np.argmax(prediction_proba)
    sentiment_proba = np.max(prediction_proba)

    # Step 5: Return the sentiment label and confidence
    return {
        'sentiment_class': int(prediction[0]),  # -1, 0, or 1 depending on your labels
        'confidence': sentiment_proba
    }

# Example usage:
comment1 = "I absolutely hate this video!"
comment2 = "The explanations were confusing and the video quality was poor."
comment3 = "I didn’t learn anything useful. Really disappointed."
comment4 = "Wow, the explanation was so clear and helpful. Definitely subscribing!"
comment5 = "This is the worst video I’ve seen on this topic, very misleading"
comment6 = "Not much to say about this, just a standard video."
comment7 = "The video is okay, but I expected more depth in the content."
comment8 = "Superb content! Mazaa aa gaya dekh ke. Best video on this topic!"
comment9 = "Poor video quality aur explanation bhi weak tha."
comment10 = "Yeh video theek tha, but I was expecting more depth."
result = predict_sentiment(comment10, tfidf_cleaned, best_model)
print(f"Predicted Sentiment: {result['sentiment_class']}, Confidence: {result['confidence']}")

Predicted Sentiment: 0, Confidence: 0.6834945849257843


