In [78]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import spacy
import optuna
from sklearn.model_selection import cross_val_score

In [62]:
import mlflow
mlflow.set_tracking_uri("http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/")

In [63]:
mlflow.set_experiment("Exp 9 - lightGBM HPT on Custom Features")

<Experiment: artifact_location='s3://ms-mlflow-bucket/334855376457357198', creation_time=1751119394395, experiment_id='334855376457357198', last_update_time=1751119394395, lifecycle_stage='active', name='Exp 9 - lightGBM HPT on Custom Features', tags={}>

In [64]:
# Load dataset
dataset = pd.read_csv('data_preprocessed.csv')

# Drop rows with NaN values in 'clean_comment'
cleaned_dataset = dataset.dropna().drop_duplicates()

In [65]:
# Separate features and target
X_cleaned = cleaned_dataset['clean_comment']
y_cleaned = cleaned_dataset['category']

# Split the cleaned data into train and test sets (80-20 split)
X_train_cleaned, X_test_cleaned, y_train_cleaned, y_test_cleaned = train_test_split(X_cleaned, y_cleaned, test_size=0.2, random_state=42)


In [66]:
# Load spacy language model for POS tagging
nlp = spacy.load('en_core_web_sm')

In [67]:
# All POS tags we care about
ALL_POS_TAGS = ['NOUN', 'VERB', 'ADJ', 'ADV', 'PRON', 'PROPN', 'INTJ', 'NUM', 'DET', 'ADP', 'CCONJ', 'PART', 'SCONJ', 'SYM', 'X', 'PUNCT', 'SPACE']

def extract_custom_features(text):
    doc = nlp(text)
    word_list = [token.text for token in doc]

    comment_length = len(text)
    word_count = len(word_list)
    avg_word_length = sum(len(word) for word in word_list) / word_count if word_count > 0 else 0
    unique_word_count = len(set(word_list))
    lexical_diversity = unique_word_count / word_count if word_count > 0 else 0

    pos_tags = [token.pos_ for token in doc]
    pos_proportion = {tag: pos_tags.count(tag) / word_count for tag in ALL_POS_TAGS}
    
    return {
        'comment_length': comment_length,
        'word_count': word_count,
        'avg_word_length': avg_word_length,
        'unique_word_count': unique_word_count,
        'lexical_diversity': lexical_diversity,
        **pos_proportion
    }


In [68]:
# Apply the custom feature extraction
train_custom_features = pd.DataFrame([extract_custom_features(text) for text in X_train_cleaned])
test_custom_features = pd.DataFrame([extract_custom_features(text) for text in X_test_cleaned])

In [69]:
train_custom_features.shape, test_custom_features.shape

((28994, 22), (7249, 22))

In [70]:
# Replace NaN values in POS tag proportions with 0
train_custom_features.fillna(0, inplace=True)
test_custom_features.fillna(0, inplace=True)

In [71]:
# Apply TfidfVectorizer with trigram setting and max_features=1000
tfidf = TfidfVectorizer(ngram_range=(1, 3), max_features=2000)
X_train_tfidf = tfidf.fit_transform(X_train_cleaned)
X_test_tfidf = tfidf.transform(X_test_cleaned)

In [72]:
# Convert TF-IDF to DataFrame
X_train_tfidf_df = pd.DataFrame(X_train_tfidf.toarray(), columns=tfidf.get_feature_names_out())
X_test_tfidf_df = pd.DataFrame(X_test_tfidf.toarray(), columns=tfidf.get_feature_names_out())

In [73]:
# Combine TF-IDF and custom features
X_train_combined = pd.concat([X_train_tfidf_df.reset_index(drop=True), train_custom_features.reset_index(drop=True)], axis=1)
X_test_combined = pd.concat([X_test_tfidf_df.reset_index(drop=True), test_custom_features.reset_index(drop=True)], axis=1)

In [74]:
X_train_combined.shape, X_test_combined.shape

((28994, 2022), (7249, 2022))

In [79]:
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    with mlflow.start_run():
        # Log model type and trial number
        mlflow.set_tag("mlflow.runName", f"Trial_{trial_number}_{model_name}_class_weight_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Log hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy =  cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy').mean()
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

        return accuracy




In [80]:
# Function to optimize LightGBM hyperparameters
def objective(trial):
    # Define hyperparameters to be tuned
    param = {
        "objective": "multiclass",
        "num_class": 3,  # Assuming 3 categories (-1, 0, 1)
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 1e-1),
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
        "max_depth": trial.suggest_int("max_depth", 3, 20),
        "metric": "multi_logloss",
        "is_unbalance": True,
        "class_weight": "balanced",
    }

    # Define the LightGBM model with the trial parameters
    model = lgb.LGBMClassifier(**param)

    # Perform cross-validation
    accuracy = log_mlflow("LightGBM", model, X_train_combined, X_test_combined, y_train_cleaned, y_test_cleaned, param, trial.number)

    # Return the average score across folds
    return accuracy

In [81]:
# Create an Optuna study to optimize the hyperparameters
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2025-06-28 19:46:58,510] A new study created in memory with name: no-name-cb8a593b-430e-4c6f-9f90-364c97dd8796


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.115705 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068642 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_0_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/37c1d69926884203a7863fdc013d84e0
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 19:49:36,046] Trial 0 finished with value: 0.807511703462232 and parameters: {'learning_rate': 0.029870459633479507, 'n_estimators': 461, 'max_depth': 11}. Best is trial 0 with value: 0.807511703462232.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067210 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_1_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/2a7e482076e4489d94129fd67be55b90
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 19:51:20,865] Trial 1 finished with value: 0.8100295716377225 and parameters: {'learning_rate': 0.06607609616380879, 'n_estimators': 397, 'max_depth': 6}. Best is trial 1 with value: 0.8100295716377225.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.109641 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.070045 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_2_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/92fc0b2ca91c4544b21e9bacb09bccc8
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 19:54:02,031] Trial 2 finished with value: 0.8256190372797776 and parameters: {'learning_rate': 0.05171328612527286, 'n_estimators': 500, 'max_depth': 18}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.090353 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.063829 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_3_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/a6ae59ba025442f7b37c26b7f47d2806
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 19:55:34,177] Trial 3 finished with value: 0.8132026859506492 and parameters: {'learning_rate': 0.09462208771353683, 'n_estimators': 451, 'max_depth': 4}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.095867 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.067847 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071



🏃 View run Trial_4_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/1444fbb018c0480a8464b9a61591e59d
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 19:57:16,133] Trial 4 finished with value: 0.6761399294980066 and parameters: {'learning_rate': 0.008473527812055936, 'n_estimators': 320, 'max_depth': 5}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.088184 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068374 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_5_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/5dacdf52cb564403a44eaf56db0d0e6d
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 19:58:42,721] Trial 5 finished with value: 0.7418775638126692 and parameters: {'learning_rate': 0.03314587138169881, 'n_estimators': 197, 'max_depth': 6}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.093203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075



🏃 View run Trial_6_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/d48e1e71ac4442dc93606d9b6653fbf6
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:00:50,065] Trial 6 finished with value: 0.7297025584737721 and parameters: {'learning_rate': 0.009997383094082372, 'n_estimators': 392, 'max_depth': 8}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.104629 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072355 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_7_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/ec61ea83110e44b3bc96d0ff5ba3a83c
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:02:18,919] Trial 7 finished with value: 0.7749188815952511 and parameters: {'learning_rate': 0.04810557290467604, 'n_estimators': 376, 'max_depth': 4}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.094539 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061819 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_8_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/155bdde30d934029ab858b125d1fa476
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:03:58,640] Trial 8 finished with value: 0.8098226358392462 and parameters: {'learning_rate': 0.06408502636082439, 'n_estimators': 356, 'max_depth': 7}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.120887 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.069254 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075



🏃 View run Trial_9_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/8329e234d1f249748abbb466795534ee
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:05:17,989] Trial 9 finished with value: 0.7136648502996064 and parameters: {'learning_rate': 0.03010514970224895, 'n_estimators': 52, 'max_depth': 16}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.114883 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.071137 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.070



🏃 View run Trial_10_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/2034c5f7e7de45ff8f4363b293036cf0
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:07:09,679] Trial 10 finished with value: 0.8254121086188645 and parameters: {'learning_rate': 0.09548741115420503, 'n_estimators': 227, 'max_depth': 20}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.113006 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.067978 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_11_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/258d5ef83a194c128aa968f1f91713f4
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:09:00,832] Trial 11 finished with value: 0.8255155283395516 and parameters: {'learning_rate': 0.08935556114508772, 'n_estimators': 217, 'max_depth': 20}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.095383 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075644 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_12_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/209f8e476468417e874b94259b45b613
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:10:34,434] Trial 12 finished with value: 0.8169965398521554 and parameters: {'learning_rate': 0.0762829923293241, 'n_estimators': 144, 'max_depth': 20}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.113436 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073997 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]



🏃 View run Trial_13_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/2644a6b12136443aaebccfeb1bc16195
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:12:30,873] Trial 13 finished with value: 0.8245843368747067 and parameters: {'learning_rate': 0.0802702848821643, 'n_estimators': 288, 'max_depth': 16}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.077917 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068870 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.068



🏃 View run Trial_14_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/ac4e573427bc4ee080a8533eac9580e5
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:15:07,032] Trial 14 finished with value: 0.825446536654527 and parameters: {'learning_rate': 0.04774532887156113, 'n_estimators': 498, 'max_depth': 17}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103416 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.065835 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_15_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/01d00176f88e41c79c7232eab447a018
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:16:38,286] Trial 15 finished with value: 0.8010621050786332 and parameters: {'learning_rate': 0.08180615833674175, 'n_estimators': 124, 'max_depth': 13}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.100441 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070584 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.097



🏃 View run Trial_16_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/92fde01f8eca4ef1b7c0ea9184a1fa3c
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:18:30,987] Trial 16 finished with value: 0.8208249574030234 and parameters: {'learning_rate': 0.0608046846127923, 'n_estimators': 235, 'max_depth': 18}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.097100 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.081212 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_17_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/4dfd8a6da7f844818ca0ea22c44d3ecc
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:20:11,492] Trial 17 finished with value: 0.7367040332370619 and parameters: {'learning_rate': 0.020985284135314065, 'n_estimators': 160, 'max_depth': 11}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.099916 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073717 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_18_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/653b4624e4664bb8ac62bfa16cd837e3
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:22:06,489] Trial 18 finished with value: 0.8126507667455796 and parameters: {'learning_rate': 0.04485164776089485, 'n_estimators': 282, 'max_depth': 14}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.120481 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.087058 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_19_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/57724a5c61c14c25a573cab9ab40942e
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:23:26,960] Trial 19 finished with value: 0.7752292228392883 and parameters: {'learning_rate': 0.0873024227372025, 'n_estimators': 50, 'max_depth': 19}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.115921 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097201 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]



🏃 View run Trial_20_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/4f32f2ef3415454f971e9fa7af65b657
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:25:33,651] Trial 20 finished with value: 0.8241704046084676 and parameters: {'learning_rate': 0.07107589920232903, 'n_estimators': 328, 'max_depth': 14}. Best is trial 2 with value: 0.8256190372797776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.109374 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061265 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079



🏃 View run Trial_21_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/ddfe86c1bf7b478b816b9c5acc8b1936
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:28:14,411] Trial 21 finished with value: 0.8259294249179753 and parameters: {'learning_rate': 0.05677122487711997, 'n_estimators': 497, 'max_depth': 17}. Best is trial 21 with value: 0.8259294249179753.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.108516 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071512 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_22_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/8cb35d445a8747e2be138bbec4fce326
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:30:51,089] Trial 22 finished with value: 0.8262742976923403 and parameters: {'learning_rate': 0.05542259640327083, 'n_estimators': 440, 'max_depth': 18}. Best is trial 22 with value: 0.8262742976923403.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.099679 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072



🏃 View run Trial_23_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/559c7bf95a074b208385f4cb10ec4bf6
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:33:26,085] Trial 23 finished with value: 0.826308861341702 and parameters: {'learning_rate': 0.058573389854772395, 'n_estimators': 491, 'max_depth': 17}. Best is trial 23 with value: 0.826308861341702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.104229 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074426 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062



🏃 View run Trial_24_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/24c24cf6d3ea445ebc37da09f8fa1560
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:35:38,894] Trial 24 finished with value: 0.8252396436814294 and parameters: {'learning_rate': 0.05521671314671243, 'n_estimators': 432, 'max_depth': 15}. Best is trial 23 with value: 0.826308861341702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.100250 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068662 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.064



🏃 View run Trial_25_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/378d33affa204fbbbfeb9ce5ca325e35
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:37:59,310] Trial 25 finished with value: 0.822376974107205 and parameters: {'learning_rate': 0.03900742000379429, 'n_estimators': 423, 'max_depth': 17}. Best is trial 23 with value: 0.826308861341702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.114817 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.060303 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_26_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/b1e9df08e36e4d00b697f0e5d558ea32
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:39:53,767] Trial 26 finished with value: 0.8213077742908403 and parameters: {'learning_rate': 0.058243201508054114, 'n_estimators': 469, 'max_depth': 9}. Best is trial 23 with value: 0.826308861341702.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.082113 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053075 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]



🏃 View run Trial_27_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/c0c807630a4e447eba43a786283f08a2
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:41:51,791] Trial 27 finished with value: 0.8240324854764866 and parameters: {'learning_rate': 0.0682782790093017, 'n_estimators': 423, 'max_depth': 12}. Best is trial 23 with value: 0.826308861341702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.089090 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.063704 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_28_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/ec70206ccf314ea8a6ffc9e596f65d42
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:44:12,589] Trial 28 finished with value: 0.8255845021806683 and parameters: {'learning_rate': 0.04090726928158771, 'n_estimators': 479, 'max_depth': 18}. Best is trial 23 with value: 0.826308861341702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066150 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.064726 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073



🏃 View run Trial_29_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/09e27b1fe0794a208fe5066a3514384c
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:46:17,020] Trial 29 finished with value: 0.7933019037879333 and parameters: {'learning_rate': 0.021909974402878445, 'n_estimators': 455, 'max_depth': 11}. Best is trial 23 with value: 0.826308861341702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.080874 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056186 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053



🏃 View run Trial_30_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/1ae09de0c4ea4497989892b0c1c539c7
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:48:25,629] Trial 30 finished with value: 0.824929256043232 and parameters: {'learning_rate': 0.07227792030570288, 'n_estimators': 442, 'max_depth': 15}. Best is trial 23 with value: 0.826308861341702.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.086711 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_31_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/3bc02430095c4d43b58c8e4f5e60ce63
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:50:47,668] Trial 31 finished with value: 0.8258950432764727 and parameters: {'learning_rate': 0.055010097887390134, 'n_estimators': 486, 'max_depth': 18}. Best is trial 23 with value: 0.826308861341702.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.069981 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079613 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_32_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/4cedfbca5b7b48928cc8330a27745aa5
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:53:18,444] Trial 32 finished with value: 0.8263778137701294 and parameters: {'learning_rate': 0.05502521345236491, 'n_estimators': 484, 'max_depth': 17}. Best is trial 32 with value: 0.8263778137701294.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.080277 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074160 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.098



🏃 View run Trial_33_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/1e9421450ccc4318964e067dc375bd07
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:55:56,587] Trial 33 finished with value: 0.8247912512604937 and parameters: {'learning_rate': 0.06339158714369302, 'n_estimators': 404, 'max_depth': 16}. Best is trial 32 with value: 0.8263778137701294.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.113680 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.057462 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066



🏃 View run Trial_34_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/3f68fa1597dc458082beb9058c7f632c
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 20:58:42,484] Trial 34 finished with value: 0.8261363357349807 and parameters: {'learning_rate': 0.05167132446888176, 'n_estimators': 500, 'max_depth': 19}. Best is trial 32 with value: 0.8263778137701294.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.151608 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.096792 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_35_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/614d04a75e7744e5a3d08a1b15cedaa0
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:01:58,513] Trial 35 finished with value: 0.824687802989554 and parameters: {'learning_rate': 0.03778027634978215, 'n_estimators': 463, 'max_depth': 19}. Best is trial 32 with value: 0.8263778137701294.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.101329 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.082512 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.068



🏃 View run Trial_36_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/b33479941f784312b7d4e8bdd4f8ee99
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:04:32,772] Trial 36 finished with value: 0.8266536770155622 and parameters: {'learning_rate': 0.05083620093854858, 'n_estimators': 460, 'max_depth': 19}. Best is trial 36 with value: 0.8266536770155622.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.101596 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.077228 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_37_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/6c57c19aeca64a55ac6c711ab06fb973
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:06:49,530] Trial 37 finished with value: 0.8197212260563308 and parameters: {'learning_rate': 0.043220414523300395, 'n_estimators': 362, 'max_depth': 15}. Best is trial 36 with value: 0.8266536770155622.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.112941 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.069918 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060



🏃 View run Trial_38_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/eeca96ab209549208379604fba335f05
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:09:17,765] Trial 38 finished with value: 0.8258259266840938 and parameters: {'learning_rate': 0.05078241475189429, 'n_estimators': 408, 'max_depth': 19}. Best is trial 36 with value: 0.8266536770155622.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.091539 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.092816 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.064



🏃 View run Trial_39_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/9089c84afd644a7898f83abfb4049f03
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:12:16,306] Trial 39 finished with value: 0.8271711003781195 and parameters: {'learning_rate': 0.06631684007924099, 'n_estimators': 449, 'max_depth': 18}. Best is trial 39 with value: 0.8271711003781195.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.124932 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074881 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070



🏃 View run Trial_40_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/3aeac15fcb7b42049946647b68d1fe2a
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:14:53,636] Trial 40 finished with value: 0.8241704617089725 and parameters: {'learning_rate': 0.06552348683829046, 'n_estimators': 381, 'max_depth': 16}. Best is trial 39 with value: 0.8271711003781195.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.194420 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.075128 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_41_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/745bbf994af64428a925b4284e0f13f7
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:17:45,181] Trial 41 finished with value: 0.8257914736669602 and parameters: {'learning_rate': 0.061037718779154614, 'n_estimators': 451, 'max_depth': 18}. Best is trial 39 with value: 0.8271711003781195.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.102678 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.068521 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_42_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/3f8b71db4c6a415faaff6ee71231fad1
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:20:22,497] Trial 42 finished with value: 0.8249637411793995 and parameters: {'learning_rate': 0.06916364408882356, 'n_estimators': 468, 'max_depth': 17}. Best is trial 39 with value: 0.8271711003781195.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.105821 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070606 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.064



🏃 View run Trial_43_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/31f854a995754241aba6d8f6a81a505d
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:24:13,765] Trial 43 finished with value: 0.823135739891212 and parameters: {'learning_rate': 0.033836527569654676, 'n_estimators': 436, 'max_depth': 20}. Best is trial 39 with value: 0.8271711003781195.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.121578 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.072417 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_44_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/360a90d8007b4568995fee57d44def00
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:27:09,583] Trial 44 finished with value: 0.825998373777621 and parameters: {'learning_rate': 0.04742063724333202, 'n_estimators': 410, 'max_depth': 19}. Best is trial 39 with value: 0.8271711003781195.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097260 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062994 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]



🏃 View run Trial_45_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/fa8c70360197439daeb4f4f98fb935a4
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:29:23,151] Trial 45 finished with value: 0.8256880503774915 and parameters: {'learning_rate': 0.07682659847071427, 'n_estimators': 339, 'max_depth': 17}. Best is trial 39 with value: 0.8271711003781195.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.101180 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.068781 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info]



🏃 View run Trial_46_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/c7af4d061e514c35b31c7ad088837664
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:32:04,846] Trial 46 finished with value: 0.825929410642849 and parameters: {'learning_rate': 0.05321914316187007, 'n_estimators': 478, 'max_depth': 18}. Best is trial 39 with value: 0.8271711003781195.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.181382 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.076451 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.064



🏃 View run Trial_47_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/f530ed2078554bf5a5d13b1d2d664e73
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:33:33,550] Trial 47 finished with value: 0.7715389385472946 and parameters: {'learning_rate': 0.059719801897755, 'n_estimators': 381, 'max_depth': 3}. Best is trial 39 with value: 0.8271711003781195.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.084226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066156 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choo



🏃 View run Trial_48_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/a624c05cd9844ece880646483a0c021c
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:35:37,504] Trial 48 finished with value: 0.8260329445645459 and parameters: {'learning_rate': 0.07369240870342915, 'n_estimators': 301, 'max_depth': 20}. Best is trial 39 with value: 0.8271711003781195.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.109725 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 77001
[LightGBM] [Info] Number of data points in the train set: 19329, number of used features: 1934
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072



🏃 View run Trial_49_LightGBM_class_weight_TFIDF_Trigrams at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198/runs/3b65f94b62e140ba80e607067870503c
🧪 View experiment at: http://ec2-18-233-10-235.compute-1.amazonaws.com:5000/#/experiments/334855376457357198


[I 2025-06-28 21:38:10,308] Trial 49 finished with value: 0.824756791105797 and parameters: {'learning_rate': 0.06519370397632611, 'n_estimators': 464, 'max_depth': 16}. Best is trial 39 with value: 0.8271711003781195.


In [83]:
model = lgb.LGBMClassifier(

    objective='multiclass',
    num_class=3,
    metric="multi_logloss",
    is_unbalance= True,
    class_weight= "balanced",
    reg_alpha= 0.1,  # L1 regularization
    reg_lambda= 0.1,  # L2 regularization
    learning_rate= 0.06631684007924099,
    max_depth= 18,
    n_estimators=449
)

In [84]:
# Fit the model on the resampled training data
model.fit(X_train_combined, y_train_cleaned)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.096177 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107438
[LightGBM] [Info] Number of data points in the train set: 28994, number of used features: 1954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


In [85]:
# Predict on the test set
from sklearn.metrics import accuracy_score
y_pred = model.predict(X_test_combined)
accuracy = accuracy_score(y_test_cleaned, y_pred)
accuracy

0.8391502276176024

In [86]:
from sklearn.metrics import classification_report
# Generate classification report
report = classification_report(y_test_cleaned, y_pred)
print(report)

              precision    recall  f1-score   support

          -1       0.75      0.74      0.74      1657
           0       0.83      0.94      0.88      2393
           1       0.90      0.81      0.86      3199

    accuracy                           0.84      7249
   macro avg       0.83      0.83      0.83      7249
weighted avg       0.84      0.84      0.84      7249

