In [2]:
!pip install mlflow boto3 awscli optuna imbalanced-learn lightgbm

Collecting mlflow
  Downloading mlflow-3.1.1-py3-none-any.whl.metadata (29 kB)
Collecting boto3
  Downloading boto3-1.39.10-py3-none-any.whl.metadata (6.7 kB)
Collecting awscli
  Downloading awscli-1.41.10-py3-none-any.whl.metadata (11 kB)
Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting mlflow-skinny==3.1.1 (from mlflow)
  Downloading mlflow_skinny-3.1.1-py3-none-any.whl.metadata (30 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.16.4-py3-none-any.whl.metadata (7.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.1.1->mlflow)
  Downloading databricks_sdk-0.59.0-py3-none-any.whl.metadata (39 kB)
Collec

In [3]:
!aws configure

AWS Access Key ID [None]: AKIAXYKJRKAKCXKYFZMC
AWS Secret Access Key [None]: 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
Default region name [None]: eu-west-2
Default output format [None]: 


In [4]:
import mlflow
# Step 2: Set up the MLflow tracking server
mlflow.set_tracking_uri("http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/")

In [5]:
# Set or create an experiment
mlflow.set_experiment("LightGBM HP Tuning")

2025/07/22 01:50:55 INFO mlflow.tracking.fluent: Experiment with name 'LightGBM HP Tuning' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://mlflow-buckets-25/848427543333057037', creation_time=1753149055705, experiment_id='848427543333057037', last_update_time=1753149055705, lifecycle_stage='active', name='LightGBM HP Tuning', tags={}>

In [6]:
import pandas as pd

df = pd.read_csv('/content/reddit_preprocessing.csv').dropna()
df.shape

(36662, 2)

In [10]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
import mlflow
import mlflow.sklearn
import optuna
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt

In [11]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

In [19]:
# Step 3: BOW vectorizer setup
ngram_range = (1, 2)  # Bigram
max_features = 1000  # Set max_features to 1000
vectorizer = CountVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment']).astype('float32') # Cast to float32
y = df['category']

# Step 4: Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [20]:
# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

In [21]:
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    with mlflow.start_run():
        # Log model type and trial number
        mlflow.set_tag("mlflow.runName", f"Trial_{trial_number}_{model_name}_SMOTE_BOW_Bigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Log hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

        return accuracy

In [22]:
# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    # Hyperparameter space to explore
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 15)
    num_leaves = trial.suggest_int('num_leaves', 20, 150)
    min_child_samples = trial.suggest_int('min_child_samples', 10, 100)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    subsample = trial.suggest_float('subsample', 0.5, 1.0)
    reg_alpha = trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True)  # L1 regularization
    reg_lambda = trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True)  # L2 regularization

    # Log trial parameters
    params = {
        'n_estimators': n_estimators,
        'learning_rate': learning_rate,
        'max_depth': max_depth,
        'num_leaves': num_leaves,
        'min_child_samples': min_child_samples,
        'colsample_bytree': colsample_bytree,
        'subsample': subsample,
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda
    }

    # Create LightGBM model
    model = LGBMClassifier(n_estimators=n_estimators,
                           learning_rate=learning_rate,
                           max_depth=max_depth,
                           num_leaves=num_leaves,
                           min_child_samples=min_child_samples,
                           colsample_bytree=colsample_bytree,
                           subsample=subsample,
                           reg_alpha=reg_alpha,
                           reg_lambda=reg_lambda,
                           random_state=42)

    # Log each trial as a separate run in MLflow
    accuracy = log_mlflow("LightGBM", model, X_train, X_test, y_train, y_test, params, trial.number)

    return accuracy

In [23]:
# Step 7: Run Optuna for LightGBM, log the best model, and plot the importance of each parameter
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=100)  # Increased to 100 trials

    # Get the best parameters
    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'],
                                learning_rate=best_params['learning_rate'],
                                max_depth=best_params['max_depth'],
                                num_leaves=best_params['num_leaves'],
                                min_child_samples=best_params['min_child_samples'],
                                colsample_bytree=best_params['colsample_bytree'],
                                subsample=best_params['subsample'],
                                reg_alpha=best_params['reg_alpha'],
                                reg_lambda=best_params['reg_lambda'],
                                random_state=42)

    # Log the best model with MLflow and print the classification report
    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test, best_params, "Best")

    # Plot parameter importance
    optuna.visualization.plot_param_importances(study).show()

    # Plot optimization history
    optuna.visualization.plot_optimization_history(study).show()

In [None]:
# Run the experiment for LightGBM
run_optuna_experiment()

[I 2025-07-22 02:01:38,053] A new study created in memory with name: no-name-ea0ea84f-31fc-43f3-946b-4877161060a2


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.266348 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6049
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 951
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_0_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/426810a6d3ea4fabacc894930deeb2ee
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:02:24,230] Trial 0 finished with value: 0.7779010779961953 and parameters: {'n_estimators': 590, 'learning_rate': 0.002845599915049487, 'max_depth': 5, 'num_leaves': 33, 'min_child_samples': 63, 'colsample_bytree': 0.8234441805128695, 'subsample': 0.5661311357133725, 'reg_alpha': 0.35448059794832476, 'reg_lambda': 0.021360688369033022}. Best is trial 0 with value: 0.7779010779961953.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.210284 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6200
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 978
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_1_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/6e4b4b500e2a42caa921deacf8f9e072
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:03:01,987] Trial 1 finished with value: 0.9032974001268231 and parameters: {'n_estimators': 232, 'learning_rate': 0.06715540961841239, 'max_depth': 9, 'num_leaves': 36, 'min_child_samples': 44, 'colsample_bytree': 0.653976811370556, 'subsample': 0.5688338858664556, 'reg_alpha': 0.00048320769299779807, 'reg_lambda': 0.041283839496143344}. Best is trial 1 with value: 0.9032974001268231.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.170134 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5272
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 779
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_2_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/cf0ef33d658a41fdba478a2b2e522913
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:03:38,789] Trial 2 finished with value: 0.8050095117311351 and parameters: {'n_estimators': 109, 'learning_rate': 0.0007986279026218402, 'max_depth': 13, 'num_leaves': 31, 'min_child_samples': 88, 'colsample_bytree': 0.6041460258023914, 'subsample': 0.5679069935962905, 'reg_alpha': 0.00023463451703884837, 'reg_lambda': 0.10052074688614011}. Best is trial 1 with value: 0.9032974001268231.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.190708 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5557
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 841
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_3_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/2c2bf29af70d4785a5e6856d3e2bbae9
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:04:33,152] Trial 3 finished with value: 0.843849080532657 and parameters: {'n_estimators': 883, 'learning_rate': 0.003225521922350978, 'max_depth': 12, 'num_leaves': 67, 'min_child_samples': 81, 'colsample_bytree': 0.5770846894173141, 'subsample': 0.8417672215268379, 'reg_alpha': 0.024816824613419795, 'reg_lambda': 0.011073713471148104}. Best is trial 1 with value: 0.9032974001268231.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.229487 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6221
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 982
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_4_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/b106e42c55d44214b631f17a1110476b
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:05:16,909] Trial 4 finished with value: 0.7279644895370958 and parameters: {'n_estimators': 940, 'learning_rate': 0.00014882229434356735, 'max_depth': 5, 'num_leaves': 93, 'min_child_samples': 30, 'colsample_bytree': 0.8224433040997381, 'subsample': 0.60006795028013, 'reg_alpha': 0.001723486620422834, 'reg_lambda': 0.20013918681622858}. Best is trial 1 with value: 0.9032974001268231.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.285890 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5170
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 757
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_5_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/ebce6a5d3e2040c8b83cddb889ddad84
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:05:58,668] Trial 5 finished with value: 0.8801521876981611 and parameters: {'n_estimators': 746, 'learning_rate': 0.01865295140331001, 'max_depth': 15, 'num_leaves': 93, 'min_child_samples': 90, 'colsample_bytree': 0.6565215760428353, 'subsample': 0.825979758310836, 'reg_alpha': 0.03727061375531657, 'reg_lambda': 0.015207273750972684}. Best is trial 1 with value: 0.9032974001268231.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.198774 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6221
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 982
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_6_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/009cd2aa385849a79f65e002a4926188
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:06:46,152] Trial 6 finished with value: 0.8086556753329106 and parameters: {'n_estimators': 939, 'learning_rate': 0.0007599152202845541, 'max_depth': 13, 'num_leaves': 55, 'min_child_samples': 33, 'colsample_bytree': 0.6927443509259962, 'subsample': 0.8320883215529611, 'reg_alpha': 0.21835490493039938, 'reg_lambda': 0.03180764527963085}. Best is trial 1 with value: 0.9032974001268231.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.348814 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5947
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 930
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_7_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/e986d7750b124641a0c4ac54a193521f
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:07:29,263] Trial 7 finished with value: 0.7660114140773621 and parameters: {'n_estimators': 135, 'learning_rate': 0.0002906073807846342, 'max_depth': 8, 'num_leaves': 148, 'min_child_samples': 69, 'colsample_bytree': 0.5991984813224913, 'subsample': 0.9536826923611536, 'reg_alpha': 0.00018351906168528195, 'reg_lambda': 0.00021035399507008006}. Best is trial 1 with value: 0.9032974001268231.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.227639 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6147
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 969
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_8_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/2c63c7ec592a48e3a5c8ccae418effb1
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:08:18,629] Trial 8 finished with value: 0.8138871274571972 and parameters: {'n_estimators': 513, 'learning_rate': 0.0042713397756243155, 'max_depth': 9, 'num_leaves': 108, 'min_child_samples': 54, 'colsample_bytree': 0.540222637597313, 'subsample': 0.5136669982349399, 'reg_alpha': 0.2908924526949415, 'reg_lambda': 0.0204340586077092}. Best is trial 1 with value: 0.9032974001268231.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.274023 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4949
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 711
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_9_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/c1269611d6e04decbcf7559b97f53799
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:08:59,690] Trial 9 finished with value: 0.7894736842105263 and parameters: {'n_estimators': 725, 'learning_rate': 0.0022542252045291964, 'max_depth': 8, 'num_leaves': 96, 'min_child_samples': 95, 'colsample_bytree': 0.7803024113409778, 'subsample': 0.7409241546358591, 'reg_alpha': 0.0002785773192694186, 'reg_lambda': 5.815877203097799}. Best is trial 1 with value: 0.9032974001268231.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.226426 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6258
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 990
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_10_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/88cb4e840fcc4f9abaedcbb40007eb43
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:09:37,916] Trial 10 finished with value: 0.907577679137603 and parameters: {'n_estimators': 317, 'learning_rate': 0.08874471237009303, 'max_depth': 10, 'num_leaves': 57, 'min_child_samples': 14, 'colsample_bytree': 0.9790618715854847, 'subsample': 0.6802157062153136, 'reg_alpha': 5.43790283171913, 'reg_lambda': 0.0009084517073472632}. Best is trial 10 with value: 0.907577679137603.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.204587 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6275
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 995
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_11_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/ceb578a35135401bb84fd4a88c23b884
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:10:16,176] Trial 11 finished with value: 0.9067850348763475 and parameters: {'n_estimators': 321, 'learning_rate': 0.09849522322098933, 'max_depth': 11, 'num_leaves': 56, 'min_child_samples': 10, 'colsample_bytree': 0.9524623912013976, 'subsample': 0.670445321248436, 'reg_alpha': 6.5747146449686475, 'reg_lambda': 0.000586039472087047}. Best is trial 10 with value: 0.907577679137603.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.214607 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6273
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 994
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_12_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/32c5e512cd8a45d7bfa43e377f025d8b
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:10:54,563] Trial 12 finished with value: 0.9036144578313253 and parameters: {'n_estimators': 352, 'learning_rate': 0.07349224485302673, 'max_depth': 11, 'num_leaves': 62, 'min_child_samples': 11, 'colsample_bytree': 0.9999646919361371, 'subsample': 0.6890081458957965, 'reg_alpha': 6.955240497057974, 'reg_lambda': 0.00021210109558238995}. Best is trial 10 with value: 0.907577679137603.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.250750 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6273
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 994
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_13_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/c7fff6d8f9e349a6b05ce58dd6550e41
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:11:33,729] Trial 13 finished with value: 0.861287254280279 and parameters: {'n_estimators': 376, 'learning_rate': 0.02049982770755933, 'max_depth': 10, 'num_leaves': 52, 'min_child_samples': 11, 'colsample_bytree': 0.9928725186233162, 'subsample': 0.6747540768653677, 'reg_alpha': 9.455620385212056, 'reg_lambda': 0.0010687096667859944}. Best is trial 10 with value: 0.907577679137603.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.223409 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6221
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 982
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_14_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/ef422392fc8242429b5b245eddcd3306
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:12:12,043] Trial 14 finished with value: 0.8704819277108434 and parameters: {'n_estimators': 334, 'learning_rate': 0.028103360749250544, 'max_depth': 7, 'num_leaves': 73, 'min_child_samples': 23, 'colsample_bytree': 0.9124256685495788, 'subsample': 0.6477788967369894, 'reg_alpha': 1.512822448479884, 'reg_lambda': 0.0013830038271508388}. Best is trial 10 with value: 0.907577679137603.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.233603 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6232
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 984
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_15_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/298e079e941b44bf977b3b68515d4bed
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:12:50,375] Trial 15 finished with value: 0.9025047558655676 and parameters: {'n_estimators': 438, 'learning_rate': 0.09993757778486617, 'max_depth': 3, 'num_leaves': 20, 'min_child_samples': 20, 'colsample_bytree': 0.9117345074488967, 'subsample': 0.7534569676104272, 'reg_alpha': 1.5255176567115303, 'reg_lambda': 0.0017973095305169426}. Best is trial 10 with value: 0.907577679137603.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.220393 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6200
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 978
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_16_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/bf1bed4f5f64488bb13d0303bddcf579
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:13:30,195] Trial 16 finished with value: 0.844007609384908 and parameters: {'n_estimators': 270, 'learning_rate': 0.009106900726653894, 'max_depth': 15, 'num_leaves': 119, 'min_child_samples': 41, 'colsample_bytree': 0.9135393272431871, 'subsample': 0.7474951101482501, 'reg_alpha': 1.8948542172338096, 'reg_lambda': 0.00010678321159151106}. Best is trial 10 with value: 0.907577679137603.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.235759 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6232
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 984
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_17_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/045efe5e00f74e5aaef108c434993fe8
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:14:13,516] Trial 17 finished with value: 0.9207355738744452 and parameters: {'n_estimators': 555, 'learning_rate': 0.03757395267280305, 'max_depth': 11, 'num_leaves': 78, 'min_child_samples': 20, 'colsample_bytree': 0.940375168052056, 'subsample': 0.6385185781397389, 'reg_alpha': 0.03900499048509108, 'reg_lambda': 0.004788703972156027}. Best is trial 17 with value: 0.9207355738744452.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.372623 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6221
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 982
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_18_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/adbb396a2ea34742b6f459a6f54bb9fe
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:14:56,014] Trial 18 finished with value: 0.9218452758402029 and parameters: {'n_estimators': 610, 'learning_rate': 0.0373004624903243, 'max_depth': 13, 'num_leaves': 74, 'min_child_samples': 22, 'colsample_bytree': 0.8524924953721988, 'subsample': 0.6275123620078058, 'reg_alpha': 0.006454804380723437, 'reg_lambda': 0.004071932747178229}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.228473 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6185
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 975
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_19_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/b584fe5fe3364e1dbdd46577763e0e70
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:15:39,263] Trial 19 finished with value: 0.8866518706404566 and parameters: {'n_estimators': 642, 'learning_rate': 0.009746488350865353, 'max_depth': 13, 'num_leaves': 78, 'min_child_samples': 46, 'colsample_bytree': 0.8642557611874823, 'subsample': 0.5084113085816065, 'reg_alpha': 0.007391485986722277, 'reg_lambda': 0.0064384081925204645}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.260163 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6221
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 982
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_20_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/751ad1cb9459431b87227ec7ad1e47b3
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:16:22,484] Trial 20 finished with value: 0.9199429296131896 and parameters: {'n_estimators': 492, 'learning_rate': 0.04158785801830294, 'max_depth': 14, 'num_leaves': 127, 'min_child_samples': 28, 'colsample_bytree': 0.8660105445440845, 'subsample': 0.6251445721768316, 'reg_alpha': 0.0060183507800524285, 'reg_lambda': 0.5154102119201739}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.381649 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6221
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 982
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_21_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/877c03b9423c476aad70d2d89bf389e6
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:17:04,268] Trial 21 finished with value: 0.9188332276474318 and parameters: {'n_estimators': 494, 'learning_rate': 0.03742792283503771, 'max_depth': 14, 'num_leaves': 125, 'min_child_samples': 29, 'colsample_bytree': 0.871121869958404, 'subsample': 0.6226420523214562, 'reg_alpha': 0.00355126259539852, 'reg_lambda': 0.8244343895855847}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.235784 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6221
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 982
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_22_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/880f472fdb784e64842517b1eac49b25
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:17:46,724] Trial 22 finished with value: 0.8929930247305009 and parameters: {'n_estimators': 666, 'learning_rate': 0.012576121773417239, 'max_depth': 12, 'num_leaves': 84, 'min_child_samples': 23, 'colsample_bytree': 0.7573070290027641, 'subsample': 0.6120439162179648, 'reg_alpha': 0.017956828624341607, 'reg_lambda': 0.679321777276865}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.214342 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6218
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 981
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_23_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/2dbe53cdc4424dd6b4326e80a0da93ed
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:18:32,462] Trial 23 finished with value: 0.9175649968294229 and parameters: {'n_estimators': 816, 'learning_rate': 0.042751378039291535, 'max_depth': 14, 'num_leaves': 137, 'min_child_samples': 36, 'colsample_bytree': 0.835610673528893, 'subsample': 0.7242583030068805, 'reg_alpha': 0.056497713810232254, 'reg_lambda': 0.006112798245920168}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.360164 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6232
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 984
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_24_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/8cff162904d447e28b3313c57b0d72ca
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:19:14,709] Trial 24 finished with value: 0.8373493975903614 and parameters: {'n_estimators': 578, 'learning_rate': 0.006255863587806483, 'max_depth': 12, 'num_leaves': 107, 'min_child_samples': 20, 'colsample_bytree': 0.9447961460971364, 'subsample': 0.7856316816363285, 'reg_alpha': 0.001055110299960831, 'reg_lambda': 9.86902084974834}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.232157 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6152
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 970
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_25_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/0b55d11a636d429bb8914e857b06561f
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:19:54,704] Trial 25 finished with value: 0.9037729866835764 and parameters: {'n_estimators': 479, 'learning_rate': 0.03840345230927226, 'max_depth': 14, 'num_leaves': 110, 'min_child_samples': 51, 'colsample_bytree': 0.7841642291364367, 'subsample': 0.9907002983389843, 'reg_alpha': 0.00875429783548703, 'reg_lambda': 0.0033602633000541495}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.267968 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6213
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 980
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_26_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/f6c387f89561437fadb31575e13e1c27
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:20:34,304] Trial 26 finished with value: 0.8706404565630945 and parameters: {'n_estimators': 407, 'learning_rate': 0.015635741878055984, 'max_depth': 11, 'num_leaves': 82, 'min_child_samples': 38, 'colsample_bytree': 0.8770138240337558, 'subsample': 0.5454613897491423, 'reg_alpha': 0.08557586131398943, 'reg_lambda': 2.2635893005326215}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.239162 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6221
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 982
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_27_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/9dd1a2546bfe42fe9fe99591db21eeb2
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:21:19,251] Trial 27 finished with value: 0.9215282181357007 and parameters: {'n_estimators': 639, 'learning_rate': 0.04950544141764092, 'max_depth': 15, 'num_leaves': 69, 'min_child_samples': 27, 'colsample_bytree': 0.8909537376926553, 'subsample': 0.6340492347170751, 'reg_alpha': 0.007671469483546962, 'reg_lambda': 0.08297091404276041}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.390422 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6250
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 988
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_28_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/a8b509ca609b40e2b9dd78dc8fdb5ae4
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:22:02,475] Trial 28 finished with value: 0.9216867469879518 and parameters: {'n_estimators': 653, 'learning_rate': 0.0258386972202193, 'max_depth': 15, 'num_leaves': 46, 'min_child_samples': 17, 'colsample_bytree': 0.7216681019168162, 'subsample': 0.7070932359842438, 'reg_alpha': 0.11457622417163191, 'reg_lambda': 0.07002113699588867}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.267452 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5979
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 936
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_29_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/dc5403d16ce641a99c9a7f3e69d0cef1
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:22:45,082] Trial 29 finished with value: 0.8723842739378567 and parameters: {'n_estimators': 641, 'learning_rate': 0.006672515637984527, 'max_depth': 15, 'num_leaves': 45, 'min_child_samples': 67, 'colsample_bytree': 0.7212959196846274, 'subsample': 0.7166884188479176, 'reg_alpha': 0.14999492989653368, 'reg_lambda': 0.11408586015494827}. Best is trial 18 with value: 0.9218452758402029.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.247377 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6253
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 989
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_30_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/91c5b99b074844fdac10e3825eef11ee
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:23:31,234] Trial 30 finished with value: 0.9235890932149651 and parameters: {'n_estimators': 761, 'learning_rate': 0.025074920066336624, 'max_depth': 15, 'num_leaves': 44, 'min_child_samples': 16, 'colsample_bytree': 0.8183461857822607, 'subsample': 0.8862729432210781, 'reg_alpha': 0.0020954734149293145, 'reg_lambda': 0.07250126459868374}. Best is trial 30 with value: 0.9235890932149651.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.321678 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6253
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 989
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_31_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/848216057d5e4fe6b469b353e172c259
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:24:16,520] Trial 31 finished with value: 0.9229549778059607 and parameters: {'n_estimators': 772, 'learning_rate': 0.02450037820730094, 'max_depth': 15, 'num_leaves': 43, 'min_child_samples': 16, 'colsample_bytree': 0.7858943139947399, 'subsample': 0.9055401457276953, 'reg_alpha': 0.0026938641395119673, 'reg_lambda': 0.048794040968970824}. Best is trial 30 with value: 0.9235890932149651.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.214002 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6253
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 989
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_32_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/2ed00a1ffd50451895d69b1aade3ca30
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:25:02,144] Trial 32 finished with value: 0.9221623335447051 and parameters: {'n_estimators': 798, 'learning_rate': 0.022789281045068135, 'max_depth': 15, 'num_leaves': 40, 'min_child_samples': 16, 'colsample_bytree': 0.805024888212414, 'subsample': 0.9028807189191558, 'reg_alpha': 0.0010856212707584527, 'reg_lambda': 0.0538447335800969}. Best is trial 30 with value: 0.9235890932149651.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.215055 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6253
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 989
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_33_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/94e391bc28c049178bc6af0893a5b2a3
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:25:48,651] Trial 33 finished with value: 0.8170577045022194 and parameters: {'n_estimators': 814, 'learning_rate': 0.0016019867755832757, 'max_depth': 13, 'num_leaves': 36, 'min_child_samples': 16, 'colsample_bytree': 0.8077243247316102, 'subsample': 0.8929316531420637, 'reg_alpha': 0.0010635741267610683, 'reg_lambda': 0.21907518623848823}. Best is trial 30 with value: 0.9235890932149651.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.403382 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6221
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 982
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_34_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/0222baed1b6847e5a07f53959e1b7e76
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:26:33,382] Trial 34 finished with value: 0.9116994292961319 and parameters: {'n_estimators': 811, 'learning_rate': 0.013643011865771083, 'max_depth': 14, 'num_leaves': 27, 'min_child_samples': 25, 'colsample_bytree': 0.8374261491305045, 'subsample': 0.910799579311398, 'reg_alpha': 0.0025190869393421082, 'reg_lambda': 0.04242622544665884}. Best is trial 30 with value: 0.9235890932149651.


[LightGBM] [Info] Number of positive: 12616, number of negative: 12616
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.242538 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6253
[LightGBM] [Info] Number of data points in the train set: 25232, number of used features: 989
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run Trial_35_LightGBM_SMOTE_BOW_Bigrams at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037/runs/2efc41bd7d084eefb2dfb65be88222e1
🧪 View experiment at: http://ec2-18-133-28-185.eu-west-2.compute.amazonaws.com:5000/#/experiments/848427543333057037


[I 2025-07-22 02:27:21,209] Trial 35 finished with value: 0.9229549778059607 and parameters: {'n_estimators': 879, 'learning_rate': 0.056342673822964956, 'max_depth': 15, 'num_leaves': 43, 'min_child_samples': 16, 'colsample_bytree': 0.8023649688850244, 'subsample': 0.8800299020018063, 'reg_alpha': 0.0006634626159107392, 'reg_lambda': 0.30809642914283025}. Best is trial 30 with value: 0.9235890932149651.
