In [2]:
import mlflow
# Step 2: Set up the MLflow tracking server
mlflow.set_tracking_uri("http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/")

In [3]:
# Set or create an experiment
mlflow.set_experiment("07 - v2 LightGBM Detailed Hyperparameter Tuning")

2025/11/27 23:21:35 INFO mlflow.tracking.fluent: Experiment with name '07 - v2 LightGBM Detailed Hyperparameter Tuning' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://sau-s3-mlflow-bucket/358947331249833738', creation_time=1764303695042, experiment_id='358947331249833738', last_update_time=1764303695042, lifecycle_stage='active', name='07 - v2 LightGBM Detailed Hyperparameter Tuning', tags={}>

In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
import mlflow
import mlflow.sklearn
import optuna
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt
import plotly

In [5]:
df = pd.read_csv(
    '../../data/processed/reddit_clean_final.csv',
    keep_default_na=False,
    na_filter=False
)
df.shape

(36607, 2)

In [6]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

In [7]:
# Step 3: TF-IDF vectorizer setup
ngram_range = (1, 2)  # Bigram
max_features = 1000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

# Step 4: Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.



In [8]:
# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

In [9]:
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    with mlflow.start_run():
        # Log model type and trial number
        mlflow.set_tag("mlflow.runName", f"Trial_{trial_number}_{model_name}_SMOTE_TFIDF_Bigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Log hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

        return accuracy




In [10]:
# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    # Hyperparameter space to explore
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 15)
    num_leaves = trial.suggest_int('num_leaves', 20, 150)
    min_child_samples = trial.suggest_int('min_child_samples', 10, 100)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    subsample = trial.suggest_float('subsample', 0.5, 1.0)
    reg_alpha = trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True)  # L1 regularization
    reg_lambda = trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True)  # L2 regularization

    # Log trial parameters
    params = {
        'n_estimators': n_estimators,
        'learning_rate': learning_rate,
        'max_depth': max_depth,
        'num_leaves': num_leaves,
        'min_child_samples': min_child_samples,
        'colsample_bytree': colsample_bytree,
        'subsample': subsample,
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda
    }

    # Create LightGBM model
    model = LGBMClassifier(n_estimators=n_estimators,
                           learning_rate=learning_rate,
                           max_depth=max_depth,
                           num_leaves=num_leaves,
                           min_child_samples=min_child_samples,
                           colsample_bytree=colsample_bytree,
                           subsample=subsample,
                           reg_alpha=reg_alpha,
                           reg_lambda=reg_lambda,
                           random_state=42)

    # Log each trial as a separate run in MLflow
    accuracy = log_mlflow("LightGBM", model, X_train, X_test, y_train, y_test, params, trial.number)

    return accuracy




In [11]:
# Step 7: Run Optuna for LightGBM, log the best model, and plot the importance of each parameter
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=50)  # Increased to 50 trials

    # Get the best parameters
    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'],
                                learning_rate=best_params['learning_rate'],
                                max_depth=best_params['max_depth'],
                                num_leaves=best_params['num_leaves'],
                                min_child_samples=best_params['min_child_samples'],
                                colsample_bytree=best_params['colsample_bytree'],
                                subsample=best_params['subsample'],
                                reg_alpha=best_params['reg_alpha'],
                                reg_lambda=best_params['reg_lambda'],
                                random_state=42)

    # Log the best model with MLflow and print the classification report
    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test, best_params, "Best")

    # Plot parameter importance
    optuna.visualization.plot_param_importances(study).show()

    # Plot optimization history
    optuna.visualization.plot_optimization_history(study).show()

In [12]:
# Run the experiment for LightGBM
run_optuna_experiment()

[I 2025-11-27 23:27:40,120] A new study created in memory with name: no-name-61ce3ff1-1448-4a81-a9dc-34158c608d66


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.261619 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100351
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 987
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_0_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/e619fa7d170b4d78b5e79761afc4e853
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:31:05,671] Trial 0 finished with value: 0.6467408585055644 and parameters: {'n_estimators': 525, 'learning_rate': 0.00017859010806568496, 'max_depth': 9, 'num_leaves': 139, 'min_child_samples': 19, 'colsample_bytree': 0.810672168607049, 'subsample': 0.5203562465047809, 'reg_alpha': 0.5948359610936697, 'reg_lambda': 0.6072350365892419}. Best is trial 0 with value: 0.6467408585055644.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.069985 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100224
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 978
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_1_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/3990b3ead4f84672aad1254bb889b604
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:33:17,765] Trial 1 finished with value: 0.8014838367779544 and parameters: {'n_estimators': 860, 'learning_rate': 0.0578376966188703, 'max_depth': 5, 'num_leaves': 50, 'min_child_samples': 57, 'colsample_bytree': 0.8743238133845921, 'subsample': 0.9370500807539813, 'reg_alpha': 0.5108222569967481, 'reg_lambda': 3.6067435752063512}. Best is trial 1 with value: 0.8014838367779544.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.378829 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100299
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 982
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_2_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/10d0e44596c24b60a586d92d6b56948c
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:36:10,919] Trial 2 finished with value: 0.6414414414414414 and parameters: {'n_estimators': 963, 'learning_rate': 0.0011490094136057986, 'max_depth': 5, 'num_leaves': 28, 'min_child_samples': 44, 'colsample_bytree': 0.5533594807428952, 'subsample': 0.7657777059384134, 'reg_alpha': 0.019108262808756986, 'reg_lambda': 4.334757874124043}. Best is trial 1 with value: 0.8014838367779544.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.511315 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100325
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 984
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_3_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/87bfc9e1601d47a7a00449a43eba8e95
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:37:25,639] Trial 3 finished with value: 0.6692103868574457 and parameters: {'n_estimators': 118, 'learning_rate': 0.0010532703794951304, 'max_depth': 13, 'num_leaves': 95, 'min_child_samples': 36, 'colsample_bytree': 0.8116246169021879, 'subsample': 0.5905836240730284, 'reg_alpha': 0.17335988029481564, 'reg_lambda': 5.88872166982096}. Best is trial 1 with value: 0.8014838367779544.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.087914 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100115
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 973
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_4_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/1c5df3a8597848f7a11c03ae166fb0b8
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:39:24,921] Trial 4 finished with value: 0.6788553259141494 and parameters: {'n_estimators': 412, 'learning_rate': 0.002734820086171006, 'max_depth': 11, 'num_leaves': 93, 'min_child_samples': 69, 'colsample_bytree': 0.9586371997995962, 'subsample': 0.5801632489408682, 'reg_alpha': 0.0001233572831169049, 'reg_lambda': 6.184963321269522}. Best is trial 1 with value: 0.8014838367779544.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.155105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99952
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_5_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/a4bf7c2cb1fd4d8fbff2224b216ecfa3
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:40:32,261] Trial 5 finished with value: 0.7772125066242713 and parameters: {'n_estimators': 140, 'learning_rate': 0.04656660888819768, 'max_depth': 12, 'num_leaves': 89, 'min_child_samples': 89, 'colsample_bytree': 0.5286381104866573, 'subsample': 0.9776897638855746, 'reg_alpha': 0.11151563702718832, 'reg_lambda': 0.00940398554178516}. Best is trial 1 with value: 0.8014838367779544.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.312444 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99952
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_6_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/09d42bc9a82e4cd6bb8ab839958a993b
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:43:17,061] Trial 6 finished with value: 0.809009009009009 and parameters: {'n_estimators': 740, 'learning_rate': 0.07078300079533414, 'max_depth': 15, 'num_leaves': 137, 'min_child_samples': 89, 'colsample_bytree': 0.6326377304885044, 'subsample': 0.5046696042321592, 'reg_alpha': 0.22510171500741755, 'reg_lambda': 0.06686035815548144}. Best is trial 6 with value: 0.809009009009009.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070992 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100224
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 978
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_7_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/302a9cf3e1ad458897ed720decdc0b0a
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:45:06,680] Trial 7 finished with value: 0.8122946475887652 and parameters: {'n_estimators': 611, 'learning_rate': 0.08973962957896402, 'max_depth': 9, 'num_leaves': 55, 'min_child_samples': 58, 'colsample_bytree': 0.6690420853727723, 'subsample': 0.6381414056617709, 'reg_alpha': 0.01583105030340174, 'reg_lambda': 3.8540452894286483}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.087911 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100136
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 974
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_8_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/562715e8372a49f6ba7142dc1a6690b4
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:46:54,669] Trial 8 finished with value: 0.7869634340222575 and parameters: {'n_estimators': 910, 'learning_rate': 0.03856124549400994, 'max_depth': 4, 'num_leaves': 54, 'min_child_samples': 66, 'colsample_bytree': 0.9544088969181455, 'subsample': 0.727336932243458, 'reg_alpha': 1.105840369929502, 'reg_lambda': 8.78893540636802}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.160566 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100396
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 995
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_9_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/a0dc820a1643445980e8bfd25f35376b
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:51:52,655] Trial 9 finished with value: 0.6764175940646528 and parameters: {'n_estimators': 987, 'learning_rate': 0.00016544439291134628, 'max_depth': 14, 'num_leaves': 24, 'min_child_samples': 10, 'colsample_bytree': 0.85008987873028, 'subsample': 0.560489030539608, 'reg_alpha': 0.00028995451760196187, 'reg_lambda': 0.004514855981751409}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.140331 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99656
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_10_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/8258f893bd0c46bead83df4ea20ec960
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:53:23,542] Trial 10 finished with value: 0.7601483836777955 and parameters: {'n_estimators': 352, 'learning_rate': 0.01744409876799021, 'max_depth': 9, 'num_leaves': 62, 'min_child_samples': 100, 'colsample_bytree': 0.6973582648862099, 'subsample': 0.7099851611740748, 'reg_alpha': 0.004986072093461374, 'reg_lambda': 0.0004981353388375386}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.188623 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100066
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_11_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/9e1918bf5f9040e781fc1531ef7933b4
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:55:50,062] Trial 11 finished with value: 0.7460519342872284 and parameters: {'n_estimators': 714, 'learning_rate': 0.009813686300551834, 'max_depth': 8, 'num_leaves': 146, 'min_child_samples': 78, 'colsample_bytree': 0.6586800319795781, 'subsample': 0.6620822391137252, 'reg_alpha': 9.927296585160363, 'reg_lambda': 0.18225721213224166}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.574505 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100010
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 969
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_12_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/738eda9568174ab7b79bfe2cd173fa9b
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-27 23:59:35,755] Trial 12 finished with value: 0.8089030206677266 and parameters: {'n_estimators': 701, 'learning_rate': 0.08105586760254009, 'max_depth': 15, 'num_leaves': 118, 'min_child_samples': 82, 'colsample_bytree': 0.6162424179760061, 'subsample': 0.8301945011372386, 'reg_alpha': 0.00420214304282332, 'reg_lambda': 0.09876522675541771}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.088449 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100299
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 982
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_13_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/67110ca7e192492faf5965d6087acaa4
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:02:40,915] Trial 13 finished with value: 0.7642819289878113 and parameters: {'n_estimators': 705, 'learning_rate': 0.011321947177275318, 'max_depth': 7, 'num_leaves': 118, 'min_child_samples': 45, 'colsample_bytree': 0.7373358751224255, 'subsample': 0.6280755054090549, 'reg_alpha': 0.02910890587070381, 'reg_lambda': 0.0007799812107195615}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.012719 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99791
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 962
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_14_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/290e7d0a1a644d72bd52d54e21b4ab22
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:06:19,244] Trial 14 finished with value: 0.7861155272919979 and parameters: {'n_estimators': 610, 'learning_rate': 0.01802506276600192, 'max_depth': 10, 'num_leaves': 68, 'min_child_samples': 97, 'colsample_bytree': 0.606824789121576, 'subsample': 0.5122909133461337, 'reg_alpha': 0.001412309316232851, 'reg_lambda': 0.5928473238413372}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.084258 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100136
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 974
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_15_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/91da805ed5864ee4ac8275acfb15b9a3
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:09:45,279] Trial 15 finished with value: 0.8032856385797562 and parameters: {'n_estimators': 805, 'learning_rate': 0.0993672167390768, 'max_depth': 12, 'num_leaves': 116, 'min_child_samples': 66, 'colsample_bytree': 0.6878091092741397, 'subsample': 0.6631502730383663, 'reg_alpha': 7.358389821911227, 'reg_lambda': 0.022235569235408887}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.112304 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100281
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 981
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_16_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/9563c303f9e74437a8bb0d8d8e528977
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:12:49,197] Trial 16 finished with value: 0.6976152623211447 and parameters: {'n_estimators': 499, 'learning_rate': 0.004959721285480646, 'max_depth': 7, 'num_leaves': 75, 'min_child_samples': 54, 'colsample_bytree': 0.5978312985101166, 'subsample': 0.7958690563652864, 'reg_alpha': 0.10879109664356618, 'reg_lambda': 0.00013592864517011842}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.138016 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100066
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_17_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/28a4896b6b9f45fc8b544816f4e97b23
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:15:21,371] Trial 17 finished with value: 0.7905670376258611 and parameters: {'n_estimators': 308, 'learning_rate': 0.025560022174721232, 'max_depth': 15, 'num_leaves': 38, 'min_child_samples': 78, 'colsample_bytree': 0.7533165403992385, 'subsample': 0.8465261081026597, 'reg_alpha': 0.009214700334808762, 'reg_lambda': 0.8086983370785119}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.116235 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99952
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_18_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/89a4eb8da68845399f6660e3a898e250
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:18:58,721] Trial 18 finished with value: 0.7354531001589825 and parameters: {'n_estimators': 624, 'learning_rate': 0.005034046095380543, 'max_depth': 11, 'num_leaves': 103, 'min_child_samples': 89, 'colsample_bytree': 0.5138213054731285, 'subsample': 0.5033246078464119, 'reg_alpha': 1.8380296315097886, 'reg_lambda': 0.09040371002494732}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.198065 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100313
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 983
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_19_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/273e1889fc4a43eb94aa64536b8b317b
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:21:26,411] Trial 19 finished with value: 0.5730789613142554 and parameters: {'n_estimators': 795, 'learning_rate': 0.00046573099356968483, 'max_depth': 3, 'num_leaves': 135, 'min_child_samples': 38, 'colsample_bytree': 0.649224871837673, 'subsample': 0.6285946770512337, 'reg_alpha': 0.0015223967985975777, 'reg_lambda': 1.6821324126270298}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.113419 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100325
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 984
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_20_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/6dcda5def1c3498baefc80b6cb512605
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:23:57,165] Trial 20 finished with value: 0.7937466878643349 and parameters: {'n_estimators': 459, 'learning_rate': 0.03728848303150484, 'max_depth': 7, 'num_leaves': 77, 'min_child_samples': 29, 'colsample_bytree': 0.7375584854227887, 'subsample': 0.556080805596917, 'reg_alpha': 0.05804818184236556, 'reg_lambda': 0.0018891949757858682}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.170554 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100038
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 970
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_21_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/039ddf54f27d41d490c090dc0095e4ca
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:28:08,978] Trial 21 finished with value: 0.8087970323264441 and parameters: {'n_estimators': 698, 'learning_rate': 0.08783164678183893, 'max_depth': 15, 'num_leaves': 123, 'min_child_samples': 80, 'colsample_bytree': 0.5863413674202782, 'subsample': 0.8894928009440449, 'reg_alpha': 0.0027823066333231244, 'reg_lambda': 0.10438889687463397}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.089195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99952
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_22_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/0b890d9afe40498e8dde523842e3e119
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:31:46,825] Trial 22 finished with value: 0.8083730789613143 and parameters: {'n_estimators': 613, 'learning_rate': 0.0945631459928602, 'max_depth': 14, 'num_leaves': 130, 'min_child_samples': 88, 'colsample_bytree': 0.6379212840920955, 'subsample': 0.8304398658151394, 'reg_alpha': 0.012995004026518108, 'reg_lambda': 0.04133839579959977}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.055727 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100224
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 978
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_23_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/d455b72639f445f2b2e2dfc1f3af3716
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:38:23,444] Trial 23 finished with value: 0.8073131955484897 and parameters: {'n_estimators': 769, 'learning_rate': 0.027448723925954554, 'max_depth': 13, 'num_leaves': 107, 'min_child_samples': 57, 'colsample_bytree': 0.5750599764799685, 'subsample': 0.6887853559588342, 'reg_alpha': 0.0006473838667008548, 'reg_lambda': 0.3151050613305155}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.416385 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100115
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 973
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_24_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/9797ca122ea04cb896c9ce04595623a1
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:43:54,915] Trial 24 finished with value: 0.8094329623741389 and parameters: {'n_estimators': 639, 'learning_rate': 0.056281891208159594, 'max_depth': 15, 'num_leaves': 108, 'min_child_samples': 71, 'colsample_bytree': 0.6880731139337611, 'subsample': 0.7723933779664712, 'reg_alpha': 0.04767092584176215, 'reg_lambda': 0.043520037950053776}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.279101 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100136
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 974
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_25_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/1a9949a2e3504c6dba6e810381139692
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:48:27,941] Trial 25 finished with value: 0.8079491255961844 and parameters: {'n_estimators': 563, 'learning_rate': 0.05115132669719923, 'max_depth': 13, 'num_leaves': 102, 'min_child_samples': 66, 'colsample_bytree': 0.6934343998401142, 'subsample': 0.763711318284406, 'reg_alpha': 0.30437493054638864, 'reg_lambda': 0.025247486205381572}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.294514 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100090
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 972
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_26_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/c6aea862aafd4b9782fcde584c6cdf14
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 00:53:12,655] Trial 26 finished with value: 0.7649178590355061 and parameters: {'n_estimators': 641, 'learning_rate': 0.009329391147894618, 'max_depth': 10, 'num_leaves': 142, 'min_child_samples': 72, 'colsample_bytree': 0.7732195056087908, 'subsample': 0.6207397953403309, 'reg_alpha': 0.048702998940504824, 'reg_lambda': 0.008563277827532863}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.010849 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100281
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 981
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_27_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/200213c9815c4f04a384e184aeba0e9d
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:02:58,353] Trial 27 finished with value: 0.8075251722310546 and parameters: {'n_estimators': 884, 'learning_rate': 0.023098144424422762, 'max_depth': 14, 'num_leaves': 150, 'min_child_samples': 52, 'colsample_bytree': 0.6800695361652005, 'subsample': 0.733945635509585, 'reg_alpha': 0.04462228449997621, 'reg_lambda': 1.917299020242431}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.110918 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100181
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 976
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_28_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/3d48ac03e8fd4fc888e5cbd52aea6518
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:05:42,657] Trial 28 finished with value: 0.8077371489136195 and parameters: {'n_estimators': 564, 'learning_rate': 0.05144461471142554, 'max_depth': 12, 'num_leaves': 80, 'min_child_samples': 62, 'colsample_bytree': 0.7297841006355928, 'subsample': 0.6713961428494278, 'reg_alpha': 3.3486775334060686, 'reg_lambda': 0.03740078468091283}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.146753 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100066
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_29_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/76989e0e9814454b849c1889e73bc252
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:09:57,212] Trial 29 finished with value: 0.6445151033386327 and parameters: {'n_estimators': 496, 'learning_rate': 0.00010105835276136147, 'max_depth': 9, 'num_leaves': 131, 'min_child_samples': 73, 'colsample_bytree': 0.7917710580706239, 'subsample': 0.5377057500663406, 'reg_alpha': 0.3004089687386896, 'reg_lambda': 0.5673124874850821}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.061020 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99857
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 964
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_30_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/c0446b0a2725425e891b063767c9fc5b
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:12:14,377] Trial 30 finished with value: 0.7176470588235294 and parameters: {'n_estimators': 226, 'learning_rate': 0.01369163691269626, 'max_depth': 8, 'num_leaves': 40, 'min_child_samples': 94, 'colsample_bytree': 0.5472042125130065, 'subsample': 0.7839730891391548, 'reg_alpha': 0.8583029745121621, 'reg_lambda': 0.22059234962989235}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.064234 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99921
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_31_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/0a968d9397be4497b9493de979dcb0f5
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:15:51,903] Trial 31 finished with value: 0.8077371489136195 and parameters: {'n_estimators': 745, 'learning_rate': 0.07291352529117379, 'max_depth': 15, 'num_leaves': 109, 'min_child_samples': 90, 'colsample_bytree': 0.6330161139869314, 'subsample': 0.8253078184531706, 'reg_alpha': 0.005796442471363138, 'reg_lambda': 0.050993562728474294}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.064473 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100010
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 969
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_32_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/be82adf8b48b43ebaebea6aa889031cc
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:19:05,804] Trial 32 finished with value: 0.8102808691043986 and parameters: {'n_estimators': 664, 'learning_rate': 0.0637599989384552, 'max_depth': 15, 'num_leaves': 117, 'min_child_samples': 82, 'colsample_bytree': 0.6180387308100806, 'subsample': 0.8538766709926269, 'reg_alpha': 0.016802240214877546, 'reg_lambda': 0.07878156342947554}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.134177 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99982
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_33_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/9771d4945a2c4f0ea59c2f4562871375
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:23:12,343] Trial 33 finished with value: 0.8076311605723371 and parameters: {'n_estimators': 830, 'learning_rate': 0.03126657565553578, 'max_depth': 14, 'num_leaves': 127, 'min_child_samples': 85, 'colsample_bytree': 0.7139647431647479, 'subsample': 0.8864210516310838, 'reg_alpha': 0.02506548574677747, 'reg_lambda': 0.015118023931134424}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.085621 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100066
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_34_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/c7f4f03d2baa4a759c582d7d0791dcff
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:26:32,966] Trial 34 finished with value: 0.8094329623741389 and parameters: {'n_estimators': 676, 'learning_rate': 0.05828758310170494, 'max_depth': 13, 'num_leaves': 137, 'min_child_samples': 74, 'colsample_bytree': 0.6659999399788462, 'subsample': 0.8983920037836022, 'reg_alpha': 0.011625863880566654, 'reg_lambda': 0.0041631089752664635}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.084754 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100224
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 978
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_35_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/02c5b5c3da914060895da56fa6d7d675
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:29:43,605] Trial 35 finished with value: 0.8100688924218336 and parameters: {'n_estimators': 657, 'learning_rate': 0.05402341099106062, 'max_depth': 13, 'num_leaves': 112, 'min_child_samples': 60, 'colsample_bytree': 0.6709762830819982, 'subsample': 0.9071543636913602, 'reg_alpha': 0.013931921405455181, 'reg_lambda': 0.003379971209416784}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073469 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100224
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 978
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_36_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/931a9bf8ccbe4ed29c6800f2bbdb84de
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:33:48,878] Trial 36 finished with value: 0.6749337572866985 and parameters: {'n_estimators': 568, 'learning_rate': 0.0014844171808331482, 'max_depth': 11, 'num_leaves': 100, 'min_child_samples': 60, 'colsample_bytree': 0.8315062146155363, 'subsample': 0.9502665205723341, 'reg_alpha': 0.0856617318648081, 'reg_lambda': 0.0012807043712902144}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.136903 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100299
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 982
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_37_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/2ec45672abf047109ebcf0647d6da869
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:36:24,144] Trial 37 finished with value: 0.5927927927927928 and parameters: {'n_estimators': 434, 'learning_rate': 0.0004080736883415128, 'max_depth': 6, 'num_leaves': 86, 'min_child_samples': 51, 'colsample_bytree': 0.9092459420749446, 'subsample': 0.9305201636046965, 'reg_alpha': 0.018826956134053677, 'reg_lambda': 0.00015690762107677976}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.143913 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100299
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 982
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_38_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/1409dd159309461b83842e55b8581014
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:40:18,941] Trial 38 finished with value: 0.8117647058823529 and parameters: {'n_estimators': 650, 'learning_rate': 0.036855878994265984, 'max_depth': 14, 'num_leaves': 112, 'min_child_samples': 43, 'colsample_bytree': 0.5696602289719176, 'subsample': 0.9927235118250012, 'reg_alpha': 0.0023954638951377834, 'reg_lambda': 0.0038284191245487025}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.120325 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100325
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 984
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names



🏃 View run Trial_39_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/dff8e65f263a43d8b516ed803ccfafad
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:43:19,810] Trial 39 finished with value: 0.80646528881823 and parameters: {'n_estimators': 526, 'learning_rate': 0.03677568086227212, 'max_depth': 10, 'num_leaves': 94, 'min_child_samples': 27, 'colsample_bytree': 0.5596318052182192, 'subsample': 0.9951268510576419, 'reg_alpha': 0.0025919272473836855, 'reg_lambda': 0.0030077165600188673}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.143156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100299
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 982
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_40_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/d5aaa110a80444a5ae63dbd75d7e5930
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:48:28,440] Trial 40 finished with value: 0.7722310545839958 and parameters: {'n_estimators': 920, 'learning_rate': 0.0061309096841553334, 'max_depth': 12, 'num_leaves': 50, 'min_child_samples': 46, 'colsample_bytree': 0.5645236520146294, 'subsample': 0.9503292427488169, 'reg_alpha': 0.0006774833952746703, 'reg_lambda': 0.0003969476066230525}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.169598 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100313
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 983
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_41_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/cf43660877cd44f4bb57ccc5cc3d6066
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:52:20,677] Trial 41 finished with value: 0.8118706942236354 and parameters: {'n_estimators': 653, 'learning_rate': 0.06267489138190002, 'max_depth': 14, 'num_leaves': 111, 'min_child_samples': 40, 'colsample_bytree': 0.6243121741362495, 'subsample': 0.9235582661766256, 'reg_alpha': 0.008521186795109612, 'reg_lambda': 0.00811422705142716}. Best is trial 7 with value: 0.8122946475887652.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.089679 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100313
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 983
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_42_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/8a6e3f310dc54ccfae4a9ff56b9f77c4
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 01:56:40,897] Trial 42 finished with value: 0.8124006359300477 and parameters: {'n_estimators': 671, 'learning_rate': 0.04025448163678857, 'max_depth': 14, 'num_leaves': 122, 'min_child_samples': 39, 'colsample_bytree': 0.6169781804193013, 'subsample': 0.9195900608945853, 'reg_alpha': 0.006475818259572617, 'reg_lambda': 0.007088113412153916}. Best is trial 42 with value: 0.8124006359300477.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.088112 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100325
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 984
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_43_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/ed4948227d1843c380eedae6c0edb539
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 02:01:42,964] Trial 43 finished with value: 0.8030736618971913 and parameters: {'n_estimators': 594, 'learning_rate': 0.019158991624876207, 'max_depth': 14, 'num_leaves': 120, 'min_child_samples': 37, 'colsample_bytree': 0.6149087275598959, 'subsample': 0.9729933608421992, 'reg_alpha': 0.008185044559428785, 'reg_lambda': 0.008519298143396229}. Best is trial 42 with value: 0.8124006359300477.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.090242 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100325
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 984
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_44_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/18c5fea0db894f169928a8260eb9d5c0
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 02:06:14,802] Trial 44 finished with value: 0.8112347641759406 and parameters: {'n_estimators': 745, 'learning_rate': 0.04013462467629797, 'max_depth': 14, 'num_leaves': 125, 'min_child_samples': 31, 'colsample_bytree': 0.5384550528830923, 'subsample': 0.8673812074942309, 'reg_alpha': 0.002941299129386297, 'reg_lambda': 0.013441333564083703}. Best is trial 42 with value: 0.8124006359300477.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073382 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100335
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 985
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_45_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/91b33d19035245c486d1eed9b837502d
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 02:12:09,282] Trial 45 finished with value: 0.8116587175410704 and parameters: {'n_estimators': 753, 'learning_rate': 0.04128704079080681, 'max_depth': 14, 'num_leaves': 127, 'min_child_samples': 26, 'colsample_bytree': 0.5359633143892455, 'subsample': 0.9267012405056692, 'reg_alpha': 0.0011204997367478024, 'reg_lambda': 0.013472619626568871}. Best is trial 42 with value: 0.8124006359300477.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.113976 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100351
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 987
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_46_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/cc3a32b979ed4cc2835ddd4f8939a053
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 02:17:15,274] Trial 46 finished with value: 0.7983041865394807 and parameters: {'n_estimators': 834, 'learning_rate': 0.014193679518822472, 'max_depth': 12, 'num_leaves': 61, 'min_child_samples': 20, 'colsample_bytree': 0.507726121115124, 'subsample': 0.9279929739730022, 'reg_alpha': 0.000598139311964721, 'reg_lambda': 0.006448647748242632}. Best is trial 42 with value: 0.8124006359300477.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.093198 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100313
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 983
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_47_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/be35c1c1bb094f309b668515b63caf7d
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 02:22:07,751] Trial 47 finished with value: 0.8093269740328564 and parameters: {'n_estimators': 782, 'learning_rate': 0.026650788853019334, 'max_depth': 13, 'num_leaves': 90, 'min_child_samples': 41, 'colsample_bytree': 0.5865327513176661, 'subsample': 0.9734172276441797, 'reg_alpha': 0.0002590524082699712, 'reg_lambda': 0.0020545679956121767}. Best is trial 42 with value: 0.8124006359300477.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059027 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100351
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 987
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_48_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/98d7b931e9964156bc87c61b329d6ab6
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 02:26:34,699] Trial 48 finished with value: 0.7194488606253312 and parameters: {'n_estimators': 737, 'learning_rate': 0.0024037383673927272, 'max_depth': 11, 'num_leaves': 31, 'min_child_samples': 19, 'colsample_bytree': 0.5155312516577535, 'subsample': 0.9958707874421352, 'reg_alpha': 0.0014249246216833929, 'reg_lambda': 0.015919691368990744}. Best is trial 42 with value: 0.8124006359300477.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.136454 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100335
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 985
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_49_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/10158d75852c4740b969c5e58d06b87e
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738


[I 2025-11-28 02:31:26,447] Trial 49 finished with value: 0.7488076311605724 and parameters: {'n_estimators': 687, 'learning_rate': 0.00734993347039244, 'max_depth': 8, 'num_leaves': 99, 'min_child_samples': 25, 'colsample_bytree': 0.5430865951802151, 'subsample': 0.9545306469075271, 'reg_alpha': 0.005789293653612766, 'reg_lambda': 0.005796890821963834}. Best is trial 42 with value: 0.8124006359300477.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.089909 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100313
[LightGBM] [Info] Number of data points in the train set: 37737, number of used features: 983
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612



X does not have valid feature names, but LGBMClassifier was fitted with feature names


Boto3 will no longer support Python 3.9 starting April 29, 2026. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.10 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



🏃 View run Trial_Best_LightGBM_SMOTE_TFIDF_Bigrams at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738/runs/7ab45e54ae06443d82a812d68d68e551
🧪 View experiment at: http://ec2-98-84-163-86.compute-1.amazonaws.com:5000/#/experiments/358947331249833738
