In [None]:
import mlflow
mlflow.set_tracking_uri("https://dagshub.com/prasu202324/YTintel-extension.mlflow")

import dagshub
dagshub.init(repo_owner="prasu202324", repo_name="YTintel-extension", mlflow=True)

# Set or create an experiment
mlflow.set_experiment("LightGBM HP Tuning")


Experiment restored!


<Experiment: artifact_location='mlflow-artifacts:/ab7793e0dd8e4bafba950e69e130bcc4', creation_time=1771783504156, experiment_id='6', last_update_time=1771835754427, lifecycle_stage='active', name='LightGBM HP Tuning', tags={}, workspace='default'>

In [4]:
import pandas as pd

df = pd.read_csv('reddit_preprocessing.csv').dropna(subset=['clean_comment'])
df.shape

(36662, 2)

In [5]:

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
import mlflow
import mlflow.sklearn
import optuna
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt
     

In [6]:

# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

In [7]:
# Step 3: TF-IDF vectorizer setup
ngram_range = (1, 3)  # Trigram
max_features = 1000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

# Step 4: Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [8]:
# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

In [9]:
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    with mlflow.start_run():
        # Log model type and trial number
        mlflow.set_tag("mlflow.runName", f"Trial_{trial_number}_{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Log hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

        return accuracy

In [10]:

# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    # Hyperparameter space to explore
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 15)
    num_leaves = trial.suggest_int('num_leaves', 20, 150)
    min_child_samples = trial.suggest_int('min_child_samples', 10, 100)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    subsample = trial.suggest_float('subsample', 0.5, 1.0)
    reg_alpha = trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True)  # L1 regularization
    reg_lambda = trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True)  # L2 regularization

    # Log trial parameters
    params = {
        'n_estimators': n_estimators,
        'learning_rate': learning_rate,
        'max_depth': max_depth,
        'num_leaves': num_leaves,
        'min_child_samples': min_child_samples,
        'colsample_bytree': colsample_bytree,
        'subsample': subsample,
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda
    }

    # Create LightGBM model
    model = LGBMClassifier(n_estimators=n_estimators,
                           learning_rate=learning_rate,
                           max_depth=max_depth,
                           num_leaves=num_leaves,
                           min_child_samples=min_child_samples,
                           colsample_bytree=colsample_bytree,
                           subsample=subsample,
                           reg_alpha=reg_alpha,
                           reg_lambda=reg_lambda,
                           random_state=42)

    # Log each trial as a separate run in MLflow
    accuracy = log_mlflow("LightGBM", model, X_train, X_test, y_train, y_test, params, trial.number)

    return accuracy



In [11]:
# Step 7: Run Optuna for LightGBM, log the best model, and plot the importance of each parameter
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=100)  # Increased to 100 trials

    # Get the best parameters
    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'],
                                learning_rate=best_params['learning_rate'],
                                max_depth=best_params['max_depth'],
                                num_leaves=best_params['num_leaves'],
                                min_child_samples=best_params['min_child_samples'],
                                colsample_bytree=best_params['colsample_bytree'],
                                subsample=best_params['subsample'],
                                reg_alpha=best_params['reg_alpha'],
                                reg_lambda=best_params['reg_lambda'],
                                random_state=42)

    # Log the best model with MLflow and print the classification report
    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test, best_params, "Best")

    # Plot parameter importance
    optuna.visualization.plot_param_importances(study).show()

    # Plot optimization history
    optuna.visualization.plot_optimization_history(study).show()
     

In [None]:

# Run the experiment for LightGBM
run_optuna_experiment()

[32m[I 2026-02-23 14:06:26,126][0m A new study created in memory with name: no-name-5a0649b0-88e7-487e-86f2-0f7905667db7[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040656 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_0_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/cdce2aeac217474a935121fcfa252d0a
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:07:24,561][0m Trial 0 finished with value: 0.7366307334601564 and parameters: {'n_estimators': 165, 'learning_rate': 0.023169510326043537, 'max_depth': 8, 'num_leaves': 116, 'min_child_samples': 51, 'colsample_bytree': 0.5010890006033746, 'subsample': 0.7745787640196993, 'reg_alpha': 0.0035047154947546724, 'reg_lambda': 0.0011710804688286032}. Best is trial 0 with value: 0.7366307334601564.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.045745 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_1_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/a094d5439a974e4387d5492e28c4ea77
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:08:33,560][0m Trial 1 finished with value: 0.6786091735362503 and parameters: {'n_estimators': 248, 'learning_rate': 0.01468989136044178, 'max_depth': 4, 'num_leaves': 138, 'min_child_samples': 74, 'colsample_bytree': 0.8572897164805038, 'subsample': 0.8123254645500186, 'reg_alpha': 0.0015339648959279877, 'reg_lambda': 5.922025610022475}. Best is trial 0 with value: 0.7366307334601564.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044643 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_2_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/31e5e18af55c489080ee73aa2ba6b221
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:09:51,540][0m Trial 2 finished with value: 0.7293384062566054 and parameters: {'n_estimators': 408, 'learning_rate': 0.004413419589863355, 'max_depth': 14, 'num_leaves': 131, 'min_child_samples': 36, 'colsample_bytree': 0.7656109638283267, 'subsample': 0.8622594821492313, 'reg_alpha': 0.334889387257571, 'reg_lambda': 0.1339564157979948}. Best is trial 0 with value: 0.7366307334601564.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043283 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98718
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_3_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/7f33c7df46024766b5a9f64a6039e263
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:11:00,776][0m Trial 3 finished with value: 0.8117734094271825 and parameters: {'n_estimators': 686, 'learning_rate': 0.06823848847952121, 'max_depth': 11, 'num_leaves': 104, 'min_child_samples': 88, 'colsample_bytree': 0.9562625033610535, 'subsample': 0.973925121117048, 'reg_alpha': 0.0023197946378910425, 'reg_lambda': 0.032393105326898104}. Best is trial 3 with value: 0.8117734094271825.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043001 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98399
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 944
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_4_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/9dcae786a86446aeb27be288bc46960a
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:12:10,173][0m Trial 4 finished with value: 0.8097653772986684 and parameters: {'n_estimators': 819, 'learning_rate': 0.04227721199041577, 'max_depth': 11, 'num_leaves': 134, 'min_child_samples': 98, 'colsample_bytree': 0.7349952561502526, 'subsample': 0.9430950273453613, 'reg_alpha': 0.01047414099877167, 'reg_lambda': 4.660004909428659}. Best is trial 3 with value: 0.8117734094271825.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042994 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98993
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_5_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/6986d361f3544dae9844a02bd1d2799c
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:13:20,656][0m Trial 5 finished with value: 0.7043965335024308 and parameters: {'n_estimators': 631, 'learning_rate': 0.005490115932710248, 'max_depth': 6, 'num_leaves': 130, 'min_child_samples': 28, 'colsample_bytree': 0.5627712735669039, 'subsample': 0.968532902772065, 'reg_alpha': 0.05398449420147051, 'reg_lambda': 4.53837768758099}. Best is trial 3 with value: 0.8117734094271825.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042878 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98983
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_6_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/95607758f1fe46bfbd883d5444b13869
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:14:29,531][0m Trial 6 finished with value: 0.8136757556541957 and parameters: {'n_estimators': 601, 'learning_rate': 0.09254510231071608, 'max_depth': 6, 'num_leaves': 124, 'min_child_samples': 30, 'colsample_bytree': 0.9562298819625964, 'subsample': 0.6033341310778805, 'reg_alpha': 3.6631998521218674, 'reg_lambda': 0.016870372174041085}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.045139 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99001
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 969
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_7_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/38b16e26b2504725b053ca5c835b4fad
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:15:38,532][0m Trial 7 finished with value: 0.6049461001902346 and parameters: {'n_estimators': 868, 'learning_rate': 0.0010867283327614264, 'max_depth': 4, 'num_leaves': 97, 'min_child_samples': 22, 'colsample_bytree': 0.9309401887104521, 'subsample': 0.6410944450805101, 'reg_alpha': 0.01820345450830562, 'reg_lambda': 2.393683172629853}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043070 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_8_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/6574b2d602494ddeba5ed3c97c1f27b6
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:16:47,532][0m Trial 8 finished with value: 0.7581906573663073 and parameters: {'n_estimators': 322, 'learning_rate': 0.019665664918873556, 'max_depth': 7, 'num_leaves': 96, 'min_child_samples': 38, 'colsample_bytree': 0.8768000181991145, 'subsample': 0.6387327391402575, 'reg_alpha': 0.04288776770246451, 'reg_lambda': 1.8973061436363052}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042579 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98843
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 959
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_9_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/0a83ef896103403b960276a46db7747f
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:17:59,904][0m Trial 9 finished with value: 0.694567744662862 and parameters: {'n_estimators': 618, 'learning_rate': 0.00013818712444485946, 'max_depth': 14, 'num_leaves': 75, 'min_child_samples': 59, 'colsample_bytree': 0.6931073295502603, 'subsample': 0.65027837282275, 'reg_alpha': 2.138248397675997, 'reg_lambda': 9.400481000537795}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043776 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99089
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 983
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_10_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/ddaf65950dc9493bbc2519c95dd2f21d
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:19:09,068][0m Trial 10 finished with value: 0.5802155992390615 and parameters: {'n_estimators': 463, 'learning_rate': 0.0011920817697455481, 'max_depth': 3, 'num_leaves': 23, 'min_child_samples': 12, 'colsample_bytree': 0.9867092780247556, 'subsample': 0.5049618899150821, 'reg_alpha': 0.00012867563980536426, 'reg_lambda': 0.00010140383948542999}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043997 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98331
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 942
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_11_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/bf3486057d39452eac4737d023da7fc2
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:20:17,919][0m Trial 11 finished with value: 0.8106108645106743 and parameters: {'n_estimators': 729, 'learning_rate': 0.09403866668412732, 'max_depth': 11, 'num_leaves': 67, 'min_child_samples': 100, 'colsample_bytree': 0.9963260102815068, 'subsample': 0.5102777454348377, 'reg_alpha': 5.829559685678781, 'reg_lambda': 0.01826317515189149}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044369 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98747
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 955
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_12_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/fb208cc354394e1eb542e048fb072841
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:21:26,854][0m Trial 12 finished with value: 0.8134643838511942 and parameters: {'n_estimators': 530, 'learning_rate': 0.09094010829367208, 'max_depth': 10, 'num_leaves': 104, 'min_child_samples': 81, 'colsample_bytree': 0.8474707921491501, 'subsample': 0.6883188365929727, 'reg_alpha': 0.0005352862262537609, 'reg_lambda': 0.03449310366645037}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044151 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98797
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 957
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_13_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/4701779337e44472ae0b7a212b1efef0
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:22:37,068][0m Trial 13 finished with value: 0.813147326146692 and parameters: {'n_estimators': 472, 'learning_rate': 0.09070735880994522, 'max_depth': 9, 'num_leaves': 61, 'min_child_samples': 70, 'colsample_bytree': 0.8316553350928901, 'subsample': 0.7077720693922542, 'reg_alpha': 0.00017386465521461594, 'reg_lambda': 0.006555493468883039}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043227 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_14_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/1a5e39bcd85f4f73a74e98a678de492b
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:23:46,067][0m Trial 14 finished with value: 0.7748890298034242 and parameters: {'n_estimators': 991, 'learning_rate': 0.010357603529232963, 'max_depth': 6, 'num_leaves': 148, 'min_child_samples': 46, 'colsample_bytree': 0.9046482283626719, 'subsample': 0.5839466542789382, 'reg_alpha': 0.31488196681624714, 'reg_lambda': 0.2479142611953101}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044234 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_15_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/c509de05447d4007bd65630bfb21cd31
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:25:06,102][0m Trial 15 finished with value: 0.6697315578101881 and parameters: {'n_estimators': 542, 'learning_rate': 0.0014485710352480621, 'max_depth': 10, 'num_leaves': 115, 'min_child_samples': 77, 'colsample_bytree': 0.8064578981730968, 'subsample': 0.71545208172173, 'reg_alpha': 0.0006153616782418192, 'reg_lambda': 0.0023377994397672185}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043997 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98843
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 959
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_16_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/f8c46da8db334ad498e0812f688fda93
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:26:11,924][0m Trial 16 finished with value: 0.6364404988374551 and parameters: {'n_estimators': 338, 'learning_rate': 0.0002674808988625881, 'max_depth': 6, 'num_leaves': 41, 'min_child_samples': 62, 'colsample_bytree': 0.6283372989568738, 'subsample': 0.5607877189745861, 'reg_alpha': 0.6676567681005909, 'reg_lambda': 0.22637250022890562}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044206 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98718
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_17_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/444cad62a29742d3b6ad5ffe8c1efd66
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:27:21,645][0m Trial 17 finished with value: 0.8080744028746565 and parameters: {'n_estimators': 561, 'learning_rate': 0.03264325657704475, 'max_depth': 13, 'num_leaves': 84, 'min_child_samples': 85, 'colsample_bytree': 0.799002673358867, 'subsample': 0.6916667632461255, 'reg_alpha': 0.0950860182616905, 'reg_lambda': 0.04699377159271856}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043949 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99111
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 988
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_18_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/45d46b9b3f894c9f94b44946b9ad6f89
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:28:41,647][0m Trial 18 finished with value: 0.7687592475163814 and parameters: {'n_estimators': 796, 'learning_rate': 0.008617767530268406, 'max_depth': 9, 'num_leaves': 113, 'min_child_samples': 10, 'colsample_bytree': 0.9123403554179101, 'subsample': 0.6029265230122414, 'reg_alpha': 4.0328637636902664, 'reg_lambda': 0.0006400185606887557}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060672 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_19_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/955e933496884e488a77e01b4e62fa1e
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:29:48,977][0m Trial 19 finished with value: 0.7610441767068273 and parameters: {'n_estimators': 105, 'learning_rate': 0.034847772482912355, 'max_depth': 12, 'num_leaves': 91, 'min_child_samples': 43, 'colsample_bytree': 0.7169263731648858, 'subsample': 0.7673454691416675, 'reg_alpha': 0.0005753488348094306, 'reg_lambda': 0.010054086127777365}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042717 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98993
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_20_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/2d2e43eb595a43f8ad54868a1f6111a2
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:31:00,763][0m Trial 20 finished with value: 0.6772352568167407 and parameters: {'n_estimators': 511, 'learning_rate': 0.0025895444204971, 'max_depth': 8, 'num_leaves': 150, 'min_child_samples': 26, 'colsample_bytree': 0.6713703655455006, 'subsample': 0.8489424414130514, 'reg_alpha': 0.007368745599173893, 'reg_lambda': 0.7604930258503522}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044922 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_21_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/423fff0a4e2c4ea68384da6b6598d3b0
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:32:09,656][0m Trial 21 finished with value: 0.8077573451701543 and parameters: {'n_estimators': 420, 'learning_rate': 0.06524314127560493, 'max_depth': 9, 'num_leaves': 59, 'min_child_samples': 77, 'colsample_bytree': 0.8385277724644461, 'subsample': 0.7052053619333496, 'reg_alpha': 0.00014195084225269324, 'reg_lambda': 0.005442380440222005}. Best is trial 6 with value: 0.8136757556541957.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043310 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98821
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_22_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/b3ce30faf811418fa45ef17fe3dcb7fc
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:33:18,653][0m Trial 22 finished with value: 0.8170577045022194 and parameters: {'n_estimators': 628, 'learning_rate': 0.09754661461461496, 'max_depth': 10, 'num_leaves': 53, 'min_child_samples': 66, 'colsample_bytree': 0.8207708981842748, 'subsample': 0.6819339545799052, 'reg_alpha': 0.00035824933188251197, 'reg_lambda': 0.07801824664032224}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041585 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98821
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_23_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/ab1bda5ef782493aaa9a35212bfbf7c7
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:34:27,644][0m Trial 23 finished with value: 0.8120904671316846 and parameters: {'n_estimators': 635, 'learning_rate': 0.04741870594548399, 'max_depth': 10, 'num_leaves': 43, 'min_child_samples': 65, 'colsample_bytree': 0.78695790980968, 'subsample': 0.6647423535175216, 'reg_alpha': 0.0005916236718110792, 'reg_lambda': 0.07235406034719306}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043269 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98881
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 961
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_24_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/8fe48afaf9144f30a69bba6dd8ee7fdc
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:35:36,659][0m Trial 24 finished with value: 0.8145212428662016 and parameters: {'n_estimators': 584, 'learning_rate': 0.09555503998633293, 'max_depth': 7, 'num_leaves': 82, 'min_child_samples': 53, 'colsample_bytree': 0.8870420829638577, 'subsample': 0.5511793164006977, 'reg_alpha': 0.0003245996463891553, 'reg_lambda': 0.01727768537244912}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043779 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98881
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 961
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_25_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/3c142df557a04736b42b4f82d526366d
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:36:45,698][0m Trial 25 finished with value: 0.7898964278165292 and parameters: {'n_estimators': 733, 'learning_rate': 0.026553669109472315, 'max_depth': 5, 'num_leaves': 48, 'min_child_samples': 53, 'colsample_bytree': 0.9493103357658075, 'subsample': 0.5616719212419746, 'reg_alpha': 1.2798804506083183, 'reg_lambda': 0.013175050775655584}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043382 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_26_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/0fca21cdfaaa4b2ebfaf4090cd723f9f
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:37:54,653][0m Trial 26 finished with value: 0.8099767491016698 and parameters: {'n_estimators': 599, 'learning_rate': 0.051537454676292595, 'max_depth': 7, 'num_leaves': 81, 'min_child_samples': 35, 'colsample_bytree': 0.9014809557674902, 'subsample': 0.6035163256560113, 'reg_alpha': 0.16960717072193657, 'reg_lambda': 0.409671948415002}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042446 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98797
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 957
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_27_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/6269440c30fb49a1bfb017dc6f5b9ac7
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:39:03,662][0m Trial 27 finished with value: 0.7792221517649546 and parameters: {'n_estimators': 694, 'learning_rate': 0.014044194072576755, 'max_depth': 7, 'num_leaves': 25, 'min_child_samples': 68, 'colsample_bytree': 0.8856914538297329, 'subsample': 0.5486035939674163, 'reg_alpha': 0.0013801650097704748, 'reg_lambda': 0.10208514636716697}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044282 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99038
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 974
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_28_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/efbdcdeb0bd24feca1886a03ad3a9038
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:40:23,301][0m Trial 28 finished with value: 0.636863242443458 and parameters: {'n_estimators': 933, 'learning_rate': 0.0005859913474968903, 'max_depth': 8, 'num_leaves': 76, 'min_child_samples': 19, 'colsample_bytree': 0.9733350717713403, 'subsample': 0.6206340199677961, 'reg_alpha': 9.318498660871292, 'reg_lambda': 0.0030083783565270444}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042970 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_29_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/6fac7e16aa574bdd9d800c3aab2a9f8a
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:41:32,205][0m Trial 29 finished with value: 0.7571337983512999 and parameters: {'n_estimators': 371, 'learning_rate': 0.02282880769927514, 'max_depth': 5, 'num_leaves': 35, 'min_child_samples': 49, 'colsample_bytree': 0.934324098647102, 'subsample': 0.7324831609870834, 'reg_alpha': 0.003964567921959158, 'reg_lambda': 0.0015704428171733808}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042803 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98843
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 959
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_30_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/12d049b29fa44e828e60364a7bcb741f
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:42:41,234][0m Trial 30 finished with value: 0.7934897484675544 and parameters: {'n_estimators': 255, 'learning_rate': 0.048063185949804584, 'max_depth': 8, 'num_leaves': 56, 'min_child_samples': 59, 'colsample_bytree': 0.7673658843802614, 'subsample': 0.5395835071746725, 'reg_alpha': 0.0002976204969162083, 'reg_lambda': 0.0005644422306592694}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041901 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98718
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_31_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/43f7659cc1b447ba9f2c2c558fcdda66
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:43:50,212][0m Trial 31 finished with value: 0.8124075248361868 and parameters: {'n_estimators': 508, 'learning_rate': 0.09509441995072396, 'max_depth': 10, 'num_leaves': 120, 'min_child_samples': 85, 'colsample_bytree': 0.8537036019848312, 'subsample': 0.6795484402765538, 'reg_alpha': 0.0007348593959270039, 'reg_lambda': 0.02395301565541237}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042938 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98657
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 952
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_32_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/b8c1d02162b5416483d91c717dc8cb43
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:44:59,267][0m Trial 32 finished with value: 0.8113506658211794 and parameters: {'n_estimators': 578, 'learning_rate': 0.09793543082133561, 'max_depth': 12, 'num_leaves': 108, 'min_child_samples': 91, 'colsample_bytree': 0.8263917421253387, 'subsample': 0.7666097285576564, 'reg_alpha': 0.0002285831511183716, 'reg_lambda': 0.07104660331474474}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043253 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_33_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/4e04244f5ed64ad5881dc8130b77126e
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:46:08,219][0m Trial 33 finished with value: 0.8142041851616995 and parameters: {'n_estimators': 670, 'learning_rate': 0.06382941389381186, 'max_depth': 10, 'num_leaves': 124, 'min_child_samples': 73, 'colsample_bytree': 0.8712862767733144, 'subsample': 0.5865983047161522, 'reg_alpha': 0.001411465494569907, 'reg_lambda': 0.04115349581626023}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042687 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_34_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/4360e706439a46dc98381fac76897334
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:47:17,307][0m Trial 34 finished with value: 0.8095540054956669 and parameters: {'n_estimators': 665, 'learning_rate': 0.059193744650833, 'max_depth': 7, 'num_leaves': 124, 'min_child_samples': 71, 'colsample_bytree': 0.8815671883424033, 'subsample': 0.5779673553216573, 'reg_alpha': 0.0014180405492751347, 'reg_lambda': 0.16309695690573886}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041983 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98863
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 960
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_35_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/a67fe14affd6438d811f3cde216bba14
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:49:45,486][0m Trial 35 finished with value: 0.7783766645529486 and parameters: {'n_estimators': 768, 'learning_rate': 0.017501109368824194, 'max_depth': 5, 'num_leaves': 141, 'min_child_samples': 56, 'colsample_bytree': 0.9594913297853012, 'subsample': 0.5210456968278452, 'reg_alpha': 0.0033361264148647784, 'reg_lambda': 0.006431684665670131}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044958 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_36_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/9122f7ecf5e248fa9f033f00e6bdd9ee
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:50:51,351][0m Trial 36 finished with value: 0.8119847812301839 and parameters: {'n_estimators': 859, 'learning_rate': 0.03334281393437406, 'max_depth': 9, 'num_leaves': 73, 'min_child_samples': 40, 'colsample_bytree': 0.8630258533126282, 'subsample': 0.6107999208012588, 'reg_alpha': 0.006869006947684816, 'reg_lambda': 0.04291457937808425}. Best is trial 22 with value: 0.8170577045022194.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042590 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98983
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_37_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/15e253e60a7b428982e27d30e0695986
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:52:06,294][0m Trial 37 finished with value: 0.8191714225322342 and parameters: {'n_estimators': 671, 'learning_rate': 0.07115996912865756, 'max_depth': 11, 'num_leaves': 126, 'min_child_samples': 32, 'colsample_bytree': 0.5001082413981426, 'subsample': 0.8172259242914355, 'reg_alpha': 0.01819116501798817, 'reg_lambda': 0.4626306764277623}. Best is trial 37 with value: 0.8191714225322342.[0m


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044600 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98821
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_38_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/9f09c20f1c5b4fe196be2ad8e8f342ef
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:53:24,896][0m Trial 38 finished with value: 0.7655886704713591 and parameters: {'n_estimators': 676, 'learning_rate': 0.006120925690415668, 'max_depth': 12, 'num_leaves': 135, 'min_child_samples': 64, 'colsample_bytree': 0.6191884229735246, 'subsample': 0.8002520266127708, 'reg_alpha': 0.0014548680747855068, 'reg_lambda': 0.7043376992725615}. Best is trial 37 with value: 0.8191714225322342.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042219 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_39_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/50d9987144554f2e9c31a431e4f91027
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:54:41,422][0m Trial 39 finished with value: 0.8186429930247305 and parameters: {'n_estimators': 744, 'learning_rate': 0.06323475684817126, 'max_depth': 11, 'num_leaves': 93, 'min_child_samples': 50, 'colsample_bytree': 0.5183241987281701, 'subsample': 0.8940019736186984, 'reg_alpha': 0.00030965890300474166, 'reg_lambda': 0.9648547286007487}. Best is trial 37 with value: 0.8191714225322342.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044445 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98983
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Trial_40_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6/runs/2f9525b6433144748a08b6b8e7ffd877
🧪 View experiment at: https://dagshub.com/prasu202324/YTintel-extension.mlflow/#/experiments/6


[32m[I 2026-02-23 14:56:06,513][0m Trial 40 finished with value: 0.7971887550200804 and parameters: {'n_estimators': 740, 'learning_rate': 0.011894247148615384, 'max_depth': 13, 'num_leaves': 91, 'min_child_samples': 33, 'colsample_bytree': 0.5006799555097616, 'subsample': 0.8899060798765078, 'reg_alpha': 0.017269213747203617, 'reg_lambda': 1.6960112415438693}. Best is trial 37 with value: 0.8191714225322342.[0m


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043258 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


