In [9]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier
import mlflow
import mlflow.sklearn
import optuna
import pickle
import dagshub

dagshub.init(repo_owner='satyajeetrai007', repo_name='Youtube-Comment-Sentiment-Analysis', mlflow=True)
mlflow.set_experiment("Algorithms with HyperParameter tuning")

df = pd.read_csv('data_preprocessed.csv').dropna().drop_duplicates()
df.shape

(36243, 2)

In [10]:

df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

df = df.dropna(subset=['category'])

In [11]:
df.head(3)

Unnamed: 0,clean_comment,category
0,family mormon never tried explain still stare ...,1
1,buddhism much lot compatible christianity espe...,1
2,seriously say thing first get complex explain ...,2


In [12]:
# Step 1: Train-test split (only raw text + labels here)
X_train_text, X_test_text, y_train, y_test = train_test_split(
    df["clean_comment"],
    df["category"],
    test_size=0.2,
    random_state=42,
    stratify=df["category"]
)

# Step 2: TF-IDF (fit ONLY on training text, transform both train/test)
ngram_range = (1, 3)  
max_features = 2000  
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)

X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Step 3: Apply SMOTE **only on training data**
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# ✅ DO NOT resample test data. Keep X_test, y_test untouched.


# ---- MLflow logging function ----
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        mlflow.set_tag("mlflow.runName", f"{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        mlflow.log_param("algo_name", model_name)

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Save model artifact
        with open(f"{model_name}_model.pkl", "wb") as f:
            pickle.dump(model, f)
        mlflow.log_artifact(f"{model_name}_model.pkl", "model")


# ---- Optuna objective function ----
def objective_lightgbm(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 10)

    model = LGBMClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        random_state=42
    )

    model.fit(X_train_resampled, y_train_resampled)
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)


# ---- Run Optuna and log best model ----
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=30)

    best_params = study.best_params
    best_model = LGBMClassifier(
        n_estimators=best_params['n_estimators'],
        learning_rate=best_params['learning_rate'],
        max_depth=best_params['max_depth'],
        random_state=42
    )

    log_mlflow("LightGBM", best_model, X_train_resampled, X_test, y_train_resampled, y_test)


# Run experiment
run_optuna_experiment()

[I 2025-08-27 20:15:11,762] A new study created in memory with name: no-name-969670c3-d7b5-42d6-9dd2-8888a340e934


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.526990 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:15:45,908] Trial 0 finished with value: 0.5595254517864533 and parameters: {'n_estimators': 124, 'learning_rate': 0.0008720217317565556, 'max_depth': 6}. Best is trial 0 with value: 0.5595254517864533.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226898 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:16:08,762] Trial 1 finished with value: 0.560215202096841 and parameters: {'n_estimators': 196, 'learning_rate': 0.00016135260013141792, 'max_depth': 7}. Best is trial 1 with value: 0.560215202096841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.142815 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:16:24,210] Trial 2 finished with value: 0.6000827700372465 and parameters: {'n_estimators': 144, 'learning_rate': 0.0006008348516183281, 'max_depth': 9}. Best is trial 2 with value: 0.6000827700372465.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.186031 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:16:54,198] Trial 3 finished with value: 0.6700234515105532 and parameters: {'n_estimators': 233, 'learning_rate': 0.005705174738912723, 'max_depth': 9}. Best is trial 3 with value: 0.6700234515105532.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.280944 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:17:19,503] Trial 4 finished with value: 0.7072699682714857 and parameters: {'n_estimators': 269, 'learning_rate': 0.017108225411867704, 'max_depth': 5}. Best is trial 4 with value: 0.7072699682714857.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.296211 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:17:49,372] Trial 5 finished with value: 0.5770451096702993 and parameters: {'n_estimators': 171, 'learning_rate': 0.0001429555377997847, 'max_depth': 8}. Best is trial 4 with value: 0.7072699682714857.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.490496 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:18:06,585] Trial 6 finished with value: 0.5381431921644365 and parameters: {'n_estimators': 125, 'learning_rate': 0.0002585871356104622, 'max_depth': 5}. Best is trial 4 with value: 0.7072699682714857.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.208494 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:18:26,709] Trial 7 finished with value: 0.5542833494275072 and parameters: {'n_estimators': 160, 'learning_rate': 0.001365815146583644, 'max_depth': 5}. Best is trial 4 with value: 0.7072699682714857.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.106437 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:18:50,530] Trial 8 finished with value: 0.7675541453993654 and parameters: {'n_estimators': 136, 'learning_rate': 0.09500342619709595, 'max_depth': 4}. Best is trial 8 with value: 0.7675541453993654.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.204932 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:19:09,149] Trial 9 finished with value: 0.6571940957373431 and parameters: {'n_estimators': 178, 'learning_rate': 0.0214500315831839, 'max_depth': 3}. Best is trial 8 with value: 0.7675541453993654.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.135325 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:19:22,708] Trial 10 finished with value: 0.6742999034349565 and parameters: {'n_estimators': 53, 'learning_rate': 0.09532034083063473, 'max_depth': 3}. Best is trial 8 with value: 0.7675541453993654.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.963390 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:19:42,118] Trial 11 finished with value: 0.7919713063870879 and parameters: {'n_estimators': 278, 'learning_rate': 0.07624813911984368, 'max_depth': 4}. Best is trial 11 with value: 0.7919713063870879.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.215424 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:20:00,356] Trial 12 finished with value: 0.8090771140847013 and parameters: {'n_estimators': 299, 'learning_rate': 0.09744767448923465, 'max_depth': 4}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.231814 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:20:28,515] Trial 13 finished with value: 0.730859428886743 and parameters: {'n_estimators': 299, 'learning_rate': 0.027965189374035804, 'max_depth': 4}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.563780 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:22:04,824] Trial 14 finished with value: 0.6598151469168161 and parameters: {'n_estimators': 299, 'learning_rate': 0.006100640404090476, 'max_depth': 6}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.266441 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:22:38,100] Trial 15 finished with value: 0.7544488895020003 and parameters: {'n_estimators': 248, 'learning_rate': 0.045031116581070764, 'max_depth': 4}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.562889 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:22:58,941] Trial 16 finished with value: 0.6199475789764105 and parameters: {'n_estimators': 215, 'learning_rate': 0.009132189285442826, 'max_depth': 3}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.309433 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:24:01,211] Trial 17 finished with value: 0.7966616084977238 and parameters: {'n_estimators': 267, 'learning_rate': 0.049570916985521726, 'max_depth': 7}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.423909 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:25:54,309] Trial 18 finished with value: 0.6541591943716375 and parameters: {'n_estimators': 258, 'learning_rate': 0.002725056699186159, 'max_depth': 10}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.308382 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:26:22,813] Trial 19 finished with value: 0.7653469444061249 and parameters: {'n_estimators': 226, 'learning_rate': 0.0333919375489099, 'max_depth': 7}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.298988 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:26:39,094] Trial 20 finished with value: 0.7399641329838599 and parameters: {'n_estimators': 90, 'learning_rate': 0.05191151154179142, 'max_depth': 8}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.299288 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:27:47,165] Trial 21 finished with value: 0.8038350117257552 and parameters: {'n_estimators': 278, 'learning_rate': 0.06284675291422656, 'max_depth': 6}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.562196 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:32:00,837] Trial 22 finished with value: 0.7195475237963858 and parameters: {'n_estimators': 283, 'learning_rate': 0.017362084239906865, 'max_depth': 6}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.144760 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:36:25,275] Trial 23 finished with value: 0.7874189543385295 and parameters: {'n_estimators': 248, 'learning_rate': 0.04405505143529286, 'max_depth': 7}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 5.221988 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:38:49,397] Trial 24 finished with value: 0.7082356187060285 and parameters: {'n_estimators': 281, 'learning_rate': 0.010665388357334963, 'max_depth': 8}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.459975 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:39:49,153] Trial 25 finished with value: 0.7759690991860946 and parameters: {'n_estimators': 210, 'learning_rate': 0.057874766835006385, 'max_depth': 5}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.233450 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:41:50,954] Trial 26 finished with value: 0.753069388881225 and parameters: {'n_estimators': 261, 'learning_rate': 0.02887365558119918, 'max_depth': 6}. Best is trial 12 with value: 0.8090771140847013.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.628202 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:43:26,991] Trial 27 finished with value: 0.8235618706028418 and parameters: {'n_estimators': 243, 'learning_rate': 0.09948821804039844, 'max_depth': 7}. Best is trial 27 with value: 0.8235618706028418.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.664085 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:45:12,973] Trial 28 finished with value: 0.8312870740791833 and parameters: {'n_estimators': 245, 'learning_rate': 0.0945344404226141, 'max_depth': 9}. Best is trial 28 with value: 0.8312870740791833.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.432764 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-08-27 20:47:18,671] Trial 29 finished with value: 0.8281142226514002 and parameters: {'n_estimators': 195, 'learning_rate': 0.09915432937496137, 'max_depth': 10}. Best is trial 28 with value: 0.8312870740791833.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.879391 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 126964
[LightGBM] [Info] Number of data points in the train set: 37599, number of used features: 1948
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/satyajeetrai007/Youtube-Comment-Sentiment-Analysis.mlflow/#/experiments/6/runs/3a8a846e7fa14e619bd9ebfa66dc34f6
🧪 View experiment at: https://dagshub.com/satyajeetrai007/Youtube-Comment-Sentiment-Analysis.mlflow/#/experiments/6
