In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier
import mlflow
import mlflow.sklearn
import optuna
import pickle
import dagshub

dagshub.init(repo_owner='satyajeetrai007', repo_name='Youtube-Comment-Sentiment-Analysis', mlflow=True)
mlflow.set_experiment("Algorithms with HyperParameter tuning")

df = pd.read_csv('data_preprocessed.csv').dropna().drop_duplicates()
df.shape

(36243, 2)

In [4]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

# Step 3: TF-IDF vectorizer setup
ngram_range = (1, 3)  # Trigram
max_features = 2000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

# Step 4: Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        # Log model type
        mlflow.set_tag("mlflow.runName", f"{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model

        with open(f"{model_name}_model", "wb") as f:
            pickle.dump(model, f)

        with open(f"{model_name}_model", "rb") as f:
            clf = pickle.load(f)

        mlflow.log_artifact(f"{model_name}_model", "model")


# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 10)

    model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, random_state=42)
    return accuracy_score(y_test, model.fit(X_train, y_train).predict(X_test))


# Step 7: Run Optuna for LightGBM, log the best model only
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=30)

    # Get the best parameters and log only the best model
    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'], max_depth=best_params['max_depth'], random_state=42)

    # Log the best model with MLflow, passing the algo_name as "LightGBM"
    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test)

# Run the experiment for LightGBM
run_optuna_experiment()

[I 2025-08-26 20:58:11,455] A new study created in memory with name: no-name-e241b67f-7d9c-442f-a1cc-f04efacf9e76


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.186511 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 20:59:04,705] Trial 0 finished with value: 0.6181914893617021 and parameters: {'n_estimators': 290, 'learning_rate': 0.00016209493003053793, 'max_depth': 9}. Best is trial 0 with value: 0.6181914893617021.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.313425 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 20:59:12,291] Trial 1 finished with value: 0.5476595744680851 and parameters: {'n_estimators': 78, 'learning_rate': 0.0019235493796486971, 'max_depth': 4}. Best is trial 0 with value: 0.6181914893617021.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.284988 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 20:59:36,854] Trial 2 finished with value: 0.6385106382978724 and parameters: {'n_estimators': 149, 'learning_rate': 0.0030011096570448947, 'max_depth': 10}. Best is trial 2 with value: 0.6385106382978724.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.260103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:00:08,133] Trial 3 finished with value: 0.5548936170212766 and parameters: {'n_estimators': 232, 'learning_rate': 0.0001367567931639806, 'max_depth': 5}. Best is trial 2 with value: 0.6385106382978724.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.265836 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:00:37,446] Trial 4 finished with value: 0.600531914893617 and parameters: {'n_estimators': 224, 'learning_rate': 0.0028372096957502615, 'max_depth': 5}. Best is trial 2 with value: 0.6385106382978724.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.171111 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:00:51,582] Trial 5 finished with value: 0.5981914893617021 and parameters: {'n_estimators': 100, 'learning_rate': 0.00016831965147054236, 'max_depth': 8}. Best is trial 2 with value: 0.6385106382978724.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.138423 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:01:01,796] Trial 6 finished with value: 0.8025531914893617 and parameters: {'n_estimators': 117, 'learning_rate': 0.09653254083825223, 'max_depth': 9}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105579 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:01:12,839] Trial 7 finished with value: 0.5663829787234043 and parameters: {'n_estimators': 159, 'learning_rate': 0.00013961861262735736, 'max_depth': 6}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.118482 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:01:18,485] Trial 8 finished with value: 0.62 and parameters: {'n_estimators': 149, 'learning_rate': 0.008868003325691104, 'max_depth': 4}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.116132 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:01:29,116] Trial 9 finished with value: 0.607872340425532 and parameters: {'n_estimators': 183, 'learning_rate': 0.004032410737415692, 'max_depth': 5}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.279480 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:01:35,554] Trial 10 finished with value: 0.7264893617021276 and parameters: {'n_estimators': 53, 'learning_rate': 0.06602524738483688, 'max_depth': 8}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.170269 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:01:41,003] Trial 11 finished with value: 0.7506382978723404 and parameters: {'n_estimators': 53, 'learning_rate': 0.09462838303601756, 'max_depth': 8}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.184722 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:01:49,850] Trial 12 finished with value: 0.7637234042553191 and parameters: {'n_estimators': 98, 'learning_rate': 0.05986331292282727, 'max_depth': 8}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.150376 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:02:01,850] Trial 13 finished with value: 0.7167021276595744 and parameters: {'n_estimators': 112, 'learning_rate': 0.023672752879611233, 'max_depth': 10}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.222036 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:02:13,851] Trial 14 finished with value: 0.7048936170212766 and parameters: {'n_estimators': 102, 'learning_rate': 0.030772907100452305, 'max_depth': 7}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.272525 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:02:26,548] Trial 15 finished with value: 0.7086170212765958 and parameters: {'n_estimators': 123, 'learning_rate': 0.020995364573371093, 'max_depth': 9}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.184035 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:02:40,169] Trial 16 finished with value: 0.7842553191489362 and parameters: {'n_estimators': 193, 'learning_rate': 0.05086769032532485, 'max_depth': 7}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.100156 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:02:55,199] Trial 17 finished with value: 0.5969148936170213 and parameters: {'n_estimators': 202, 'learning_rate': 0.0007385637685172027, 'max_depth': 7}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106304 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:03:03,524] Trial 18 finished with value: 0.6526595744680851 and parameters: {'n_estimators': 275, 'learning_rate': 0.012577148841230132, 'max_depth': 3}. Best is trial 6 with value: 0.8025531914893617.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.096134 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:03:25,479] Trial 19 finished with value: 0.8059574468085107 and parameters: {'n_estimators': 255, 'learning_rate': 0.047039716088411655, 'max_depth': 9}. Best is trial 19 with value: 0.8059574468085107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.379675 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:03:47,839] Trial 20 finished with value: 0.6997872340425532 and parameters: {'n_estimators': 257, 'learning_rate': 0.009026757492104827, 'max_depth': 9}. Best is trial 19 with value: 0.8059574468085107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.090708 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:04:07,294] Trial 21 finished with value: 0.8036170212765957 and parameters: {'n_estimators': 243, 'learning_rate': 0.04312889293157268, 'max_depth': 10}. Best is trial 19 with value: 0.8059574468085107.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.102647 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:04:30,405] Trial 22 finished with value: 0.8004255319148936 and parameters: {'n_estimators': 245, 'learning_rate': 0.03814895849533972, 'max_depth': 10}. Best is trial 19 with value: 0.8059574468085107.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.425243 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:04:49,027] Trial 23 finished with value: 0.8388297872340426 and parameters: {'n_estimators': 299, 'learning_rate': 0.09814576286986673, 'max_depth': 9}. Best is trial 23 with value: 0.8388297872340426.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.092468 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:05:14,450] Trial 24 finished with value: 0.7446808510638298 and parameters: {'n_estimators': 299, 'learning_rate': 0.013282050312753065, 'max_depth': 10}. Best is trial 23 with value: 0.8388297872340426.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.195082 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:05:34,199] Trial 25 finished with value: 0.8002127659574468 and parameters: {'n_estimators': 267, 'learning_rate': 0.038883801989277995, 'max_depth': 9}. Best is trial 23 with value: 0.8388297872340426.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.095647 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:05:59,368] Trial 26 finished with value: 0.7578723404255319 and parameters: {'n_estimators': 220, 'learning_rate': 0.02077081643866848, 'max_depth': 10}. Best is trial 23 with value: 0.8388297872340426.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.192260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:06:25,020] Trial 27 finished with value: 0.6220212765957447 and parameters: {'n_estimators': 275, 'learning_rate': 0.0005467360587561846, 'max_depth': 9}. Best is trial 23 with value: 0.8388297872340426.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.116054 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:06:44,508] Trial 28 finished with value: 0.6732978723404255 and parameters: {'n_estimators': 247, 'learning_rate': 0.006522396866744775, 'max_depth': 8}. Best is trial 23 with value: 0.8388297872340426.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.120005 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-08-26 21:07:02,922] Trial 29 finished with value: 0.843936170212766 and parameters: {'n_estimators': 291, 'learning_rate': 0.0991727027630526, 'max_depth': 10}. Best is trial 29 with value: 0.843936170212766.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.094975 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 127600
[LightGBM] [Info] Number of data points in the train set: 37598, number of used features: 1935
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665




🏃 View run LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/satyajeetrai007/Youtube-Comment-Sentiment-Analysis.mlflow/#/experiments/6/runs/9d30ae3562254653b3ff1189c3d11f51
🧪 View experiment at: https://dagshub.com/satyajeetrai007/Youtube-Comment-Sentiment-Analysis.mlflow/#/experiments/6
