<a href="https://colab.research.google.com/github/raj-coding1/comment-analysis/blob/main/Exp_6_lightgbm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mlflow dvc dagshub optuna

Collecting mlflow
  Downloading mlflow-3.6.0-py3-none-any.whl.metadata (31 kB)
Collecting dvc
  Downloading dvc-3.64.0-py3-none-any.whl.metadata (17 kB)
Collecting dagshub
  Downloading dagshub-0.6.3-py3-none-any.whl.metadata (12 kB)
Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting mlflow-skinny==3.6.0 (from mlflow)
  Downloading mlflow_skinny-3.6.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.6.0 (from mlflow)
  Downloading mlflow_tracing-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting huey<3,>=2.5.0 (from mlflow)
  D

In [None]:
import pandas as pd
# from gensim.models import Word2Vec
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import mlflow
import optuna
import dvc
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# --- Load and clean -----------------------------------------------------------
# Built as one chain (no in-place mutation) so re-running the cell always starts
# from the CSV on disk. Rows with any missing value are dropped and the label -1
# is remapped to 2 so that no class id is negative.
df = (
    pd.read_csv('/content/df.csv')
    .dropna(axis=0)
    .assign(category=lambda frame: frame['category'].replace(-1, 2))
)

# --- Feature configuration ----------------------------------------------------
ngram_choice = (1, 3)   # unigrams up to trigrams
max_feat = 1000         # vocabulary size cap for TF-IDF
y = df['category']

# --- Split BEFORE vectorizing -------------------------------------------------
# Fitting TF-IDF on the full corpus lets the test comments influence the
# vocabulary and IDF weights (data leakage). Split the raw text first, using the
# same test_size / random_state as before, then fit the vectorizer on the
# training text only.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_comment'], y, test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(
    max_features=max_feat,
    ngram_range=ngram_choice
)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Full-corpus matrix, kept only for any later cell that still refers to `X`.
# It is produced with the train-fitted vectorizer, not by re-fitting.
X = vectorizer.transform(df['cleaned_comment'])

# --- Experiment tracking ------------------------------------------------------
import dagshub
mlflow.set_tracking_uri('https://dagshub.com/raj-coding1/youtube-comment-analysis.mlflow')
dagshub.init(repo_owner='raj-coding1', repo_name='youtube-comment-analysis', mlflow=True)

mlflow.set_experiment('EXP-11: Final LightGBM')


def log_ml_model(model_name, model, X_train,X_test,y_train,y_test, params, trial_number):
  """Fit ``model``, score it on the test split and record everything in one MLflow run.

  Args:
    model_name: Label logged as the ``algo_name`` param and used in the run name.
    model: Unfitted estimator exposing ``fit`` and ``predict``.
    X_train, X_test: Feature matrices used for fitting and for evaluation.
    y_train, y_test: Label vectors matching the feature matrices.
    params: Mapping of hyper-parameter name -> value; logged as run params.
    trial_number: Identifier of the trial (an Optuna trial number, or a label
      such as ``'Best'``); appended to the run name and stored as a tag.

  Returns:
    The test-set accuracy, so callers (e.g. an Optuna objective) can optimise it.
  """
  # `run_name=` is the supported way to name a run; a hand-written
  # 'mlflow.runname' tag is not the reserved 'mlflow.runName' key and is ignored.
  run_name = f'{model_name}_tfidf_smote_trigram_trial_{trial_number}'

  with mlflow.start_run(run_name=run_name):
    mlflow.set_tag("experiment_type", "algorithm comparison")
    mlflow.set_tag("trial_number", str(trial_number))
    mlflow.log_param("algo_name", model_name)
    mlflow.log_params(params)

    # Train and evaluate
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Flatten the per-class / averaged entries of the classification report into
    # "<label>_<metric>" metrics. The scalar 'accuracy' entry is not a dict and
    # is skipped here because it is already logged above.
    class_rep = classification_report(y_test, y_pred, output_dict=True)
    report_metrics = {
        f"{label}_{metric}": value
        for label, info in class_rep.items()
        if isinstance(info, dict)
        for metric, value in info.items()
    }
    mlflow.log_metrics(report_metrics)

    # Confusion matrix: draw on an explicit figure and always close it, so
    # repeated trials do not accumulate open figures.
    conf_matrix_path = "ConfusionMatrix.png"
    conf_matrix = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots()
    try:
      sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
      ax.set_xlabel("Prediction")
      ax.set_ylabel("Actual")
      ax.set_title("Confusion Matrix")
      fig.savefig(conf_matrix_path)
      # Log the exact path that was written instead of assuming cwd == /content.
      mlflow.log_artifact(conf_matrix_path)
    finally:
      plt.close(fig)

    # Snapshot of the module-level DataFrame `df` used for this experiment.
    dataset_path = "df1.csv"
    df.to_csv(dataset_path, index=False)
    mlflow.log_artifact(dataset_path)
    # mlflow.sklearn.log_model(model,f'{model_name}_model')
    print(f"accuracy : {accuracy}")

  return accuracy


def _suggest_lgbm_params(trial):
  """Sample one LightGBM hyper-parameter set from the Optuna search space.

  The keys are the exact keyword names LGBMClassifier understands
  (`colsample_bytree`, `reg_lambda`), so the dict can be splatted straight into
  the constructor and also matches `study.best_params` afterwards.
  """
  return {
      'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
      'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
      'max_depth': trial.suggest_int('max_depth', 3, 15),
      'num_leaves': trial.suggest_int('num_leaves', 20, 150),
      'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
      'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
      'subsample': trial.suggest_float('subsample', 0.5, 1.0),
      'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),
      'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True),
  }


def _build_lgbm(params):
  """Create the classifier for a parameter set; shared by the trials and the final refit.

  `subsample_freq=1` is required for `subsample` to take effect: LightGBM only
  performs row bagging when the bagging frequency is non-zero. The fixed
  `random_state` keeps trials and the final refit comparable.
  """
  return LGBMClassifier(random_state=42, subsample_freq=1, **params)


def objective(trial):
  """Optuna objective: train and log one LightGBM configuration.

  Returns the value produced by `log_ml_model`, which Optuna maximises; that
  function is expected to return the test-set accuracy.
  """
  params = _suggest_lgbm_params(trial)
  model = _build_lgbm(params)
  return log_ml_model('LGBMClassifier', model, X_train, X_test, y_train, y_test, params, trial.number)


def run_optuna_exp(n_trials=30):
  """Run the hyper-parameter search, then refit and log the best configuration.

  Args:
    n_trials: Number of Optuna trials to run (defaults to 30).
  """
  # A seeded sampler makes the sequence of suggested configurations repeatable.
  study = optuna.create_study(
      direction='maximize',
      sampler=optuna.samplers.TPESampler(seed=42),
  )
  study.optimize(objective, n_trials=n_trials)

  # Refit with exactly the same construction path as the trials so the 'Best'
  # run reproduces the winning trial instead of silently differing from it.
  best_params = study.best_params
  best_model = _build_lgbm(best_params)
  log_ml_model('LGBMClassifier', best_model, X_train, X_test, y_train, y_test, best_params, 'Best')


run_optuna_exp()

[I 2025-11-30 11:07:55,370] A new study created in memory with name: no-name-29524a00-a119-494c-b289-18ccb3f4ecc7


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.176912 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:08:04,803] Trial 0 finished with value: 0.4331105959361789 and parameters: {'n_estimators': 191, 'learning_rate': 0.00025947481754215484, 'max_depth': 4}. Best is trial 0 with value: 0.4331105959361789.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.180689 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:08:16,935] Trial 1 finished with value: 0.6930315014318833 and parameters: {'n_estimators': 201, 'learning_rate': 0.020016759851003853, 'max_depth': 5}. Best is trial 1 with value: 0.6930315014318833.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.199192 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:08:26,209] Trial 2 finished with value: 0.6485749352243284 and parameters: {'n_estimators': 109, 'learning_rate': 0.005919636688408279, 'max_depth': 9}. Best is trial 1 with value: 0.6930315014318833.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.310272 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:08:40,230] Trial 3 finished with value: 0.7545342970135006 and parameters: {'n_estimators': 297, 'learning_rate': 0.04518323567571173, 'max_depth': 4}. Best is trial 3 with value: 0.7545342970135006.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.307040 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:08:57,302] Trial 4 finished with value: 0.43338333560616393 and parameters: {'n_estimators': 220, 'learning_rate': 0.0007236808857164227, 'max_depth': 8}. Best is trial 3 with value: 0.7545342970135006.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.179868 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:09:03,362] Trial 5 finished with value: 0.6360289104050184 and parameters: {'n_estimators': 111, 'learning_rate': 0.011980146626957119, 'max_depth': 4}. Best is trial 3 with value: 0.7545342970135006.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.203577 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:09:21,168] Trial 6 finished with value: 0.7145779353606982 and parameters: {'n_estimators': 231, 'learning_rate': 0.013644464608578068, 'max_depth': 9}. Best is trial 3 with value: 0.7545342970135006.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.179529 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:09:28,727] Trial 7 finished with value: 0.43338333560616393 and parameters: {'n_estimators': 105, 'learning_rate': 0.001561333598294905, 'max_depth': 8}. Best is trial 3 with value: 0.7545342970135006.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.182180 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:09:51,116] Trial 8 finished with value: 0.4331105959361789 and parameters: {'n_estimators': 258, 'learning_rate': 0.00024254156255053188, 'max_depth': 10}. Best is trial 3 with value: 0.7545342970135006.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.179456 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:10:11,618] Trial 9 finished with value: 0.4331105959361789 and parameters: {'n_estimators': 255, 'learning_rate': 0.00022570466508724406, 'max_depth': 9}. Best is trial 3 with value: 0.7545342970135006.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.195805 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:10:29,538] Trial 10 finished with value: 0.7782626483021956 and parameters: {'n_estimators': 299, 'learning_rate': 0.04703046936007497, 'max_depth': 6}. Best is trial 10 with value: 0.7782626483021956.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.177399 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:10:49,289] Trial 11 finished with value: 0.7976271648711305 and parameters: {'n_estimators': 298, 'learning_rate': 0.09467743542060351, 'max_depth': 6}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.176766 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:11:08,143] Trial 12 finished with value: 0.7962634665212055 and parameters: {'n_estimators': 299, 'learning_rate': 0.07967465432482758, 'max_depth': 6}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.298862 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:11:12,288] Trial 13 finished with value: 0.6993045138415382 and parameters: {'n_estimators': 50, 'learning_rate': 0.07182118124487878, 'max_depth': 6}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.178579 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:11:29,512] Trial 14 finished with value: 0.7973544252011455 and parameters: {'n_estimators': 266, 'learning_rate': 0.09835007369774318, 'max_depth': 7}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.183568 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:11:40,769] Trial 15 finished with value: 0.6338469930451384 and parameters: {'n_estimators': 160, 'learning_rate': 0.0038426338937556617, 'max_depth': 7}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.196023 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:11:57,670] Trial 16 finished with value: 0.7550797763534706 and parameters: {'n_estimators': 265, 'learning_rate': 0.028222440483644105, 'max_depth': 7}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.191528 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:12:04,591] Trial 17 finished with value: 0.6193917905359334 and parameters: {'n_estimators': 160, 'learning_rate': 0.007445715738875717, 'max_depth': 3}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.314321 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:12:25,218] Trial 18 finished with value: 0.4528842220100914 and parameters: {'n_estimators': 271, 'learning_rate': 0.0016774453252238549, 'max_depth': 5}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.306089 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:12:43,470] Trial 19 finished with value: 0.796672576026183 and parameters: {'n_estimators': 238, 'learning_rate': 0.0927244683682051, 'max_depth': 7}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.306875 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:13:00,666] Trial 20 finished with value: 0.7404882040092732 and parameters: {'n_estimators': 279, 'learning_rate': 0.029374086132282762, 'max_depth': 5}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.311108 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:13:17,981] Trial 21 finished with value: 0.795036138006273 and parameters: {'n_estimators': 230, 'learning_rate': 0.08313698481547468, 'max_depth': 7}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.329422 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:13:37,375] Trial 22 finished with value: 0.7965362061911905 and parameters: {'n_estimators': 245, 'learning_rate': 0.09181949917268205, 'max_depth': 8}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.314585 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:13:56,990] Trial 23 finished with value: 0.7728078549024956 and parameters: {'n_estimators': 280, 'learning_rate': 0.03546806588017523, 'max_depth': 7}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.180262 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:14:10,457] Trial 24 finished with value: 0.7657166234828856 and parameters: {'n_estimators': 201, 'learning_rate': 0.051536120444761316, 'max_depth': 6}. Best is trial 11 with value: 0.7976271648711305.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.315857 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:14:28,511] Trial 25 finished with value: 0.798581753716078 and parameters: {'n_estimators': 240, 'learning_rate': 0.09987586800523875, 'max_depth': 8}. Best is trial 25 with value: 0.798581753716078.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.179527 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:14:43,169] Trial 26 finished with value: 0.7194872494204282 and parameters: {'n_estimators': 214, 'learning_rate': 0.01819167492875518, 'max_depth': 8}. Best is trial 25 with value: 0.798581753716078.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.180627 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:15:05,945] Trial 27 finished with value: 0.7046229374062457 and parameters: {'n_estimators': 277, 'learning_rate': 0.009143109719649154, 'max_depth': 10}. Best is trial 25 with value: 0.798581753716078.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.196707 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:15:19,362] Trial 28 finished with value: 0.4331105959361789 and parameters: {'n_estimators': 176, 'learning_rate': 0.00012169214377143368, 'max_depth': 8}. Best is trial 25 with value: 0.798581753716078.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.178160 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356


[I 2025-11-30 11:15:35,689] Trial 29 finished with value: 0.7074867039410883 and parameters: {'n_estimators': 249, 'learning_rate': 0.020760587228050815, 'max_depth': 5}. Best is trial 25 with value: 0.798581753716078.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.316453 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81446
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 972
[LightGBM] [Info] Start training from score -1.062681
[LightGBM] [Info] Start training from score -0.845356
[LightGBM] [Info] Start training from score -1.491356




accuracy : 0.798581753716078
🏃 View run intelligent-ape-938 at: https://dagshub.com/raj-coding1/youtube-comment-analysis.mlflow/#/experiments/9/runs/c84eea504afd4e5482949242df5dc42f
🧪 View experiment at: https://dagshub.com/raj-coding1/youtube-comment-analysis.mlflow/#/experiments/9
