In [1]:
import dagshub
dagshub.init(repo_owner='sahilbhardwaj23', repo_name='Youtube-comment-Sentiment-Analysis', mlflow=True)

In [2]:
import mlflow
# Step 2: Set up the MLflow tracking server
mlflow.set_tracking_uri("https://dagshub.com/sahilbhardwaj23/Youtube-comment-Sentiment-Analysis.mlflow")

In [4]:
from mlflow.tracking import MlflowClient
import mlflow

In [5]:
def set_or_create_experiment(experiment_name):
    client = MlflowClient()
    experiment = client.get_experiment_by_name(experiment_name)

    # Check if the experiment exists and is active
    if experiment:
        if experiment.lifecycle_stage == "active":
            print(f"Using existing experiment '{experiment_name}' (ID: {experiment.experiment_id})")
            mlflow.set_experiment(experiment_name)
        else:
            # Restore if it's deleted
            print(f"Restoring deleted experiment '{experiment_name}' (ID: {experiment.experiment_id})")
            client.restore_experiment(experiment.experiment_id)
            mlflow.set_experiment(experiment_name)
    else:
        # Create a new experiment if it doesn't exist
        print(f"Creating new experiment '{experiment_name}'")
        mlflow.set_experiment(experiment_name)



In [6]:
# Set or create an experiment
set_or_create_experiment("ML Algos with HP Tuning")

Creating new experiment 'ML Algos with HP Tuning'


2024/11/02 22:48:09 INFO mlflow.tracking.fluent: Experiment with name 'ML Algos with HP Tuning' does not exist. Creating a new experiment.


In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier
import mlflow
import mlflow.sklearn
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
df = pd.read_csv('reddit_preprocessing.csv').dropna()
df.shape

(36662, 2)

In [9]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

# Step 3: TF-IDF vectorizer setup
ngram_range = (1, 3)  # Trigram
max_features = 1000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

# Step 4: Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        # Log model type
        mlflow.set_tag("mlflow.runName", f"{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")


# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 10)

    model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, random_state=42)
    return accuracy_score(y_test, model.fit(X_train, y_train).predict(X_test))


# Step 7: Run Optuna for LightGBM, log the best model only
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=30)

    # Get the best parameters and log only the best model
    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'], max_depth=best_params['max_depth'], random_state=42)

    # Log the best model with MLflow, passing the algo_name as "LightGBM"
    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test)

# Run the experiment for LightGBM
run_optuna_experiment()


[I 2024-11-02 22:49:43,355] A new study created in memory with name: no-name-046e37cb-277e-40df-bc54-cdee214821ef


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.064410 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:49:46,907] Trial 0 finished with value: 0.606002959205242 and parameters: {'n_estimators': 52, 'learning_rate': 0.004970455935845532, 'max_depth': 7}. Best is trial 0 with value: 0.606002959205242.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062126 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:49:55,249] Trial 1 finished with value: 0.5894102726696259 and parameters: {'n_estimators': 157, 'learning_rate': 0.0001414123950173089, 'max_depth': 7}. Best is trial 0 with value: 0.606002959205242.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055553 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:49:59,378] Trial 2 finished with value: 0.7273303741280913 and parameters: {'n_estimators': 153, 'learning_rate': 0.04406525927279289, 'max_depth': 4}. Best is trial 2 with value: 0.7273303741280913.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.075009 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:50:05,035] Trial 3 finished with value: 0.7828154724159797 and parameters: {'n_estimators': 281, 'learning_rate': 0.088741672603841, 'max_depth': 3}. Best is trial 3 with value: 0.7828154724159797.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062650 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:50:16,308] Trial 4 finished with value: 0.5641513422109491 and parameters: {'n_estimators': 272, 'learning_rate': 0.00011682436944841506, 'max_depth': 5}. Best is trial 3 with value: 0.7828154724159797.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.058999 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:50:20,382] Trial 5 finished with value: 0.5652082012259565 and parameters: {'n_estimators': 106, 'learning_rate': 0.0005092725829856545, 'max_depth': 5}. Best is trial 3 with value: 0.7828154724159797.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059840 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:50:25,210] Trial 6 finished with value: 0.6357006975269499 and parameters: {'n_estimators': 98, 'learning_rate': 0.00771080671357163, 'max_depth': 6}. Best is trial 3 with value: 0.7828154724159797.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.057179 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:50:35,742] Trial 7 finished with value: 0.7566053688437963 and parameters: {'n_estimators': 284, 'learning_rate': 0.023898653222445655, 'max_depth': 6}. Best is trial 3 with value: 0.7828154724159797.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059392 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:50:45,084] Trial 8 finished with value: 0.7027055590784189 and parameters: {'n_estimators': 219, 'learning_rate': 0.013513938276303051, 'max_depth': 6}. Best is trial 3 with value: 0.7828154724159797.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.057339 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:50:59,307] Trial 9 finished with value: 0.6353836398224477 and parameters: {'n_estimators': 243, 'learning_rate': 0.0002833984323020668, 'max_depth': 10}. Best is trial 3 with value: 0.7828154724159797.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061784 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:51:03,531] Trial 10 finished with value: 0.7797505812724582 and parameters: {'n_estimators': 228, 'learning_rate': 0.09732514412749337, 'max_depth': 3}. Best is trial 3 with value: 0.7828154724159797.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.056188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:51:07,447] Trial 11 finished with value: 0.7669625871908687 and parameters: {'n_estimators': 212, 'learning_rate': 0.08115699783827705, 'max_depth': 3}. Best is trial 3 with value: 0.7828154724159797.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.100986 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:51:12,838] Trial 12 finished with value: 0.7892623124075249 and parameters: {'n_estimators': 299, 'learning_rate': 0.09377675197838461, 'max_depth': 3}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.063010 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:51:19,670] Trial 13 finished with value: 0.5633058549989431 and parameters: {'n_estimators': 300, 'learning_rate': 0.0011923454793143094, 'max_depth': 3}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061482 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:51:34,273] Trial 14 finished with value: 0.6516592686535616 and parameters: {'n_estimators': 252, 'learning_rate': 0.00214726190234536, 'max_depth': 10}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.060803 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:51:39,029] Trial 15 finished with value: 0.7138025787359966 and parameters: {'n_estimators': 192, 'learning_rate': 0.02913990155782799, 'max_depth': 4}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:51:52,267] Trial 16 finished with value: 0.7358909321496513 and parameters: {'n_estimators': 266, 'learning_rate': 0.014153954218295125, 'max_depth': 8}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058817 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:51:59,010] Trial 17 finished with value: 0.7678080744028747 and parameters: {'n_estimators': 300, 'learning_rate': 0.044267657698939054, 'max_depth': 4}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059035 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:52:04,300] Trial 18 finished with value: 0.7853519340519974 and parameters: {'n_estimators': 182, 'learning_rate': 0.0847927682015974, 'max_depth': 5}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062392 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:52:09,494] Trial 19 finished with value: 0.5846544071020926 and parameters: {'n_estimators': 129, 'learning_rate': 0.002167508230182494, 'max_depth': 5}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061022 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:52:18,991] Trial 20 finished with value: 0.7177129570915239 and parameters: {'n_estimators': 183, 'learning_rate': 0.0159093976690742, 'max_depth': 8}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060676 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:52:24,509] Trial 21 finished with value: 0.7702388501373917 and parameters: {'n_estimators': 245, 'learning_rate': 0.056748897462244006, 'max_depth': 4}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.057959 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:52:28,198] Trial 22 finished with value: 0.7726696258719087 and parameters: {'n_estimators': 202, 'learning_rate': 0.0949798463073075, 'max_depth': 3}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060251 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:52:35,610] Trial 23 finished with value: 0.7451912914817164 and parameters: {'n_estimators': 279, 'learning_rate': 0.032675872372213546, 'max_depth': 4}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062540 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:52:38,275] Trial 24 finished with value: 0.7104206298879729 and parameters: {'n_estimators': 68, 'learning_rate': 0.05824775070499872, 'max_depth': 5}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:52:41,933] Trial 25 finished with value: 0.5955400549566687 and parameters: {'n_estimators': 161, 'learning_rate': 0.0067188622831757085, 'max_depth': 3}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.064240 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:52:49,182] Trial 26 finished with value: 0.7065102515324456 and parameters: {'n_estimators': 259, 'learning_rate': 0.018538085537159105, 'max_depth': 4}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062386 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:52:53,823] Trial 27 finished with value: 0.7570281124497992 and parameters: {'n_estimators': 235, 'learning_rate': 0.06350510065666462, 'max_depth': 3}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062851 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:53:02,319] Trial 28 finished with value: 0.7673853307968717 and parameters: {'n_estimators': 288, 'learning_rate': 0.03591704523244787, 'max_depth': 5}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.058971 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-11-02 22:53:09,300] Trial 29 finished with value: 0.6643415768336504 and parameters: {'n_estimators': 133, 'learning_rate': 0.009314337722495167, 'max_depth': 7}. Best is trial 12 with value: 0.7892623124075249.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.061808 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/11/02 22:53:39 INFO mlflow.tracking._tracking_service.client: 🏃 View run LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/sahilbhardwaj23/Youtube-comment-Sentiment-Analysis.mlflow/#/experiments/7/runs/f55fad8830df45b2808befad9e9dda6f.
2024/11/02 22:53:39 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/sahilbhardwaj23/Youtube-comment-Sentiment-Analysis.mlflow/#/experiments/7.
