In [None]:
!pip install mlflow boto3 awscli optuna lightgbm imbalanced-learn



In [None]:
import os
import mlflow

# Replace with your actual username and token
os.environ["MLFLOW_TRACKING_USERNAME"] = "sreyo2004"
os.environ["MLFLOW_TRACKING_PASSWORD"] = "54b0fc2a87e30ce5bfc3a8ed37bac2ef6d65c1ad"

mlflow.set_tracking_uri("https://dagshub.com/sreyo2004/mlflow-test.mlflow")

In [None]:
import numpy as np
import pandas as pd

In [None]:
df = pd.read_csv('https://raw.githubusercontent.com/Himanshu-1703/reddit-sentiment-analysis/refs/heads/main/data/reddit.csv')
df.head()

Unnamed: 0,clean_comment,category
0,family mormon have never tried explain them t...,1
1,buddhism has very much lot compatible with chr...,1
2,seriously don say thing first all they won get...,-1
3,what you have learned yours and only yours wha...,0
4,for your own benefit you may want read living ...,1


In [None]:
# Drop missing and duplicate values
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)
df = df[~(df['clean_comment'].str.strip() == '')]

# Import necessary libraries
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download necessary NLTK data
nltk.download('stopwords')
nltk.download('wordnet')

# Define the preprocessing function
def preprocess_comment(comment):
    # Convert to lowercase
    comment = comment.lower()

    # Remove trailing and leading whitespaces
    comment = comment.strip()

    # Remove newline characters
    comment = re.sub(r'\n', ' ', comment)

    # Remove non-alphanumeric characters, except punctuation
    comment = re.sub(r"[^A-Za-z0-9\s!?.]", '', comment)

    # Remove stopwords but retain important ones for sentiment analysis
    stop_words = set(stopwords.words('english')) - {'not', 'but', 'however', 'no', 'yet'}
    comment = ' '.join([word for word in comment.split() if word not in stop_words])

    # Lemmatize the words
    lemmatizer = WordNetLemmatizer()
    comment = ' '.join([lemmatizer.lemmatize(word) for word in comment.split()])

    return comment

# Apply the preprocessing function to the `clean_comment` column
df['clean_comment'] = df['clean_comment'].apply(preprocess_comment)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
mlflow.set_experiment("Exp 5 - ML Algos with HP Tuning")

<Experiment: artifact_location='mlflow-artifacts:/57a11b57c17f4fbf87cf3fa6ca38890d', creation_time=1749279996336, experiment_id='9', last_update_time=1749279996336, lifecycle_stage='active', name='Exp 5 - ML Algos with HP Tuning', tags={}>

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier
import mlflow
import mlflow.sklearn
import optuna

In [None]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category']=df['category'].map({-1:2,0:0,1:1})


# Step 2: Remove rows where the target labels (category) are NaN
df=df.dropna(subset=['category'])

# Step 3: TF-IDF vectorizer setup
ngram_range=(1,3)
max_feautures=1000
vectorizer=TfidfVectorizer(
    ngram_range=ngram_range,
    max_features=max_feautures
)

X=vectorizer.fit_transform(df['clean_comment'])
y=df['category']

# Step 4: Apply SMOTE to handle class imbalance
smote=SMOTE(random_state=42)
X_resampled,y_resampled=smote.fit_resample(X,y)

# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

#Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        # Log model type
        mlflow.set_tag("mlflow.runName", f"{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)
        print(f"{model_name} - Accuracy: {accuracy:.4f}")



# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
  n_estimators=trial.suggest_int('n_estimators',50,300)
  learning_rate=trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
  max_depth = trial.suggest_int('max_depth', 3, 10)

  model=LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, random_state=42)
  return accuracy_score(y_test, model.fit(X_train, y_train).predict(X_test))



# Step 7: Run Optuna for LightGBM, log the best model only
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=30)

    # Get the best parameters and log only the best model
    best_params = study.best_params
    print("Best params:", best_params)
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'], max_depth=best_params['max_depth'], random_state=42)

    # Log the best model with MLflow, passing the algo_name as "LightGBM"
    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test)

# Run the experiment for LightGBM
run_optuna_experiment()




[I 2025-06-16 13:13:09,722] A new study created in memory with name: no-name-253fe711-06c8-456c-8938-e0170aa69347


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.232072 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:13:38,059] Trial 0 finished with value: 0.6136531755257318 and parameters: {'n_estimators': 254, 'learning_rate': 0.00013104879383110682, 'max_depth': 9}. Best is trial 0 with value: 0.6136531755257318.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.255999 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:13:46,314] Trial 1 finished with value: 0.5377787171087393 and parameters: {'n_estimators': 212, 'learning_rate': 0.000611174517691741, 'max_depth': 3}. Best is trial 0 with value: 0.6136531755257318.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.263737 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:14:03,836] Trial 2 finished with value: 0.6216844552467505 and parameters: {'n_estimators': 150, 'learning_rate': 0.0013430270770796622, 'max_depth': 9}. Best is trial 2 with value: 0.6216844552467505.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.234897 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:14:18,495] Trial 3 finished with value: 0.7965761386452499 and parameters: {'n_estimators': 209, 'learning_rate': 0.08780341611568, 'max_depth': 7}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.233713 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:14:55,365] Trial 4 finished with value: 0.6541265983303393 and parameters: {'n_estimators': 288, 'learning_rate': 0.0041223321175077625, 'max_depth': 7}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.248982 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:15:10,297] Trial 5 finished with value: 0.6571911655923068 and parameters: {'n_estimators': 157, 'learning_rate': 0.008237199211145331, 'max_depth': 7}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.248692 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:15:34,697] Trial 6 finished with value: 0.6136531755257318 and parameters: {'n_estimators': 219, 'learning_rate': 0.00014413961152372507, 'max_depth': 9}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.255615 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:15:49,755] Trial 7 finished with value: 0.7402515058649477 and parameters: {'n_estimators': 162, 'learning_rate': 0.02721957296207057, 'max_depth': 8}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.232264 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:16:17,383] Trial 8 finished with value: 0.5902990594948748 and parameters: {'n_estimators': 277, 'learning_rate': 0.00011113403676966004, 'max_depth': 7}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.441094 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:16:26,337] Trial 9 finished with value: 0.7708971784846244 and parameters: {'n_estimators': 231, 'learning_rate': 0.07039958497135362, 'max_depth': 4}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.247874 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:16:31,949] Trial 10 finished with value: 0.7427876994610588 and parameters: {'n_estimators': 83, 'learning_rate': 0.08766383329737701, 'max_depth': 5}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.236314 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:16:43,113] Trial 11 finished with value: 0.7864313642608053 and parameters: {'n_estimators': 221, 'learning_rate': 0.09192355777384634, 'max_depth': 5}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.400658 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:16:49,844] Trial 12 finished with value: 0.6637429990489274 and parameters: {'n_estimators': 97, 'learning_rate': 0.020996341848884773, 'max_depth': 5}. Best is trial 3 with value: 0.7965761386452499.






[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.241411 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:17:00,670] Trial 13 finished with value: 0.7275705378843919 and parameters: {'n_estimators': 183, 'learning_rate': 0.03158888747524107, 'max_depth': 5}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.247715 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:17:16,842] Trial 14 finished with value: 0.6646940716474691 and parameters: {'n_estimators': 193, 'learning_rate': 0.009035766001652162, 'max_depth': 6}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.240562 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:17:25,346] Trial 15 finished with value: 0.772904998414879 and parameters: {'n_estimators': 121, 'learning_rate': 0.0947387987914293, 'max_depth': 6}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.235921 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:17:32,615] Trial 16 finished with value: 0.7300010567473317 and parameters: {'n_estimators': 250, 'learning_rate': 0.04179753512152086, 'max_depth': 3}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.381691 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:17:39,935] Trial 17 finished with value: 0.6577195392581634 and parameters: {'n_estimators': 56, 'learning_rate': 0.013065822918330928, 'max_depth': 10}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.238501 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:17:50,391] Trial 18 finished with value: 0.5980133150163796 and parameters: {'n_estimators': 199, 'learning_rate': 0.003448715184228745, 'max_depth': 4}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228910 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:18:10,785] Trial 19 finished with value: 0.789178907323259 and parameters: {'n_estimators': 255, 'learning_rate': 0.052047935203447956, 'max_depth': 8}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.381683 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:18:42,805] Trial 20 finished with value: 0.6127021029271901 and parameters: {'n_estimators': 300, 'learning_rate': 0.0010380860141697772, 'max_depth': 8}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.245611 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:19:03,386] Trial 21 finished with value: 0.7885448589242312 and parameters: {'n_estimators': 253, 'learning_rate': 0.05084049153929224, 'max_depth': 8}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.304794 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:19:23,757] Trial 22 finished with value: 0.7847405685300645 and parameters: {'n_estimators': 252, 'learning_rate': 0.04462843688560928, 'max_depth': 8}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.236770 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:19:50,796] Trial 23 finished with value: 0.765507767092888 and parameters: {'n_estimators': 269, 'learning_rate': 0.021292445733644815, 'max_depth': 10}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.245547 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:20:10,053] Trial 24 finished with value: 0.7865370389939765 and parameters: {'n_estimators': 232, 'learning_rate': 0.0514326434952945, 'max_depth': 8}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.249254 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:20:33,098] Trial 25 finished with value: 0.7172144140336045 and parameters: {'n_estimators': 264, 'learning_rate': 0.013479931413897223, 'max_depth': 7}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.262749 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:20:53,625] Trial 26 finished with value: 0.7905526788544859 and parameters: {'n_estimators': 242, 'learning_rate': 0.051121901996940805, 'max_depth': 9}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.236977 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:21:17,262] Trial 27 finished with value: 0.7341223713410123 and parameters: {'n_estimators': 233, 'learning_rate': 0.015497588032592833, 'max_depth': 9}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.265753 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:21:40,210] Trial 28 finished with value: 0.6914297791398076 and parameters: {'n_estimators': 202, 'learning_rate': 0.008028312645937946, 'max_depth': 10}. Best is trial 3 with value: 0.7965761386452499.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.253480 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665


[I 2025-06-16 13:21:56,680] Trial 29 finished with value: 0.7640283208284899 and parameters: {'n_estimators': 177, 'learning_rate': 0.03416070737152996, 'max_depth': 9}. Best is trial 3 with value: 0.7965761386452499.


Best params: {'n_estimators': 209, 'learning_rate': 0.08780341611568, 'max_depth': 7}




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.253171 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99059
[LightGBM] [Info] Number of data points in the train set: 37850, number of used features: 970
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098586
[LightGBM] [Info] Start training from score -1.098665




LightGBM - Accuracy: 0.7966
🏃 View run LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/sreyo2004/mlflow-test.mlflow/#/experiments/9/runs/2a8f6cd1c6bb420cbe24f2f27908f481
🧪 View experiment at: https://dagshub.com/sreyo2004/mlflow-test.mlflow/#/experiments/9
