In [None]:
!pip install mlflow boto3 awscli optuna imbalanced-learn lightgbm

Collecting mlflow
  Downloading mlflow-2.16.2-py3-none-any.whl.metadata (29 kB)
Collecting boto3
  Downloading boto3-1.35.38-py3-none-any.whl.metadata (6.7 kB)
Collecting awscli
  Downloading awscli-1.35.4-py3-none-any.whl.metadata (11 kB)
Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting mlflow-skinny==2.16.2 (from mlflow)
  Downloading mlflow_skinny-2.16.2-py3-none-any.whl.metadata (30 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.13.3-py3-none-any.whl.metadata (7.4 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.3-py2.py3-none-any.whl.metadata (7.7 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.16.2->mlflow)
  Downloading databricks_sdk-0.34.0-py3-none-any.whl.metadata (37 kB)
Colle

In [None]:
!aws configure

In [2]:
import mlflow
# Step 2: Set up the MLflow tracking server
mlflow.set_tracking_uri("http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/")

In [3]:
# Set or create an experiment
mlflow.set_experiment("LightGBM HP Tuning")

2025/05/22 16:12:23 INFO mlflow.tracking.fluent: Experiment with name 'LightGBM HP Tuning' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://yt-mlflowbucket/538688335696357572', creation_time=1747910543791, experiment_id='538688335696357572', last_update_time=1747910543791, lifecycle_stage='active', name='LightGBM HP Tuning', tags={}>

In [4]:
import pandas as pd

df = pd.read_csv('reddit_preprocessing.csv').dropna()
df.shape

(36662, 2)

In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
import mlflow
import mlflow.sklearn
import optuna
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt

In [6]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

In [7]:
# Step 3: TF-IDF vectorizer setup
ngram_range = (1, 3)  # Trigram
max_features = 1000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

# Step 4: Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [8]:
# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

In [9]:
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    with mlflow.start_run():
        # Log model type and trial number
        mlflow.set_tag("mlflow.runName", f"Trial_{trial_number}_{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Log hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

        return accuracy




In [10]:
# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    # Hyperparameter space to explore
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 15)
    num_leaves = trial.suggest_int('num_leaves', 20, 150)
    min_child_samples = trial.suggest_int('min_child_samples', 10, 100)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    subsample = trial.suggest_float('subsample', 0.5, 1.0)
    reg_alpha = trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True)  # L1 regularization
    reg_lambda = trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True)  # L2 regularization

    # Log trial parameters
    params = {
        'n_estimators': n_estimators,
        'learning_rate': learning_rate,
        'max_depth': max_depth,
        'num_leaves': num_leaves,
        'min_child_samples': min_child_samples,
        'colsample_bytree': colsample_bytree,
        'subsample': subsample,
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda
    }

    # Create LightGBM model
    model = LGBMClassifier(n_estimators=n_estimators,
                           learning_rate=learning_rate,
                           max_depth=max_depth,
                           num_leaves=num_leaves,
                           min_child_samples=min_child_samples,
                           colsample_bytree=colsample_bytree,
                           subsample=subsample,
                           reg_alpha=reg_alpha,
                           reg_lambda=reg_lambda,
                           random_state=42)

    # Log each trial as a separate run in MLflow
    accuracy = log_mlflow("LightGBM", model, X_train, X_test, y_train, y_test, params, trial.number)

    return accuracy




In [11]:
# Step 7: Run Optuna for LightGBM, log the best model, and plot the importance of each parameter
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=100)  # Increased to 100 trials

    # Get the best parameters
    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'],
                                learning_rate=best_params['learning_rate'],
                                max_depth=best_params['max_depth'],
                                num_leaves=best_params['num_leaves'],
                                min_child_samples=best_params['min_child_samples'],
                                colsample_bytree=best_params['colsample_bytree'],
                                subsample=best_params['subsample'],
                                reg_alpha=best_params['reg_alpha'],
                                reg_lambda=best_params['reg_lambda'],
                                random_state=42)

    # Log the best model with MLflow and print the classification report
    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test, best_params, "Best")

    # Plot parameter importance
    optuna.visualization.plot_param_importances(study).show()

    # Plot optimization history
    optuna.visualization.plot_optimization_history(study).show()

In [None]:
# Run the experiment for LightGBM
run_optuna_experiment()

[I 2025-05-22 16:13:48,041] A new study created in memory with name: no-name-a64d82e6-1753-4dbb-8cd5-10280ae2f4e2


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.110095 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98843
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 959
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:15:20,399] Trial 0 finished with value: 0.8086028323821602 and parameters: {'n_estimators': 994, 'learning_rate': 0.04182653159082513, 'max_depth': 6, 'num_leaves': 45, 'min_child_samples': 62, 'colsample_bytree': 0.5756489259226318, 'subsample': 0.6497565510995658, 'reg_alpha': 0.0009345712097594116, 'reg_lambda': 0.0006323472570237471}. Best is trial 0 with value: 0.8086028323821602.


🏃 View run Trial_0_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/73282c56b27e492c8b2ef67fddabcd09
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079481 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:16:24,871] Trial 1 finished with value: 0.8055379412386388 and parameters: {'n_estimators': 964, 'learning_rate': 0.03650343644825857, 'max_depth': 5, 'num_leaves': 75, 'min_child_samples': 48, 'colsample_bytree': 0.6589853332425375, 'subsample': 0.5686437962437032, 'reg_alpha': 0.01135804048155242, 'reg_lambda': 0.0007353965595724745}. Best is trial 0 with value: 0.8086028323821602.


🏃 View run Trial_1_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/b377c3325eee4280a16631aab802de3e
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.075116 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98797
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 957
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:17:22,571] Trial 2 finished with value: 0.8041640245191292 and parameters: {'n_estimators': 526, 'learning_rate': 0.02767298816459907, 'max_depth': 13, 'num_leaves': 135, 'min_child_samples': 69, 'colsample_bytree': 0.9810898175853338, 'subsample': 0.5777138790974303, 'reg_alpha': 0.011460070390905816, 'reg_lambda': 1.3884219831177202}. Best is trial 0 with value: 0.8086028323821602.


🏃 View run Trial_2_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/5dc5b63716d34847860c8abacdfb3fde
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.074068 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:18:27,276] Trial 3 finished with value: 0.6249207355738744 and parameters: {'n_estimators': 812, 'learning_rate': 0.00015158950988161974, 'max_depth': 7, 'num_leaves': 81, 'min_child_samples': 37, 'colsample_bytree': 0.7657044467578651, 'subsample': 0.7961608722718805, 'reg_alpha': 1.7031544734737643, 'reg_lambda': 0.0017936290136978568}. Best is trial 0 with value: 0.8086028323821602.


🏃 View run Trial_3_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/4ddedaf1895745fca09f4ec4082a66d7
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.084794 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:19:38,059] Trial 4 finished with value: 0.625132107376876 and parameters: {'n_estimators': 619, 'learning_rate': 0.000482027804801964, 'max_depth': 8, 'num_leaves': 114, 'min_child_samples': 76, 'colsample_bytree': 0.9131610518474949, 'subsample': 0.8365949759025759, 'reg_alpha': 0.4021076480813288, 'reg_lambda': 0.011242015961197342}. Best is trial 0 with value: 0.8086028323821602.


🏃 View run Trial_4_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/6cbe833cf823483ea8d15a537f7e55ef
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.072534 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:20:25,587] Trial 5 finished with value: 0.6727964489537096 and parameters: {'n_estimators': 739, 'learning_rate': 0.0032951975319747157, 'max_depth': 5, 'num_leaves': 22, 'min_child_samples': 80, 'colsample_bytree': 0.691226439110515, 'subsample': 0.551818260253252, 'reg_alpha': 0.0019307986070869747, 'reg_lambda': 1.1248855613211406}. Best is trial 0 with value: 0.8086028323821602.


🏃 View run Trial_5_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/9bdc18a85a1d44088dadc8f5f69e58ae
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.093017 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:21:26,885] Trial 6 finished with value: 0.8143098710632002 and parameters: {'n_estimators': 723, 'learning_rate': 0.08258405024836947, 'max_depth': 8, 'num_leaves': 127, 'min_child_samples': 79, 'colsample_bytree': 0.9331589627964771, 'subsample': 0.7844673973554938, 'reg_alpha': 0.0010514730259858608, 'reg_lambda': 0.30591007655273517}. Best is trial 6 with value: 0.8143098710632002.


🏃 View run Trial_6_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/b3f2ca9788254a8196ed9777f2b5e978
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.156720 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:23:15,340] Trial 7 finished with value: 0.8133586979496935 and parameters: {'n_estimators': 494, 'learning_rate': 0.04217080664623342, 'max_depth': 15, 'num_leaves': 115, 'min_child_samples': 48, 'colsample_bytree': 0.8768999046763515, 'subsample': 0.9980972107140729, 'reg_alpha': 0.00018801587936785857, 'reg_lambda': 0.7592462559245845}. Best is trial 6 with value: 0.8143098710632002.


🏃 View run Trial_7_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/59f9fac1a2344b55b47671faa4eb6399
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098783 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98984
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:25:16,400] Trial 8 finished with value: 0.7568167406467977 and parameters: {'n_estimators': 803, 'learning_rate': 0.004507231409999218, 'max_depth': 11, 'num_leaves': 75, 'min_child_samples': 33, 'colsample_bytree': 0.6695479919125851, 'subsample': 0.5458093396910273, 'reg_alpha': 0.025792698783837612, 'reg_lambda': 0.013146113625296526}. Best is trial 6 with value: 0.8143098710632002.


🏃 View run Trial_8_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/29f94d25e0d24e6898bdbe7e743f8fba
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.090024 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98843
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 959
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:26:41,091] Trial 9 finished with value: 0.6781864299302474 and parameters: {'n_estimators': 621, 'learning_rate': 0.0010924531102938242, 'max_depth': 13, 'num_leaves': 45, 'min_child_samples': 62, 'colsample_bytree': 0.9736268123451249, 'subsample': 0.8780884066868686, 'reg_alpha': 0.00022341999993402773, 'reg_lambda': 0.00011771445016195078}. Best is trial 6 with value: 0.8143098710632002.


🏃 View run Trial_9_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/4cf0b7953c0f4abbb94465e516619a4f
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.103023 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98365
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 943
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:27:03,261] Trial 10 finished with value: 0.6254491650813782 and parameters: {'n_estimators': 174, 'learning_rate': 0.010543877184925663, 'max_depth': 3, 'num_leaves': 150, 'min_child_samples': 99, 'colsample_bytree': 0.8148157726128599, 'subsample': 0.731227595455201, 'reg_alpha': 0.12535620169163694, 'reg_lambda': 0.1555164666918952}. Best is trial 6 with value: 0.8143098710632002.


🏃 View run Trial_10_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/6112a89e6e4f4aeaaa76add2412041a6
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.145262 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99064
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 978
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:28:11,793] Trial 11 finished with value: 0.8211794546607483 and parameters: {'n_estimators': 407, 'learning_rate': 0.09111351347260026, 'max_depth': 15, 'num_leaves': 110, 'min_child_samples': 14, 'colsample_bytree': 0.866283161465995, 'subsample': 0.992024786254877, 'reg_alpha': 0.00017334068144906093, 'reg_lambda': 6.7122785015659}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_11_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/9e65d4aadee043859203dae7093114ce
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.109711 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99070
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 979
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:29:03,685] Trial 12 finished with value: 0.8112449799196787 and parameters: {'n_estimators': 336, 'learning_rate': 0.07029498616191399, 'max_depth': 10, 'num_leaves': 115, 'min_child_samples': 13, 'colsample_bytree': 0.858363837036808, 'subsample': 0.9739693773499177, 'reg_alpha': 0.0017490144925960776, 'reg_lambda': 4.491282511603587}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_12_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/53763d1fabfe43549abff1842c0c477c
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.081524 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99112
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 988
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:29:53,235] Trial 13 finished with value: 0.8124075248361868 and parameters: {'n_estimators': 337, 'learning_rate': 0.0946647349162673, 'max_depth': 9, 'num_leaves': 98, 'min_child_samples': 10, 'colsample_bytree': 0.9268067777346088, 'subsample': 0.9145826325914488, 'reg_alpha': 0.00021747034253485935, 'reg_lambda': 9.604283158114196}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_13_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/e847baa490fe43b481fd9455ab6f54dd
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105506 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98432
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 945
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:30:39,730] Trial 14 finished with value: 0.7641090678503487 and parameters: {'n_estimators': 367, 'learning_rate': 0.01145364893087825, 'max_depth': 12, 'num_leaves': 133, 'min_child_samples': 95, 'colsample_bytree': 0.8124169943113813, 'subsample': 0.7177203082360806, 'reg_alpha': 0.0011689692672670637, 'reg_lambda': 0.19110958833527286}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_14_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/f5f99a7d001a4d2689b20bcc969ab13c
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.070678 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98994
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:31:10,283] Trial 15 finished with value: 0.7499471570492496 and parameters: {'n_estimators': 102, 'learning_rate': 0.016489473316272776, 'max_depth': 15, 'num_leaves': 98, 'min_child_samples': 27, 'colsample_bytree': 0.5046191553873443, 'subsample': 0.6605360598864072, 'reg_alpha': 0.00011254914890400939, 'reg_lambda': 0.0998994841961394}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_15_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/3d8d333faca3499fa11851396513a3a8
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.072085 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98718
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:31:49,112] Trial 16 finished with value: 0.8106108645106743 and parameters: {'n_estimators': 435, 'learning_rate': 0.09537938648456741, 'max_depth': 9, 'num_leaves': 131, 'min_child_samples': 87, 'colsample_bytree': 0.9169448570229731, 'subsample': 0.9310716582302422, 'reg_alpha': 0.004714171576696873, 'reg_lambda': 3.4948646053664523}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_16_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/41b66d1b2773428eab75b388d776e537
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.084918 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99002
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 969
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:32:25,780] Trial 17 finished with value: 0.7427605157471994 and parameters: {'n_estimators': 694, 'learning_rate': 0.017280066926585843, 'max_depth': 3, 'num_leaves': 150, 'min_child_samples': 22, 'colsample_bytree': 0.998657466710329, 'subsample': 0.8086272563166879, 'reg_alpha': 0.0004688928728357153, 'reg_lambda': 0.04835972405966485}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_17_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/37e8696b316449b4a71bb2dd149ce4e4
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097209 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:33:10,405] Trial 18 finished with value: 0.7086239695624603 and parameters: {'n_estimators': 261, 'learning_rate': 0.00653559230485603, 'max_depth': 11, 'num_leaves': 99, 'min_child_samples': 46, 'colsample_bytree': 0.8143550655118097, 'subsample': 0.8565161848221751, 'reg_alpha': 0.09485988931748474, 'reg_lambda': 0.4269418054704139}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_18_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/c9a3c4e76d8a4922bd592027aaf99753
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.089976 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98747
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 955
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:34:35,582] Trial 19 finished with value: 0.6610653138871274 and parameters: {'n_estimators': 888, 'learning_rate': 0.0012715644127272766, 'max_depth': 7, 'num_leaves': 124, 'min_child_samples': 84, 'colsample_bytree': 0.7346661836225414, 'subsample': 0.5007350385023475, 'reg_alpha': 0.00318309447687729, 'reg_lambda': 2.3666595262694097}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_19_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/aec2c093932f48699eccd71c1704c69f
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106359 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98797
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 957
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:36:06,297] Trial 20 finished with value: 0.6932995138448531 and parameters: {'n_estimators': 603, 'learning_rate': 0.0013299367075145184, 'max_depth': 14, 'num_leaves': 93, 'min_child_samples': 70, 'colsample_bytree': 0.865115202568532, 'subsample': 0.7727933291879446, 'reg_alpha': 5.4464863070604475, 'reg_lambda': 0.4078789754278716}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_20_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/72a0b1835b4c41da9c8bd3f8b2622d0c
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.126535 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:37:44,791] Trial 21 finished with value: 0.8146269287677024 and parameters: {'n_estimators': 498, 'learning_rate': 0.04724889187534655, 'max_depth': 15, 'num_leaves': 114, 'min_child_samples': 49, 'colsample_bytree': 0.8825912523606907, 'subsample': 0.9991323998587002, 'reg_alpha': 0.00038736645421446995, 'reg_lambda': 0.6243636608458529}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_21_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/77957a10410446969b1c72d0a19dc2b3
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.089747 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98863
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 960
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:38:58,736] Trial 22 finished with value: 0.8109279222151765 and parameters: {'n_estimators': 431, 'learning_rate': 0.0567565447049115, 'max_depth': 14, 'num_leaves': 107, 'min_child_samples': 58, 'colsample_bytree': 0.9422240092408263, 'subsample': 0.9531305627487868, 'reg_alpha': 0.00047373454229467, 'reg_lambda': 6.355465837236476}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_22_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/b1cebf08c71541eea37df64c34604e3b
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.211595 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:40:53,514] Trial 23 finished with value: 0.8148383005707038 and parameters: {'n_estimators': 691, 'learning_rate': 0.02620111292929314, 'max_depth': 15, 'num_leaves': 139, 'min_child_samples': 20, 'colsample_bytree': 0.8779850568622443, 'subsample': 0.8953565215454666, 'reg_alpha': 0.0005561949186846024, 'reg_lambda': 0.4002365830693692}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_23_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/d1d6ce1f24484c6aaf0d185345d68340
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.089234 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99039
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 974
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:42:30,995] Trial 24 finished with value: 0.8051151976326358 and parameters: {'n_estimators': 467, 'learning_rate': 0.02126782810545147, 'max_depth': 15, 'num_leaves': 142, 'min_child_samples': 18, 'colsample_bytree': 0.7704678800667697, 'subsample': 0.9232925144991726, 'reg_alpha': 0.00010436164558253357, 'reg_lambda': 0.054058129633446306}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_24_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/d141762f3d654ed3b361c5855a8560ed
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106479 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:44:10,918] Trial 25 finished with value: 0.7708729655463961 and parameters: {'n_estimators': 542, 'learning_rate': 0.008189783187035862, 'max_depth': 13, 'num_leaves': 120, 'min_child_samples': 36, 'colsample_bytree': 0.8748171705796823, 'subsample': 0.9900681841666615, 'reg_alpha': 0.00041571556616316587, 'reg_lambda': 1.599735768101742}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_25_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/bdfdb7cccaaf4a0bac9d67aedd828f6a
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.128100 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98994
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:45:25,698] Trial 26 finished with value: 0.7954977805960685 and parameters: {'n_estimators': 254, 'learning_rate': 0.028352286814387423, 'max_depth': 14, 'num_leaves': 139, 'min_child_samples': 26, 'colsample_bytree': 0.8428084741999637, 'subsample': 0.8979637948875429, 'reg_alpha': 0.006024470415193356, 'reg_lambda': 0.015526038252464956}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_26_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/9d2547720a9743b7b5c4d8a2db0b2506
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.115339 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99053
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 976
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:46:40,897] Trial 27 finished with value: 0.8182202494187275 and parameters: {'n_estimators': 668, 'learning_rate': 0.051014996007084014, 'max_depth': 12, 'num_leaves': 61, 'min_child_samples': 17, 'colsample_bytree': 0.7935269751036137, 'subsample': 0.9523985000631304, 'reg_alpha': 0.0004064107423888423, 'reg_lambda': 0.7384181267171311}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_27_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/6463fb5666e141e09a6c2c4a2f99471a
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.080869 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99039
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 974
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:48:13,926] Trial 28 finished with value: 0.796131896005073 and parameters: {'n_estimators': 669, 'learning_rate': 0.015540933402336548, 'max_depth': 12, 'num_leaves': 55, 'min_child_samples': 18, 'colsample_bytree': 0.7356087753651909, 'subsample': 0.958224934466731, 'reg_alpha': 0.0007902148573140508, 'reg_lambda': 9.928942190934237}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_28_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/d8f72c94bd604d7d91b35d3f00301286
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.490290 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98984
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:49:37,371] Trial 29 finished with value: 0.8171633904037201 and parameters: {'n_estimators': 788, 'learning_rate': 0.048625381814525095, 'max_depth': 11, 'num_leaves': 60, 'min_child_samples': 29, 'colsample_bytree': 0.7848854381196236, 'subsample': 0.8844613669620637, 'reg_alpha': 0.0029225483434757752, 'reg_lambda': 2.666823307291593}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_29_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/12a2d46ab5874d9faa6e103cdac8cc71
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.149796 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98984
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:51:14,501] Trial 30 finished with value: 0.8187486789262313 and parameters: {'n_estimators': 845, 'learning_rate': 0.0535770386538156, 'max_depth': 11, 'num_leaves': 62, 'min_child_samples': 29, 'colsample_bytree': 0.6341119412265687, 'subsample': 0.9457365149414046, 'reg_alpha': 0.0030358659854003074, 'reg_lambda': 3.056314014898496}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_30_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/03d9d9eaed0c4e91be6dc3cbf4b4d73d
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.109136 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98984
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-05-22 16:53:09,623] Trial 31 finished with value: 0.8186429930247305 and parameters: {'n_estimators': 828, 'learning_rate': 0.05662429320005097, 'max_depth': 11, 'num_leaves': 61, 'min_child_samples': 30, 'colsample_bytree': 0.5951706248776106, 'subsample': 0.9428552057645728, 'reg_alpha': 0.010138919516374184, 'reg_lambda': 2.8623111938021255}. Best is trial 11 with value: 0.8211794546607483.


🏃 View run Trial_31_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572/runs/626c0395ed9a4577a6ee69bd26ef7abd
🧪 View experiment at: http://ec2-13-233-74-14.ap-south-1.compute.amazonaws.com:5000/#/experiments/538688335696357572
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.118669 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


Exception ignored on calling ctypes callback function: <function _log_callback at 0x0000023A8C03E840>
Traceback (most recent call last):
  File "c:\Users\soura\anaconda3\Lib\site-packages\lightgbm\basic.py", line 287, in _log_callback
    def _log_callback(msg: bytes) -> None:
    
KeyboardInterrupt: 


No further splits with positive gain, best gain: -inf


In [None]:
best_model