In [1]:
!pip install optuna imbalanced-learn lightgbm dagshub mlflow==2.2.2

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting dagshub
  Downloading dagshub-0.6.3-py3-none-any.whl.metadata (12 kB)
Collecting mlflow==2.2.2
  Downloading mlflow-2.2.2-py3-none-any.whl.metadata (11 kB)
Collecting cloudpickle<3 (from mlflow==2.2.2)
  Downloading cloudpickle-2.2.1-py3-none-any.whl.metadata (6.9 kB)
Collecting databricks-cli<1,>=0.8.7 (from mlflow==2.2.2)
  Downloading databricks_cli-0.18.0-py2.py3-none-any.whl.metadata (4.0 kB)
Collecting protobuf<5,>=3.12.0 (from mlflow==2.2.2)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting pytz<2023 (from mlflow==2.2.2)
  Downloading pytz-2022.7.1-py2.py3-none-any.whl.metadata (21 kB)
Collecting packaging<24 (from mlflow==2.2.2)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting importlib-metadata!=4.7.0,<7,>=3.7.0 (from mlflow==2.2.2)
  Downloading importlib_metadata-6.11.0-py3-none-any.whl.metadata (4.9 kB)
Collecting

In [1]:
import dagshub
import mlflow

dagshub.init(repo_owner='mr.hamxa942', repo_name='youtube-sentiment-insights-using-MLOPs', mlflow=True)

print('MLflow is now configured to log to DagsHub!')



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=56455778-fab9-4ef6-8817-c34da968d340&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=0787159caa8ab9076eca4c5012a4a92b60923b66eff62132c9c1a791be3b7d7a




Output()

MLflow is now configured to log to DagsHub!


In [2]:
# Step 2: Set up the MLflow tracking server
mlflow.set_tracking_uri('https://dagshub.com/mr.hamxa942/youtube-sentiment-insights-using-MLOPs.mlflow')

In [3]:
# Set or create an experiment
mlflow.set_experiment("LightGBM HP Tuning")

2025/08/14 11:48:05 INFO mlflow.tracking.fluent: Experiment with name 'LightGBM HP Tuning' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/8ebfd354aeae49aa88415a9c2c1fb310', creation_time=1755172085320, experiment_id='6', last_update_time=1755172085320, lifecycle_stage='active', name='LightGBM HP Tuning', tags={}>

In [4]:
import pandas as pd

df = pd.read_csv('/content/reddit_preprocessing.csv').dropna()
df.shape

(36662, 2)

In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
import mlflow
import mlflow.sklearn
import optuna
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt

In [6]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

In [7]:
# Step 3: TF-IDF vectorizer setup
ngram_range = (1, 3)  # Trigram
max_features = 1000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

# Step 4: Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [8]:
# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

In [9]:
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    with mlflow.start_run():
        # Log model type and trial number
        mlflow.set_tag("mlflow.runName", f"Trial_{trial_number}_{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Log hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

        return accuracy

In [10]:
# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    # Hyperparameter space to explore
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 15)
    num_leaves = trial.suggest_int('num_leaves', 20, 150)
    min_child_samples = trial.suggest_int('min_child_samples', 10, 100)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    subsample = trial.suggest_float('subsample', 0.5, 1.0)
    reg_alpha = trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True)  # L1 regularization
    reg_lambda = trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True)  # L2 regularization

    # Log trial parameters
    params = {
        'n_estimators': n_estimators,
        'learning_rate': learning_rate,
        'max_depth': max_depth,
        'num_leaves': num_leaves,
        'min_child_samples': min_child_samples,
        'colsample_bytree': colsample_bytree,
        'subsample': subsample,
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda
    }

    # Create LightGBM model
    model = LGBMClassifier(n_estimators=n_estimators,
                           learning_rate=learning_rate,
                           max_depth=max_depth,
                           num_leaves=num_leaves,
                           min_child_samples=min_child_samples,
                           colsample_bytree=colsample_bytree,
                           subsample=subsample,
                           reg_alpha=reg_alpha,
                           reg_lambda=reg_lambda,
                           random_state=42)

    # Log each trial as a separate run in MLflow
    accuracy = log_mlflow("LightGBM", model, X_train, X_test, y_train, y_test, params, trial.number)

    return accuracy

In [11]:
# Step 7: Run Optuna for LightGBM, log the best model, and plot the importance of each parameter
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=100)  # Increased to 100 trials

    # Get the best parameters
    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'],
                                learning_rate=best_params['learning_rate'],
                                max_depth=best_params['max_depth'],
                                num_leaves=best_params['num_leaves'],
                                min_child_samples=best_params['min_child_samples'],
                                colsample_bytree=best_params['colsample_bytree'],
                                subsample=best_params['subsample'],
                                reg_alpha=best_params['reg_alpha'],
                                reg_lambda=best_params['reg_lambda'],
                                random_state=42)

    # Log the best model with MLflow and print the classification report
    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test, best_params, "Best")

    # Plot parameter importance
    optuna.visualization.plot_param_importances(study).show()

    # Plot optimization history
    optuna.visualization.plot_optimization_history(study).show()

In [None]:
# Run the experiment for LightGBM
run_optuna_experiment()

[I 2025-08-14 11:50:08,895] A new study created in memory with name: no-name-2a141b6e-8d69-4308-b17b-118bfaf74961


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.212979 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
  return method()
[I 2025-08-14 11:51:18,287] Trial 0 finished with value: 0.7883111392940182 and parameters: {'n_estimators': 434, 'learning_rate': 0.01806465994949989, 'max_depth': 11, 'num_leaves': 145, 'min_child_samples': 37, 'colsample_bytree': 0.9794869881488077, 'subsample': 0.609973916619686, 'reg_alpha': 0.17231918099435453, 'reg_lambda': 0.00013754866666261013}. Best is trial 0 with value: 0.7883111392940182.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.215342 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98657
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 952
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 11:52:50,633] Trial 1 finished with value: 0.7696047347283872 and parameters: {'n_estimators': 559, 'learning_rate': 0.007194521304883487, 'max_depth': 14, 'num_leaves': 69, 'min_child_samples': 91, 'colsample_bytree': 0.5820960209153125, 'subsample': 0.6516651952689638, 'reg_alpha': 0.36843977293504876, 'reg_lambda': 0.06839048986845205}. Best is trial 0 with value: 0.7883111392940182.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.193410 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98365
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 943
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 11:55:06,596] Trial 2 finished with value: 0.7170788416825196 and parameters: {'n_estimators': 976, 'learning_rate': 0.0004939852126190782, 'max_depth': 13, 'num_leaves': 37, 'min_child_samples': 99, 'colsample_bytree': 0.5096935894804735, 'subsample': 0.6108582274386365, 'reg_alpha': 0.00010267911007910089, 'reg_lambda': 0.11194042134148639}. Best is trial 0 with value: 0.7883111392940182.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.209915 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98821
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 11:57:07,144] Trial 3 finished with value: 0.6522933840625661 and parameters: {'n_estimators': 973, 'learning_rate': 0.00019851670098664997, 'max_depth': 6, 'num_leaves': 41, 'min_child_samples': 64, 'colsample_bytree': 0.5297790907016906, 'subsample': 0.9307048690581547, 'reg_alpha': 0.00042371061814617617, 'reg_lambda': 0.03490709914889094}. Best is trial 0 with value: 0.7883111392940182.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.202588 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 11:57:49,799] Trial 4 finished with value: 0.6331642358909322 and parameters: {'n_estimators': 321, 'learning_rate': 0.004506578705507943, 'max_depth': 4, 'num_leaves': 146, 'min_child_samples': 75, 'colsample_bytree': 0.9957553537165384, 'subsample': 0.5511784237978077, 'reg_alpha': 0.00018020859411256828, 'reg_lambda': 0.008962650617070595}. Best is trial 0 with value: 0.7883111392940182.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.200518 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98432
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 945
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 11:58:51,265] Trial 5 finished with value: 0.8075459733671528 and parameters: {'n_estimators': 449, 'learning_rate': 0.0414305965508246, 'max_depth': 14, 'num_leaves': 95, 'min_child_samples': 96, 'colsample_bytree': 0.681044158521872, 'subsample': 0.7691046926478711, 'reg_alpha': 0.5607123091321106, 'reg_lambda': 0.0054013901716484375}. Best is trial 5 with value: 0.8075459733671528.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.223035 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98863
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 960
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 11:59:44,317] Trial 6 finished with value: 0.5802155992390615 and parameters: {'n_estimators': 583, 'learning_rate': 0.0004270582796809633, 'max_depth': 4, 'num_leaves': 102, 'min_child_samples': 58, 'colsample_bytree': 0.9339733597515756, 'subsample': 0.6238431594364049, 'reg_alpha': 0.009928931909911402, 'reg_lambda': 0.10457775867741381}. Best is trial 5 with value: 0.8075459733671528.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.339344 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 12:00:29,245] Trial 7 finished with value: 0.6436271401395054 and parameters: {'n_estimators': 130, 'learning_rate': 0.0013007492440229927, 'max_depth': 9, 'num_leaves': 50, 'min_child_samples': 42, 'colsample_bytree': 0.883721593578684, 'subsample': 0.8835752265935395, 'reg_alpha': 0.0003546681296807654, 'reg_lambda': 0.0014587489794938625}. Best is trial 5 with value: 0.8075459733671528.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.205619 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98843
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 959
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 12:01:29,766] Trial 8 finished with value: 0.6613823715916297 and parameters: {'n_estimators': 433, 'learning_rate': 0.0002094390409488262, 'max_depth': 9, 'num_leaves': 33, 'min_child_samples': 59, 'colsample_bytree': 0.66821996565563, 'subsample': 0.9264414469715546, 'reg_alpha': 0.21818538137337326, 'reg_lambda': 0.0003005976632305149}. Best is trial 5 with value: 0.8075459733671528.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.204566 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 12:03:25,229] Trial 9 finished with value: 0.6337983512999366 and parameters: {'n_estimators': 988, 'learning_rate': 0.0005245553429436737, 'max_depth': 5, 'num_leaves': 62, 'min_child_samples': 50, 'colsample_bytree': 0.5654148487889397, 'subsample': 0.7469614656458047, 'reg_alpha': 0.002879617774687576, 'reg_lambda': 0.0003828716761995922}. Best is trial 5 with value: 0.8075459733671528.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.203304 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99052
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 976
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 12:04:48,350] Trial 10 finished with value: 0.8110336081166772 and parameters: {'n_estimators': 745, 'learning_rate': 0.07904903180377824, 'max_depth': 15, 'num_leaves': 103, 'min_child_samples': 16, 'colsample_bytree': 0.7874441645208377, 'subsample': 0.8080039981246386, 'reg_alpha': 9.658315345575419, 'reg_lambda': 2.7942477139834496}. Best is trial 10 with value: 0.8110336081166772.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.204396 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99069
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 979
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 12:06:24,182] Trial 11 finished with value: 0.8099767491016698 and parameters: {'n_estimators': 753, 'learning_rate': 0.0690855477300564, 'max_depth': 15, 'num_leaves': 108, 'min_child_samples': 13, 'colsample_bytree': 0.7851213009562369, 'subsample': 0.7979254207074218, 'reg_alpha': 9.178926032473406, 'reg_lambda': 9.451445362548728}. Best is trial 10 with value: 0.8110336081166772.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.209555 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99089
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 983
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 12:08:10,198] Trial 12 finished with value: 0.8168463326992179 and parameters: {'n_estimators': 751, 'learning_rate': 0.07371907325206085, 'max_depth': 15, 'num_leaves': 114, 'min_child_samples': 12, 'colsample_bytree': 0.8151231276594769, 'subsample': 0.8085772762910193, 'reg_alpha': 6.637735818697181, 'reg_lambda': 9.270747027588252}. Best is trial 12 with value: 0.8168463326992179.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.202440 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99111
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 988
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 12:10:04,154] Trial 13 finished with value: 0.7956034664975692 and parameters: {'n_estimators': 772, 'learning_rate': 0.019418244340182276, 'max_depth': 12, 'num_leaves': 121, 'min_child_samples': 10, 'colsample_bytree': 0.8370151320836337, 'subsample': 0.859136252397956, 'reg_alpha': 7.992798860805842, 'reg_lambda': 9.839516659078141}. Best is trial 12 with value: 0.8168463326992179.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.211221 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98983
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 12:11:22,187] Trial 14 finished with value: 0.8201225956457409 and parameters: {'n_estimators': 767, 'learning_rate': 0.09597991124592449, 'max_depth': 10, 'num_leaves': 126, 'min_child_samples': 29, 'colsample_bytree': 0.7520325660818656, 'subsample': 0.9986803117074392, 'reg_alpha': 2.707996867428431, 'reg_lambda': 0.9701735118933587}. Best is trial 14 with value: 0.8201225956457409.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.200623 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98993
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


  X = _LGBMValidateData(
[I 2025-08-14 12:12:37,186] Trial 15 finished with value: 0.8002536461636017 and parameters: {'n_estimators': 809, 'learning_rate': 0.021506215232949717, 'max_depth': 8, 'num_leaves': 129, 'min_child_samples': 27, 'colsample_bytree': 0.7067076888928294, 'subsample': 0.6900181454792309, 'reg_alpha': 1.6059928497704867, 'reg_lambda': 0.7417229312934739}. Best is trial 14 with value: 0.8201225956457409.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.218750 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98993
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


Exception ignored on calling ctypes callback function: <function _log_callback at 0x7c38909e6fc0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/lightgbm/basic.py", line 287, in _log_callback
    def _log_callback(msg: bytes) -> None:
    
KeyboardInterrupt: 


No further splits with positive gain, best gain: -inf


In [None]:
best_model