In [2]:
import sys
import os
import numpy as np
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv
import importlib

sys.path.append(os.path.abspath(".."))

import src.models.lgbm.lgbm_cv_trainer as cv
import src.utils.optuna_visualizer as opv
import src.utils.telegram as te

In [4]:
# Load data
# Environment variables come from the .env file one directory above the
# notebook's working directory.
env_path = Path.cwd().parent / ".env"
load_dotenv(dotenv_path=env_path)

# Storage URL that the tuning-inspection cell passes to opv.OptunaVisualizer.
# os.environ.get returns None when the variable is absent, so report that here,
# next to its cause, instead of letting a None travel silently into a later cell.
# This is only a warning: `url` keeps exactly the value it had before.
url = os.environ.get("OPTUNA_STORAGE_URL")
if not url:
    print(
        f"WARNING: OPTUNA_STORAGE_URL is not set (looked for it in {env_path} "
        "and the process environment); cells that use `url` may fail.",
        file=sys.stderr,
    )

# Feature tables that the training cells below hand to the trainer.
tr_df1 = pd.read_parquet("../artifacts/features/base/tr_df1.parquet")
test_df1 = pd.read_parquet("../artifacts/features/base/test_df1.parquet")

In [None]:
# Check tuning results
# Name passed to the visualizer to select which study to inspect.
study_name = "lgbm_v1"

# Project helper constructed from the study name and the storage URL that the
# data-loading cell read from OPTUNA_STORAGE_URL (None if that variable is unset).
study = opv.OptunaVisualizer(study_name, url)
# NOTE(review): presumably these render the optimisation plots and a per-trial
# table — confirm against src/utils/optuna_visualizer.
study.visualize_optimization()
study.print_trials_table()

In [None]:
# Create OOF and test predictions
# Re-import the trainer module so edits made to it since the kernel started are
# picked up without restarting.
importlib.reload(cv)

# Create the output directory BEFORE training. np.save does not create missing
# parent directories, so without this a missing folder would only surface as a
# FileNotFoundError after the whole training run has finished, losing its results.
preds_dir = Path("../artifacts/preds/base")
preds_dir.mkdir(parents=True, exist_ok=True)

# Fixed LightGBM hyper-parameters for this run.
# NOTE(review): presumably copied from the best trial of the "lgbm_v1" study
# inspected above — confirm.
params = {
    "learning_rate": 0.02,
    "max_depth": 10,
    "num_leaves": 665,
    "min_child_samples": 897,
    "min_split_gain": 0.004136243817528799,
    "feature_fraction": 0.40560189158272997,
    "bagging_fraction": 0.7638109452667537,
    "bagging_freq": 7,
    "lambda_l1": 1.2710142539585863e-05,
    "lambda_l2": 4.212122094964176e-05
}

trainer = cv.LGBMCVTrainer(
    params=params,
    early_stopping_rounds=500
)
# fit returns two objects: the first is saved as the OOF predictions, the second
# as the test-set predictions.
oof, test_preds = trainer.fit(tr_df1, test_df1)

np.save(preds_dir / "oof_single_1.npy", oof)
np.save(preds_dir / "test_single_1.npy", test_preds)

In [None]:
# Send a completion message through the project's telegram helper; presumably
# this is here so the end of the training cell above is noticed without
# watching the notebook.
te.send_telegram_message("LGBM Training Complete!")

In [None]:
# Full training
# Hyper-parameters restated here (same values as the OOF cell above) so this
# cell can be run without re-running the cross-validation cell first.
params = dict(
    learning_rate=0.02,
    max_depth=10,
    num_leaves=665,
    min_child_samples=897,
    min_split_gain=0.004136243817528799,
    feature_fraction=0.40560189158272997,
    bagging_fraction=0.7638109452667537,
    bagging_freq=7,
    lambda_l1=1.2710142539585863e-05,
    lambda_l2=4.212122094964176e-05,
)

# No early_stopping_rounds is passed here, unlike the OOF cell.
trainer = cv.LGBMCVTrainer(params=params)

# NOTE(review): the positional 1000 and 1 are not self-describing — confirm
# their meaning against LGBMCVTrainer.full_train and pass them by keyword.
trainer.full_train(tr_df1, test_df1, 1000, 1)