In [1]:
import sys
import os
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import importlib

# Make the parent directory importable so the `src.*` imports below resolve
# when the notebook is run from its own folder.
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
_project_root = os.path.abspath("..")
if _project_root not in sys.path:
    sys.path.append(_project_root)

import src.models.xgb.xgb_cv_trainer as cv
import src.utils.optuna_visualizer as opv
import src.utils.telegram as te

In [2]:
# Load configuration and data
# The .env file is looked up one level above the current working directory.
env_path = Path.cwd().parent / ".env"
load_dotenv(env_path)

# `url` is None when OPTUNA_STORAGE_URL is not defined in the environment.
url = os.getenv("OPTUNA_STORAGE_URL")

# Base feature tables used by the training cells below.
tr_df1 = pd.read_parquet("../artifacts/features/base/tr_df1.parquet")
test_df1 = pd.read_parquet("../artifacts/features/base/test_df1.parquet")

In [None]:
# Check tuning results
# Name of the Optuna study to inspect; `url` comes from the data-loading cell.
study_name = "l1_xgb_v1"
study = opv.OptunaVisualizer(study_name, url)

# Render the optimization plots first, then the per-trial table.
study.visualize_optimization()
study.print_trials_table()

In [None]:
# Create OOF and test predictions
# Reload the trainer module so edits made to it on disk are picked up
# without restarting the kernel.
importlib.reload(cv)

# Hyper-parameters passed to the trainer.
# NOTE(review): presumably the best trial of the tuning study above — confirm.
params = {
    "learning_rate": 0.016339963844430296,
    "max_depth": 12,
    "min_child_weight": 47.74820951880311,
    "colsample_bytree": 1.0,
    "subsample": 0.7931390750752597,
    "reg_alpha": 2.556308808893024,
    "reg_lambda": 0.008220020184828742
}

# Create the output directory *before* training: np.save does not create
# missing parent directories, and failing after the CV run would throw the
# freshly computed predictions away.
Path("../artifacts/preds/base").mkdir(parents=True, exist_ok=True)

trainer = cv.XGBCVTrainer(
    params=params,
    early_stopping_rounds=500
)
# fit() is unpacked into two results, saved below as the OOF and test arrays.
oof, test_preds = trainer.fit(tr_df1, test_df1)

np.save("../artifacts/preds/base/oof_single_1.npy", oof)
np.save("../artifacts/preds/base/test_single_1.npy", test_preds)

In [None]:
# Send a completion notice through the project's Telegram helper
# (src.utils.telegram) so a long training run can be left unattended.
te.send_telegram_message("XGB Training Complete!")

In [None]:
# Full training
# Same hyper-parameter values as the OOF cell, defined again here so this cell
# can be run without executing the cross-validation cell first.
params = dict(
    learning_rate=0.016339963844430296,
    max_depth=12,
    min_child_weight=47.74820951880311,
    colsample_bytree=1.0,
    subsample=0.7931390750752597,
    reg_alpha=2.556308808893024,
    reg_lambda=0.008220020184828742,
)

trainer = cv.XGBCVTrainer(params=params)

# NOTE(review): the meaning of the positional 1000 and 1 is not visible from
# this notebook — presumably a round count and a run/model id; confirm against
# XGBCVTrainer.full_train and consider passing them by keyword.
trainer.full_train(tr_df1, test_df1, 1000, 1)