# Best practice: Set the working directory to the project root


In [59]:
%load_ext autoreload
%autoreload 2
from constants import ROOT_DIR
import os
import sys
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)
os.chdir(ROOT_DIR)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load Imports and Configs

In [60]:
from configs.data_config import DataConfig
from configs.model_config import ModelConfig
from src.models.lgbm.train import TrainingEngine
from configs.lgbm_config import LGBMConfig
from torchmetrics.retrieval import RetrievalNormalizedDCG

import pandas as pd

# Instantiate the data and LightGBM configuration objects used by the cells below
# (DataConfig first, then LGBMConfig — same order as a pair of plain assignments).
data_config, lgbm_config = DataConfig(), LGBMConfig()

# Read Processed Features Data

In [61]:
# Read the three parquet files whose locations are given by data_config, in
# train -> val -> test order.
# NOTE(review): `test` is not referenced by any other cell visible in this notebook.
train, val, test = (
    pd.read_parquet(split_cfg.file_path)
    for split_cfg in (data_config.train, data_config.val, data_config.test)
)

# Training Engine

In [62]:
# Rank cutoff shared by the scorer and its reported name, so the two cannot
# drift apart when the cutoff is changed.
NDCG_TOP_K = 5

# Build the training engine from the train/validation frames, the LightGBM
# config and an NDCG@k scorer.
# NOTE(review): the meaning of the positional arguments is inferred from the
# variable names — confirm against TrainingEngine in src/models/lgbm/train.py.
trainer = TrainingEngine(
    train,
    val,
    lgbm_config,
    scorer=RetrievalNormalizedDCG(top_k=NDCG_TOP_K),
    scorer_name=f"ndcg_at_{NDCG_TOP_K}",
)

# Default model benchmark

In [29]:
# Train the baseline model through the engine built in the "Training Engine" cell.
# NOTE(review): presumably fits the ranker with default hyperparameters as a
# benchmark — confirm in src/models/lgbm/train.py.
trainer.train_default_model()

# Hyperparameter Tuning



In [63]:
# Run the hyperparameter search on the engine.
# NOTE(review): the captured log indicates an Optuna study ("Optimizing for 20
# trials"), so this cell can take a while — confirm the trial count in the config.
trainer.hyperparameter_tuning()
# Bare last expression: its value is rendered as the cell's output
# (the captured output shows a per-k table with ndcg@k, recall@k, precision@k, map@k).
trainer.best_model_results

2025-05-04 21:21:43.345 | INFO     | src.models.lgbm.train:hyperparameter_tuning:8 - Experiment 'LGBMRanker_Optimization' already exists.
[I 2025-05-04 21:21:43,354] A new study created in memory with name: no-name-1b4bf9bc-3ce3-42cf-b058-a640e2f71b24
2025-05-04 21:21:43.355 | INFO     | src.models.lgbm.train:optimize_optuna:195 - Optimizing for 20 trials
[I 2025-05-04 21:22:05,058] Trial 0 finished with value: 0.4914098381996155 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 31, 'max_depth': 7, 'learning_rate': 0.05439233114768742, 'min_split_gain': 0.023649554341539147, 'min_child_weight': 0.07462654854337827, 'min_child_samples': 81, 'subsample': 0.8009008355252296, 'subsample_freq': 0, 'colsample_bytree': 0.6441867562656824, 'reg_alpha': 5.6713028829205345e-06, 'reg_lambda': 1.254441414644514e-06, 'objective': 'rank_xendcg'}. Best is trial 0 with value: 0.491409838199615

Unnamed: 0,k,ndcg@k,recall@k,precision@k,map@k
0,1,0.232716,0.231987,0.231987,0.231987
1,2,0.366093,0.446801,0.220623,0.336616
2,3,0.458702,0.632997,0.208025,0.397559
3,4,0.523403,0.781818,0.193645,0.435185
4,5,0.572001,0.906397,0.180337,0.460606


# Train Best Model with full data

In [26]:
# Train the best model through the engine.
# NOTE(review): presumably refits with the best hyperparameters found by
# trainer.hyperparameter_tuning(), so that cell must have been run first —
# confirm in src/models/lgbm/train.py.
trainer.train_best_model()

# Save best and default model for benchmarking

In [30]:
# Persist both models so they can be benchmarked against each other later.
# NOTE(review): output locations are decided inside TrainingEngine (not visible
# here); presumably both the default-model and best-model training cells must
# have been run on this `trainer` instance first — confirm.
trainer.save_best_model()
trainer.save_default_model()