In [1]:
import os

In [2]:
%pwd

'c:\\Users\\omarh\\OneDrive\\Desktop\\To do\\Projects\\SalesForecast\\research'

In [3]:
# Move from the notebook folder ("research") up to the project root so the
# project's relative paths (config, params, artifacts) resolve.
# Guarded on the folder name so that re-running this cell, or starting the
# kernel at the project root, does not climb one directory too far.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\omarh\\OneDrive\\Desktop\\To do\\Projects\\SalesForecast'

In [11]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings for the model-training stage.

    Instances are built by ``ConfigurationManager.get_model_trainer_config`` and
    consumed by ``ModelTrainer``. ``frozen=True`` makes the fields read-only
    after construction.

    NOTE(review): the path fields are annotated as ``Path`` but the values are
    passed through from the loaded YAML without conversion, so they may be
    plain strings at runtime — confirm before relying on ``Path`` methods.
    """
    # Stage output directory; created before the config object is built.
    root_dir: Path
    # CSV read with pandas as the training set.
    train_file: Path
    # CSV read with pandas and used as the evaluation set during fitting.
    test_file: Path
    # Destination passed to the trained model's ``save_model``.
    model_file: Path
    # The next five values are forwarded to ``CatBoostRegressor(...)``.
    iterations: int
    learning_rate: float
    depth: int
    loss_function: str
    # Forwarded to ``model.fit(..., early_stopping_rounds=...)``.
    early_stopping_rounds: int
    verbose: int

In [12]:
from salesRegressor.constants import *
from salesRegressor.utils.common import read_yaml, create_directories

In [13]:
class ConfigurationManager:
    """Load the project's YAML config and params and build per-stage settings.

    Both files are read once at construction time and kept on the instance as
    ``self.config`` and ``self.params``.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and ensure the artifacts root directory exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the settings object for the model-training stage.

        Creates the stage's ``root_dir`` as a side effect, then combines the
        ``model_trainer`` section of the config with the ``CatBoostParams``
        section of the params.

        Returns:
            A populated ``ModelTrainerConfig``.
        """
        trainer_section = self.config.model_trainer
        catboost_params = self.params.CatBoostParams

        create_directories([trainer_section.root_dir])

        # Field names taken from the config section and from the params section,
        # listed in the order the dataclass declares them.
        path_fields = ("root_dir", "train_file", "test_file", "model_file")
        hyperparameter_fields = (
            "iterations",
            "learning_rate",
            "depth",
            "loss_function",
            "early_stopping_rounds",
            "verbose",
        )

        settings = {name: getattr(trainer_section, name) for name in path_fields}
        settings.update(
            (name, getattr(catboost_params, name)) for name in hyperparameter_fields
        )

        return ModelTrainerConfig(**settings)

In [14]:
import pandas as pd
from catboost import CatBoostRegressor, Pool
from salesRegressor import logger

In [15]:
class ModelTrainer:
    """Fit a CatBoost regressor on the prepared train/test CSVs and save it.

    The settings object supplies the CSV locations, the output model path and
    the CatBoost hyper-parameters.
    """

    # Label column predicted by the model.
    TARGET_COLUMN = 'Sales'
    # Columns removed from the feature matrix (the label itself plus 'Date').
    NON_FEATURE_COLUMNS = ('Sales', 'Date')
    # A column whose name contains any of these substrings is always treated as
    # categorical, whatever its dtype.
    CATEGORICAL_NAME_MARKERS = ("StoreType", "Assortment")

    # The annotation is a string so that defining this class does not require
    # ModelTrainerConfig to be resolvable at definition time.
    def __init__(self, config: "ModelTrainerConfig"):
        """Store the training settings.

        Args:
            config: Settings for this stage (file paths and hyper-parameters).
        """
        self.config = config

    @classmethod
    def _split_features_target(cls, frame: pd.DataFrame):
        """Split a loaded frame into (features, target).

        Raises:
            KeyError: If the target column or a non-feature column is missing.
        """
        target = frame[cls.TARGET_COLUMN]
        features = frame.drop(list(cls.NON_FEATURE_COLUMNS), axis=1)
        return features, target

    @classmethod
    def _categorical_columns(cls, features: pd.DataFrame) -> list:
        """Return the names of the columns to declare as categorical.

        A column qualifies when it holds text or when its name contains one of
        ``CATEGORICAL_NAME_MARKERS``. Text is recognised both as ``object``
        dtype and as pandas' dedicated string dtype; comparing only against
        ``'object'`` would miss the latter.
        """
        is_object = pd.api.types.is_object_dtype
        is_string = pd.api.types.is_string_dtype
        return [
            col
            for col, dtype in features.dtypes.items()
            if is_object(dtype)
            or is_string(dtype)
            or any(marker in col for marker in cls.CATEGORICAL_NAME_MARKERS)
        ]

    def train(self):
        """Train the model, save it to ``config.model_file`` and return it.

        The test CSV is used as the evaluation set, so early stopping is driven
        by the test-set metric.

        Returns:
            The fitted ``CatBoostRegressor``.
        """
        train_df = pd.read_csv(self.config.train_file, low_memory=False)
        test_df = pd.read_csv(self.config.test_file, low_memory=False)

        X_train, y_train = self._split_features_target(train_df)
        X_test, y_test = self._split_features_target(test_df)

        # Detected on the training features and reused for the test pool so
        # both pools declare the same categorical columns.
        cat_features = self._categorical_columns(X_train)

        logger.info(f"Categorical features: {cat_features}")

        train_pool = Pool(data=X_train, label=y_train, cat_features=cat_features)
        test_pool = Pool(data=X_test, label=y_test, cat_features=cat_features)

        model = CatBoostRegressor(
            iterations=self.config.iterations,
            learning_rate=self.config.learning_rate,
            depth=self.config.depth,
            loss_function=self.config.loss_function,
            verbose=self.config.verbose
        )

        logger.info("Training CatBoost model...")
        model.fit(train_pool, eval_set=test_pool, early_stopping_rounds=self.config.early_stopping_rounds)

        model.save_model(self.config.model_file)
        logger.info(f"Model saved to: {self.config.model_file}")

        return model

In [16]:
# Run the model-training stage end to end: load the configuration, build the
# trainer and fit/save the model.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()

    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()

    logger.info("Model training completed successfully.")

except Exception:
    # Record the failure with its traceback in the project log, then re-raise
    # the same exception so the cell still fails visibly.
    # NOTE(review): assumes `logger` is a standard `logging.Logger` — confirm.
    logger.exception("Model training failed.")
    raise

[2025-12-01 22:30:45,916: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-12-01 22:30:45,920: INFO: common: yaml file: params.yaml loaded successfully]
[2025-12-01 22:30:45,922: INFO: common: created directory at: artifacts]
[2025-12-01 22:30:45,924: INFO: common: created directory at: artifacts/model_trainer]
[2025-12-01 22:30:55,724: INFO: 1964197463: Categorical features: ['StateHoliday', 'StoreType', 'Assortment', 'PromoInterval']]
[2025-12-01 22:30:57,884: INFO: 1964197463: Training CatBoost model...]
0:	learn: 0.4083058	test: 0.3949552	best: 0.3949552 (0)	total: 371ms	remaining: 6m 10s
100:	learn: 0.0734092	test: 0.0730503	best: 0.0730503 (100)	total: 33.1s	remaining: 4m 54s
200:	learn: 0.0650103	test: 0.0669087	best: 0.0669087 (200)	total: 1m 5s	remaining: 4m 21s
300:	learn: 0.0624186	test: 0.0647241	best: 0.0647241 (300)	total: 1m 38s	remaining: 3m 49s
400:	learn: 0.0609056	test: 0.0633761	best: 0.0633761 (400)	total: 2m 12s	remaining: 3m 18s
500:	learn: 