In [5]:
import os

# Test model training

### Model training config entity

In [13]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the model training stage, built from the ``training`` section of config.yaml."""
    # Working directory of the training stage; created by ConfigurationManager.get_training_config.
    root_dir: Path
    # Directory the fitted models are pickled into by Training.save_models.
    trained_models_path: Path
    # Parquet file that Training.train loads as its training data.
    training_data_path: Path

### Update config manager class

In [4]:
from src.dopro2_HEFTcom_challenge.constants import PARAMS_FILE_PATH, CONFIG_FILE_PATH
import yaml
from loguru import logger

In [15]:
class ConfigurationManager:
    """
    Class to manage all configurations.

    Reads config.yaml and params.yaml once at construction time and builds
    the typed config entities (e.g. ``TrainingConfig``) for the pipeline stages.
    """

    def __init__(
        self,
        config_filepath: Path = CONFIG_FILE_PATH,
        params_filepath: Path = PARAMS_FILE_PATH
    ) -> None:
        """
        Constructor for ConfigurationManager Class.
        Loads both YAML files and creates the artifacts folder.

        :param config_filepath: Path to config.yaml file
        :param params_filepath: Path to params.yaml file
        :raises FileNotFoundError: if one of the YAML files does not exist
        :raises KeyError: if config.yaml has no ``artifacts_root`` entry
        """
        # Wrapping in Path() lets callers pass plain strings as well as Path objects.
        with Path(config_filepath).open("r") as f:
            # safe_load yields None for an empty file; fall back to an empty
            # dict so a missing key surfaces as a KeyError naming that key.
            self.config: dict = yaml.safe_load(f) or {}

        with Path(params_filepath).open("r") as f:
            self.params: dict = yaml.safe_load(f) or {}

        os.makedirs(self.config["artifacts_root"], exist_ok=True)
        logger.info("created directory at: {}", self.config["artifacts_root"])

    def get_training_config(self) -> TrainingConfig:
        """
        Get all training config values and create the training folder in the artifacts dir.

        :return: values from the ``training`` section of config.yaml
        :rtype: TrainingConfig
        :raises KeyError: if the ``training`` section or one of its entries is missing
        """
        config = self.config["training"]

        os.makedirs(config["root_dir"], exist_ok=True)
        logger.info("created directory at: {}", config["root_dir"])

        # YAML delivers plain strings; convert them so the entity really holds
        # the Path objects its annotations promise.
        return TrainingConfig(
            root_dir=Path(config["root_dir"]),
            trained_models_path=Path(config["trained_models_path"]),
            training_data_path=Path(config["training_data_path"])
        )


### Create training component

In [8]:
import statsmodels.formula.api as smf
from statsmodels.iolib.smpickle import load_pickle
import pandas as pd

In [18]:
class Training:
    """Class to perform the model training."""

    # Quantile levels (in percent) for which one quantile-regression model is fitted.
    QUANTILES = tuple(range(10, 100, 10))

    def __init__(self, config: TrainingConfig) -> None:
        """
        Constructor for Training class.

        :param config: config values from config.yaml
        """

        self.config = config

    @staticmethod
    def save_models(forecast_models: dict, path: Path) -> None:
        """
        Pickle every fitted model into the given directory.

        Each entry is written as ``model_<key>.pickle`` (e.g. ``model_q10.pickle``).

        :param forecast_models: mapping of model name (e.g. "q10") to a fitted
            result object exposing ``save(filename)``
        :param path: target directory; created if it does not exist
        """
        os.makedirs(path, exist_ok=True)
        logger.info("created directory at: {}", path)

        # Iterate over what was actually passed in instead of assuming a fixed
        # set of quantile keys, so a different grid cannot cause a KeyError.
        for name, fitted_model in forecast_models.items():
            fitted_model.save(os.path.join(path, f"model_{name}.pickle"))
        logger.info("saved all models at {}", path)

    def train(self) -> None:
        """
        Fit one quantile regression per level in ``QUANTILES`` and save the models.

        Reads the training data from ``config.training_data_path`` and writes
        the fitted models to ``config.trained_models_path``.
        """
        logger.info("Loading training data from {}", self.config.training_data_path)
        training_data = pd.read_parquet(self.config.training_data_path)
        model = smf.quantreg(
            formula='total_generation_MWh ~ bs(SolarDownwardRadiation,df=5) + bs(WindSpeed,df=8)',
            data=training_data
        )

        logger.info("Start model training")
        # Only the fitted results are kept: in-sample predictions were formerly
        # written into the local frame and then discarded, so they are not computed.
        forecast_models = {
            f"q{quantile}": model.fit(q=quantile / 100, max_iter=2500)
            for quantile in self.QUANTILES
        }
        logger.info("Model trained")

        self.save_models(forecast_models, self.config.trained_models_path)

In [19]:
# Run the training stage end to end: load config, build the component, fit and save.
try:
    config = ConfigurationManager()
    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.train()
except Exception:
    # Record the failure (with traceback) in the loguru log, then let the
    # original exception propagate unchanged.
    logger.exception("Model training failed")
    raise

[32m2024-10-10 17:54:40.474[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m24[0m - [1mcreated directory at: artifacts[0m
[32m2024-10-10 17:54:40.475[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_training_config[0m:[36m37[0m - [1mcreated directory at: artifacts/training[0m
[32m2024-10-10 17:54:40.478[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m23[0m - [1mLoading trainind data from artifacts/prepared_data/model_data.parquet[0m
[32m2024-10-10 17:54:42.362[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m30[0m - [1mStart model training[0m
[32m2024-10-10 18:07:59.862[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m36[0m - [1mModel trained[0m
[32m2024-10-10 18:07:59.865[0m | [1mINFO    [0m | [36m__main__[0m:[36msave_models[0m:[36m16[0m - [1mcreated directory at: artifacts/training/models[0m
[32m2024-10-10 18:08:19.392[0m | [1mINFO    [0m | [36m__main__[0m:[36msave_models[0m:[36