# Model evaluation with MLflow

In [52]:
from pathlib import Path
import os
import pandas as pd
import json

In [1]:
# Show the kernel's working directory: relative paths used later in this
# notebook (e.g. the "score.txt" output file) are resolved against it.
%pwd

'c:\\Tom\\HKA\\7_Semester\\Domänenprojekt_2\\DoPro'

In [56]:
import dagshub
# Initialise the DagsHub client for this repository with its MLflow integration enabled.
dagshub.init(repo_owner='tombeihofer23', repo_name='DoPro2', mlflow=True)

import mlflow
# Smoke test: open a throwaway run and log one dummy parameter and one dummy metric.
with mlflow.start_run():
    mlflow.log_param(key='parameter name', value='value')
    mlflow.log_metric(key='metric name', value=1)



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=42378371-53a5-45fd-b93b-8bcf72a0dafd&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=9cc217f904b651bc66a09839b5ffeaf044bbdff845cc2c38404532f9da69bd71




2024/10/11 15:12:11 INFO mlflow.tracking._tracking_service.client: 🏃 View run youthful-ram-521 at: https://dagshub.com/tombeihofer23/DoPro2.mlflow/#/experiments/0/runs/1a7b5c778f4b4f31985fc8f6b7d79311.
2024/10/11 15:12:11 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/tombeihofer23/DoPro2.mlflow/#/experiments/0.


In [37]:
import statsmodels.formula.api as smf
from statsmodels.regression.quantile_regression import QuantRegResults
from statsmodels.base.model import Results

### Model evaluation config entity

In [18]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable container for the settings of the evaluation stage."""

    # Directory in which models are stored.
    path_to_models: Path

    # Directory where training data is stored.
    training_data_path: Path

    # Model parameters.
    all_params: dict

    # URL to MLflow dashboard.
    mlflow_uri: str

In [19]:
from src.dopro2_HEFTcom_challenge.constants import PARAMS_FILE_PATH, CONFIG_FILE_PATH
import yaml
from loguru import logger

In [20]:
class ConfigurationManager:
    """Class to manage all configurations."""

    def __init__(
        self,
        config_filepath: Path = CONFIG_FILE_PATH,
        params_filepath: Path = PARAMS_FILE_PATH
    ) -> None:
        """
        Constructor for ConfigurationManager Class.
        Loads both YAML files and makes sure the artifacts folder exists.

        :param config_filepath: Path to config.yaml file
        :param params_filepath: Path to params.yaml file

        """
        # Read the YAML files explicitly as UTF-8 so that loading does not
        # depend on the platform's default encoding (e.g. cp1252 on Windows).
        with Path(config_filepath).open("r", encoding="utf-8") as f:
            self.config: dict = yaml.safe_load(f)

        with Path(params_filepath).open("r", encoding="utf-8") as f:
            # An empty params file is parsed as None; fall back to an empty dict.
            self.params: dict = yaml.safe_load(f) or {}

        os.makedirs(self.config["artifacts_root"], exist_ok=True)
        logger.info("created directory at: {}", self.config["artifacts_root"])

    def get_evalution_config(self) -> EvaluationConfig:
        """
        Build the evaluation settings from the "evaluation" section of config.yaml.

        The two path entries are converted to ``Path`` objects so that the
        returned entity matches its declared field types.

        :return: values from config.yaml plus all model parameters
        :rtype: EvaluationConfig
        """

        config = self.config["evaluation"]

        return EvaluationConfig(
            path_to_models=Path(config["path_to_models"]),
            training_data_path=Path(config["training_data_path"]),
            all_params=self.params,
            mlflow_uri=config["mlflow_uri"],
        )

    # Correctly spelled alias; the original (misspelled) name is kept so that
    # existing callers continue to work.
    get_evaluation_config = get_evalution_config

### Evaluation component

In [57]:
from urllib.parse import urlparse

In [58]:
class Evaluation:
    """Class to evaluate the model."""

    # Quantile levels (in percent) that are scored; one model per level is expected.
    QUANTILES = tuple(range(10, 100, 10))

    def __init__(self, config: "EvaluationConfig") -> None:
        """
        Constructor for Evaluation class.

        :param config: config values from config.yaml
        """

        self.config = config

    @staticmethod
    def _quantile_label(index: int) -> str:
        """Return the column / model name for the model at position ``index`` (0 -> "q10")."""
        return f"q{(index + 1) * 10}"

    @staticmethod
    def load_models(path: Path) -> "list[Results]":
        """
        Load every ``*.pickle`` model found directly inside ``path``.

        The files are loaded in sorted file-name order. Quantile labels are
        assigned by list position later on, so the order must be deterministic
        (a plain ``glob`` yields files in arbitrary order).

        :param path: directory that contains the pickled models
        :return: loaded models, ordered by file name
        """
        # NOTE: loading unpickles the files - only point this at trusted model directories.
        return [
            QuantRegResults.load(file)
            for file in sorted(Path(path).glob("*.pickle"))
        ]

    @staticmethod
    def pinball_score(df):
        """
        Compute the mean pinball loss averaged over the quantiles 10 % .. 90 %.

        :param df: frame with the column "total_generation_MWh" and the
            prediction columns "q10" .. "q90"
        :return: average of the per-quantile mean pinball losses
        """

        def pinball(y, q, alpha):
            # Under-prediction is weighted with alpha, over-prediction with (1 - alpha).
            return (y - q) * alpha * (y >= q) + (q - y) * (1 - alpha) * (y < q)

        score = [
            pinball(
                y=df["total_generation_MWh"],
                q=df[f"q{qu}"],
                alpha=qu / 100,
            ).mean()
            for qu in Evaluation.QUANTILES
        ]
        return sum(score) / len(score)

    def make_predictions(self, start_row: int = 400000):
        """
        Load the models and predict every quantile on the evaluation rows.

        Stores the loaded models in ``self.models`` and a frame with the target
        column plus "q10" .. "q90" in ``self.predictions``. Negative predictions
        are set to 0.

        :param start_row: position of the first row used for evaluation; the
            default keeps the temporary slice that was hard-coded for testing
        :raises ValueError: if fewer models than scored quantiles were found
        """
        self.models = self.load_models(self.config.path_to_models)
        if len(self.models) < len(self.QUANTILES):
            raise ValueError(
                f"Expected at least {len(self.QUANTILES)} models in "
                f"{self.config.path_to_models}, found {len(self.models)}."
            )

        # Temporary for testing: only the rows from ``start_row`` onwards are used.
        test_data = pd.read_parquet(self.config.training_data_path).iloc[start_row:]
        logger.info("Start making predictions on the trained models.")

        # Predict from the untouched input frame and collect the results in a
        # separate frame; this avoids copying the whole data set per model and
        # keeps earlier predictions out of the inputs of later models.
        predictions = test_data[["total_generation_MWh"]].copy()
        for i, model in enumerate(self.models):
            label = self._quantile_label(i)
            predictions[label] = model.predict(test_data)
            predictions[label] = predictions[label].clip(lower=0)

        self.predictions = predictions[
            ["total_generation_MWh", *(f"q{qu}" for qu in self.QUANTILES)]
        ]
        logger.info("Made predictions on the trained models.")

    def evaluation(self):
        """Compute the pinball score of ``self.predictions`` and write it to score.txt."""
        logger.info("Calculate the pinball score on the predictions.")
        self.score = self.pinball_score(self.predictions)
        with open("score.txt", "w") as f:
            f.write(f"Pinball Score: {self.score}")
        logger.info("Score file saved at: score.txt")

    def log_into_mlflow(self):
        """
        Log parameters, the pinball score and all models in one MLflow run.

        Every model is stored under its own artifact path ("model_q10", ...).
        Logging all of them under the single path "model" would make them share
        one artifact location inside the run. Models are additionally registered
        under their quantile label unless the tracking store is file based.
        """
        mlflow.set_registry_uri(self.config.mlflow_uri)
        # NOTE(review): only the registry URI is set here; the tracking URI is
        # whatever was configured before this call - confirm this is intended.
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        # The registration step is skipped for a file-based tracking store.
        register_models = tracking_url_type_store != "file"

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(
                {"pinball score": self.score}
            )
            for i, model in enumerate(self.models):
                label = self._quantile_label(i)
                artifact_path = f"model_{label}"
                if register_models:
                    mlflow.statsmodels.log_model(
                        model, artifact_path, registered_model_name=label
                    )
                else:
                    mlflow.statsmodels.log_model(model, artifact_path)

In [60]:
# Run the evaluation stage end to end: load the configuration, predict,
# score and log to MLflow. Exceptions propagate unchanged (the former
# try/except only re-raised them), so the notebook shows the original traceback.
config = ConfigurationManager()
eval_config = config.get_evalution_config()
evaluation = Evaluation(config=eval_config)
evaluation.make_predictions()
evaluation.evaluation()
evaluation.log_into_mlflow()

[32m2024-10-11 16:51:11.100[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m24[0m - [1mcreated directory at: artifacts[0m


[32m2024-10-11 16:51:26.493[0m | [1mINFO    [0m | [36m__main__[0m:[36mmake_predictions[0m:[36m36[0m - [1mStart making predictions on the trained models.[0m
[32m2024-10-11 16:51:27.369[0m | [1mINFO    [0m | [36m__main__[0m:[36mmake_predictions[0m:[36m44[0m - [1mMade predictions on the trained models.[0m
[32m2024-10-11 16:51:27.372[0m | [1mINFO    [0m | [36m__main__[0m:[36mevaluation[0m:[36m47[0m - [1mCalculate the pinball score on the predictions.[0m
[32m2024-10-11 16:51:27.388[0m | [1mINFO    [0m | [36m__main__[0m:[36mevaluation[0m:[36m51[0m - [1mScore file saved at: score.txt[0m
Successfully registered model 'q10'.
2024/10/11 16:54:32 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: q10, version 1
Created version '1' of model 'q10'.
Successfully registered model 'q20'.
2024/10/11 16:57:20 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds 