#### Initial Setup

In [1]:
import os

In [2]:
%pwd

'E:\\RajaRajeshwari\\MyFolders\\Projects\\mlops_water_potability_prediction\\notebooks'

In [3]:
# Move from notebooks/ up to the project root so the relative config and
# artifact paths used below resolve. The guard makes the cell idempotent:
# re-running it no longer climbs one directory higher each time.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("../")

In [4]:
%pwd

'E:\\RajaRajeshwari\\MyFolders\\Projects\\mlops_water_potability_prediction'

In [5]:
# Configure the MLflow tracking endpoint and credentials through environment
# variables before any mlflow call is made.
import getpass

os.environ["MLFLOW_TRACKING_URI"] = "https://dagshub.com/rajarajeshwarir2021/MLOPS-Water-Potability-prediction.mlflow"
os.environ["MLFLOW_TRACKING_USERNAME"] = "rajarajeshwarir2021"

# SECURITY: the access token must not be stored in the notebook. Use the value
# already exported in the environment, or prompt for it (input is not echoed
# and is not saved in the cell source or output).
# NOTE(review): a token was previously committed in this cell; it should be
# revoked and regenerated.
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass.getpass("MLflow tracking password/token: ")

#### Entity

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings consumed by the model evaluation stage."""
    # Directory in which the metrics file is written.
    root_dir: Path
    # CSV file holding the test set (features plus the target column).
    test_data_path: Path
    # Serialized model that is loaded with joblib.
    model_path: Path
    # File name of the metrics JSON, joined onto root_dir.
    metric_file_name: str
    # Model hyper-parameters, logged to MLflow alongside the metrics.
    parameters: dict
    # Name of the label column inside the test CSV.
    target_column: str
    # URI handed to mlflow.set_registry_uri.
    mlflow_uri: str    

#### Configuration

In [7]:
from src.mlops_water_potability_prediction_project.constants import *
from src.mlops_water_potability_prediction_project.utilities.helpers import read_yaml, create_directories, save_json

class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage configuration objects."""

    # Fallback registry/tracking URI, used only when MLFLOW_TRACKING_URI is unset.
    _DEFAULT_MLFLOW_URI = "https://dagshub.com/rajarajeshwarir2021/MLOPS-Water-Potability-prediction.mlflow"

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH, schema_filepath=SCHEMA_FILE_PATH):
        """Read the config, params and schema YAML files and create the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the model parameters YAML.
            schema_filepath: Path of the data schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories(directories_path_list=[self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Assemble the settings for the model evaluation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            ModelEvaluationConfig populated from the ``model_evaluation`` section
            of the config, the ``CatBoost`` section of the params and the
            ``TARGET_COLUMN`` entry of the schema.
        """
        config = self.config.model_evaluation
        params = self.params.CatBoost
        target = self.schema.TARGET_COLUMN

        create_directories(directories_path_list=[config.root_dir])

        # Prefer the URI configured in the environment so the endpoint is not
        # duplicated in code; fall back to the original hard-coded value.
        mlflow_uri = os.environ.get("MLFLOW_TRACKING_URI") or self._DEFAULT_MLFLOW_URI

        return ModelEvaluationConfig(
            root_dir=config.root_dir,
            test_data_path=config.test_data_path,
            model_path=config.model_path,
            metric_file_name=config.metric_file_name,
            parameters=params,
            target_column=target.name,
            mlflow_uri=mlflow_uri,
        )

#### Component

In [10]:
import joblib
import mlflow
import mlflow.sklearn
import numpy as np
import os
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix
from urllib.parse import urlparse

from src.mlops_water_potability_prediction_project import logger


class ModelEvaluation:
    """Scores a persisted model on the test set and records the results with MLflow."""

    def __init__(self, config: ModelEvaluationConfig):
        """Store the evaluation settings.

        Args:
            config: Paths, parameters and MLflow settings for this stage.
        """
        self.config = config

    def evaluate(self):
        """Evaluate the stored model and log parameters, metrics and the model to MLflow.

        Reads the test CSV, loads the model, predicts on the feature columns
        only, writes the metrics JSON under ``config.root_dir`` and logs
        everything inside a single MLflow run.

        Raises:
            ValueError: If the confusion matrix is not 2x2 (the metrics below
                are defined for binary classification only).
        """
        test_df = pd.read_csv(self.config.test_data_path)
        # NOTE(review): joblib.load unpickles the file; only load trusted artifacts.
        model = joblib.load(self.config.model_path)

        X_test = test_df.drop([self.config.target_column], axis=1)
        y_test = test_df[self.config.target_column]

        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            # Predict on the features only; the target column must not be fed to the model.
            y_pred = model.predict(X_test)

            accuracy, conf_matrix = ModelEvaluation.evaluate_metrics(y_test, y_pred)

            if conf_matrix.shape != (2, 2):
                raise ValueError(
                    f"Expected a 2x2 confusion matrix for binary classification, got shape {conf_matrix.shape}"
                )

            # scikit-learn lays the binary matrix out as [[TN, FP], [FN, TP]]
            # (rows = actual, columns = predicted, labels in sorted order;
            # assumes the positive class sorts last, e.g. labels 0/1).
            true_neg, false_pos, false_neg, true_pos = (int(v) for v in conf_matrix.ravel())

            # Save metrics locally
            score = {
                "Accuracy": accuracy,
                "True_Positive": true_pos,
                "True_Negative": true_neg,
                "False_Positive": false_pos,
                "False_Negative": false_neg,
            }
            metric_file_path = Path(os.path.join(self.config.root_dir, self.config.metric_file_name))
            save_json(metric_file_path, score)

            # Log parameters and metrics
            mlflow.log_params(self.config.parameters)
            mlflow.log_metrics(score)

            # Log the model while the run is still active so it is attached to
            # the same run as the metrics. Only register it when the tracking
            # store is not a plain local file store.
            if tracking_url_type_store != "file":
                mlflow.sklearn.log_model(model, "model", registered_model_name="CatBoostModel")
            else:
                mlflow.sklearn.log_model(model, "model")

        logger.info("Evaluate the model")

    @staticmethod
    def evaluate_metrics(actual, predict):
        """Compute accuracy (rounded to 2 decimals) and the confusion matrix.

        Args:
            actual: Ground-truth labels.
            predict: Predicted labels.

        Returns:
            Tuple ``(accuracy, conf_matrix)``.
        """
        accuracy = round(accuracy_score(actual, predict), 2)
        conf_matrix = confusion_matrix(actual, predict)
        return accuracy, conf_matrix

#### Pipeline

In [11]:
# Run the evaluation stage end to end: load configuration, build the stage
# settings, then evaluate the model.
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    # Use a distinct name so the config object is not shadowed by the component.
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    model_evaluation.evaluate()
except Exception:
    # Record the failure with its traceback, then re-raise unchanged.
    logger.exception("Model evaluation stage failed")
    raise

[2024-01-29 14:55:36,997]: INFO: helpers: YAML file: config\config.yaml loaded successfully]
[2024-01-29 14:55:36,999]: INFO: helpers: YAML file: params.yaml loaded successfully]
[2024-01-29 14:55:37,000]: INFO: helpers: YAML file: schema.yaml loaded successfully]
[2024-01-29 14:55:37,001]: INFO: helpers: Created directory at: artifacts]
[2024-01-29 14:55:37,002]: INFO: helpers: Created directory at: artifacts/model_evaluation]
[2024-01-29 14:55:37,453]: INFO: helpers: JSON file saved at: artifacts\model_evaluation\metrics.json]


Registered model 'CatBoostModel' already exists. Creating a new version of this model...
2024/01/29 14:55:44 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: CatBoostModel, version 6


[2024-01-29 14:55:44,333]: INFO: 2019962352: Evaluate the model]


Created version '6' of model 'CatBoostModel'.
