In [1]:
%pwd

'/home/omar/Desktop/End-to-end-Machine-Learning-Project-with-MLflow/research'

In [2]:
import os
os.chdir("../")

In [3]:
%pwd

'/home/omar/Desktop/End-to-end-Machine-Learning-Project-with-MLflow'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    root_dir: Path
    test_data_path: Path
    model_path: Path
    parms: dict
    metric_file_name: Path
    target_column: str
    mlflow_uri: str

In [5]:
from src.ML.constants import *
from ML.utils.common import read_yaml, create_directories, save_json

In [6]:
class ConfigurationManager:
    def __init__(self,
                config_filepath=CONFIG_FILE_PATH,
                params_filepath=PARAMS_FILE_PATH,
                schema_filepath=SCHEMA_FILE_PATH):
        
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation
        parms = {model_name: self.params[model_name] for model_name in self.params.keys()}
        target_column = list(self.schema.TARGET_COLUMN.keys())[0]


        create_directories([config.root_dir])

        model_evaluation_config= ModelEvaluationConfig(
            root_dir=config.root_dir,
            test_data_path=config.test_data_path,
            model_path = config.model_path,
            parms=parms,
            metric_file_name=config.metric_file_name,
            target_column = target_column,
            mlflow_uri="https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow",
        )

        return model_evaluation_config
         

In [7]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import mlflow
import mlflow.sklearn
import numpy as np
import joblib
from src.ML import logger
import dagshub


In [8]:

class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig):
        self.config = config
        dagshub.init(repo_owner='omaar25', repo_name='End-to-end-Machine-Learning-Project-with-MLflow', mlflow=True)

    def load_test_data(self):
        """Load the test data and split into features and target."""
        self.test_data = pd.read_csv(self.config.test_data_path)
        self.X_test = self.test_data.drop(columns=[self.config.target_column])
        self.y_test = self.test_data[self.config.target_column]
        logger.info("Test data loaded successfully")

    def evaluate_model(self, model):
        """Evaluate a single model and return its metrics as a dictionary."""
        y_pred = model.predict(self.X_test)
        accuracy = accuracy_score(self.y_test, y_pred)
        precision = precision_score(self.y_test, y_pred, average='weighted')
        recall = recall_score(self.y_test, y_pred, average='weighted')
        f1 = f1_score(self.y_test, y_pred, average='weighted')

        return {
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1
        }

    def get_model_params(self, model_name):
        """Retrieve model-specific parameters from the config."""
        return self.config.parms.get(model_name, {})

    def log_to_mlflow(self, model_name, metrics):
        """Log model metrics and parameters to MLflow."""
        with mlflow.start_run(run_name=model_name):
            model_params = self.get_model_params(model_name)
            for param, value in model_params.items():
                mlflow.log_param(param, value)
                
            for metric_name, metric_value in metrics.items():
                try:
                    mlflow.log_metric(metric_name, float(metric_value))
                except ValueError:
                    logger.error(f"Metric {metric_name} with value {metric_value} is not numeric.")
                    
            logger.info(f"Logged {model_name} metrics to MLflow")
    
    def evaluate_best_model(self):
        """Evaluate all models, log each to MLflow, and save the best model locally."""
        model_files = [os.path.join(self.config.model_path, model_file) for model_file in os.listdir(self.config.model_path)]
        metrics_list = []
        best_model = None
        best_model_name = ""
        best_f1_score = 0

        for model_path in model_files:
            model_name = os.path.basename(model_path).split('.')[0]
            model = joblib.load(model_path)
            logger.info(f"Evaluating model: {model_name}")
            metrics = self.evaluate_model(model)
            metrics['Model'] = model_name
            metrics_list.append(metrics)
            self.log_to_mlflow(model_name, metrics)

            if metrics["F1 Score"] > best_f1_score:
                best_f1_score = metrics["F1 Score"]
                best_model = model
                best_model_name = model_name
                model_artifact_path = os.path.join(self.config.root_dir, f'{model_name}.joblib')
        joblib.dump(best_model, model_artifact_path)
        logger.info(f"Best model is {best_model_name}")
        save_json(Path(self.config.metric_file_name), {"metrics": metrics_list})


In [9]:
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    model_evaluation.load_test_data()
    model_evaluation.evaluate_best_model()  # Change this line to evaluate only the best model
except Exception as e:
    logger.error(f"Error during model evaluation: {e}")
    raise e


[2024-10-26 15:46:01,880: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-10-26 15:46:01,884: INFO: common: yaml file: params.yaml loaded successfully]
[2024-10-26 15:46:01,886: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-10-26 15:46:01,888: INFO: common: created directory at: artifacts]
[2024-10-26 15:46:01,890: INFO: common: created directory at: artifacts/model_evaluation]
[2024-10-26 15:46:02,994: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2024-10-26 15:46:03,010: INFO: helpers: Accessing as omaar25]
[2024-10-26 15:46:03,690: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/repos/omaar25/End-to-end-Machine-Learning-Project-with-MLflow "HTTP/1.1 200 OK"]


[2024-10-26 15:46:03,700: INFO: helpers: Initialized MLflow to track repo "omaar25/End-to-end-Machine-Learning-Project-with-MLflow"]


[2024-10-26 15:46:03,705: INFO: helpers: Repository omaar25/End-to-end-Machine-Learning-Project-with-MLflow initialized!]
[2024-10-26 15:46:03,838: INFO: 555488300: Test data loaded successfully]
[2024-10-26 15:46:04,008: INFO: 555488300: Evaluating model: Random_Forest]
[2024-10-26 15:46:07,031: ERROR: 555488300: Metric Model with value Random_Forest is not numeric.]
[2024-10-26 15:46:07,033: INFO: 555488300: Logged Random_Forest metrics to MLflow]


2024/10/26 15:46:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random_Forest at: https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow/#/experiments/0/runs/57a46ea4f6384741905b76d51181268d.
2024/10/26 15:46:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow/#/experiments/0.


[2024-10-26 15:46:07,774: INFO: 555488300: Evaluating model: Logistic_Regression]
[2024-10-26 15:46:10,204: ERROR: 555488300: Metric Model with value Logistic_Regression is not numeric.]
[2024-10-26 15:46:10,206: INFO: 555488300: Logged Logistic_Regression metrics to MLflow]


2024/10/26 15:46:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run Logistic_Regression at: https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow/#/experiments/0/runs/c3ad3c1abd5f469d9fbe1c1bd87dd846.
2024/10/26 15:46:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow/#/experiments/0.


[2024-10-26 15:46:11,079: INFO: 555488300: Evaluating model: SVM]
[2024-10-26 15:46:40,945: ERROR: 555488300: Metric Model with value SVM is not numeric.]
[2024-10-26 15:46:40,947: INFO: 555488300: Logged SVM metrics to MLflow]


2024/10/26 15:46:41 INFO mlflow.tracking._tracking_service.client: 🏃 View run SVM at: https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow/#/experiments/0/runs/1933c589bcc1496e91dbbc130fc1ab25.
2024/10/26 15:46:41 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow/#/experiments/0.


[2024-10-26 15:46:41,665: INFO: 555488300: Evaluating model: Decision_Tree]
[2024-10-26 15:46:43,591: ERROR: 555488300: Metric Model with value Decision_Tree is not numeric.]
[2024-10-26 15:46:43,593: INFO: 555488300: Logged Decision_Tree metrics to MLflow]


2024/10/26 15:46:44 INFO mlflow.tracking._tracking_service.client: 🏃 View run Decision_Tree at: https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow/#/experiments/0/runs/a5f78ee52d034dc4bf2e57ccd732bc15.
2024/10/26 15:46:44 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow/#/experiments/0.


[2024-10-26 15:46:44,331: INFO: common: json file saved at: artifacts/model_evaluation/metrics.json]
[2024-10-26 15:46:44,454: INFO: 555488300: Best model is Random_Forest]
