In [1]:
%pwd

'/home/omar/Desktop/End-to-end-Machine-Learning-Project-with-MLflow/research'

In [2]:
import os
os.chdir("../")

In [3]:
%pwd

'/home/omar/Desktop/End-to-end-Machine-Learning-Project-with-MLflow'

In [4]:
import os
os.environ["MLFLOW_URI"] = "https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow"
os.environ["MLFLOW_USERNAME"] = "omaar25"
os.environ["MLFLOW_PASSWORD"] = "2c50f4469577c59f93c5b3ead8bad346cd5f7785"

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    root_dir: Path
    test_data_path: Path
    model_path: Path 
    parms: dict
    metric_file_name: Path
    target_column: str
    mlflow_uri: str

In [6]:
from src.ML.constants import *
from ML.utils.common import read_yaml, create_directories, save_json

In [7]:
class ConfigurationManager:
    def __init__(self,
                config_filepath=CONFIG_FILE_PATH,
                params_filepath=PARAMS_FILE_PATH,
                schema_filepath=SCHEMA_FILE_PATH):
        
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation
        parms = self.params.LogisticRegression
        target_column = list(self.schema.TARGET_COLUMN.keys())[0]


        create_directories([config.root_dir])

        model_evaluation_config= ModelEvaluationConfig(
            root_dir=config.root_dir,
            test_data_path=config.test_data_path,
            model_path = config.model_path,
            parms=parms,
            metric_file_name=config.metric_file_name,
            target_column = target_column,
            mlflow_uri="https://dagshub.com/omaar25/End-to-end-Machine-Learning-Project-with-MLflow.mlflow",
        )

        return model_evaluation_config
         

In [8]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import mlflow
import mlflow.sklearn
import numpy as np
import joblib
from src.ML import logger


In [9]:

class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig):
        self.config = config
        mlflow.set_tracking_uri(self.config.mlflow_uri)

    def load_test_data(self):
        """Load the test data."""
        self.test_data = pd.read_csv(self.config.test_data_path)
        self.X_test = self.test_data.drop([self.config.target_column], axis=1)
        self.y_test = self.test_data[self.config.target_column]
        logger.info("Test data loaded successfully")

    def evaluate_model(self, model):
        """Evaluate a single model and return its metrics."""
        y_pred = model.predict(self.X_test)
        accuracy = accuracy_score(self.y_test, y_pred)
        precision = precision_score(self.y_test, y_pred, average='macro')
        recall = recall_score(self.y_test, y_pred, average='macro')
        f1 = f1_score(self.y_test, y_pred, average='macro')
        
        return {
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1
        }

    def log_to_mlflow(self, model_name, metrics):
        """Log model parameters and metrics to MLflow."""
        with mlflow.start_run():
            mlflow.log_param("model_name", model_name)
            for metric_name, metric_value in metrics.items():
                mlflow.log_metric(metric_name, metric_value)

            logger.info(f"Logged {model_name} metrics to MLflow")

    def evaluate_all_models(self):
        """Evaluate all models, save metrics, and log to MLflow."""
        model_files = [os.path.join(self.config.model_path, model_file) for model_file in os.listdir(self.config.model_path)]
        metrics_list = []
        best_model = None
        best_f1_score = 0

        for model_path in model_files:
            model_name = os.path.basename(model_path).split('.')[0]
            model = joblib.load(model_path)
            logger.info(f"Evaluating model: {model_name}")

            metrics = self.evaluate_model(model)
            metrics['Model'] = model_name
            metrics_list.append(metrics)

            self.log_to_mlflow(model_name, metrics)

            if metrics["F1 Score"] > best_f1_score:
                best_f1_score = metrics["F1 Score"]
                best_model = model

        save_json(self.config.metric_file_name, metrics_list)

        joblib.dump(best_model, os.path.join(self.config.root_dir, 'best_model.pkl'))
        logger.info("Best model saved successfully")


In [10]:
try:
    config= ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation_config = ModelEvaluation(config=model_evaluation_config)
    model_evaluation_config.load_test_data()
    model_evaluation_config.evaluate_all_models()
except Exception as e:
    raise e

[2024-10-25 23:16:03,895: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-10-25 23:16:03,905: INFO: common: yaml file: params.yaml loaded successfully]
[2024-10-25 23:16:03,913: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-10-25 23:16:03,917: INFO: common: created directory at: artifacts]
[2024-10-25 23:16:03,920: INFO: common: created directory at: artifacts/model_evaluation]
[2024-10-25 23:16:04,019: INFO: 1023339436: Test data loaded successfully]
[2024-10-25 23:16:04,287: INFO: 1023339436: Evaluating model: Random_Forest]


MlflowException: API request to endpoint /api/2.0/mlflow/runs/create failed with error code 403 != 200. Response body: ''