In [1]:
import os
from dotenv import load_dotenv

In [2]:
load_dotenv(override=True)

mlflow_tracking_uri = os.getenv('MLFLOW_TRACKING_URI')
dagsbuh_username = os.getenv('DAGSHUB_USERNAME')
dvc_secret_access_key = os.getenv('DVC_SECRET_ACCESS_KEY')

In [3]:
os.chdir('../')

In [4]:
%pwd

'd:\\Courses\\Udemy\\Complete MLOps Bootcamp\\Projects\\7_End to End Projects\\end-to-end-data-science-project'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass
class ModelEvaluationConfig:
    root_dir: Path
    test_data_path: Path
    model_path: Path
    all_params: dict
    metric_file_name: Path
    target_column: str
    mlflow_tracking_uri: str


In [6]:
from src.project.constants import *
from src.project.utils.common import read_yaml, create_directories, save_json

In [None]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self, model_type) -> ModelEvaluationConfig:
        config=self.config.model_evaluation
        
        if model_type == "RandomForest":
            params=self.params.RandomForestBest
        elif model_type == "XGBoost":
            params=self.params.XGBoostBest
        else:
            raise ValueError(f"Unknown model type: {self.config.model_type}")
        schema=self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_evaluation_config=ModelEvaluationConfig(
            root_dir=config.root_dir,
            test_data_path=config.test_data_path,
            model_path = config.model_path,
            all_params=params,
            metric_file_name = config.metric_file_name,
            target_column = schema.name,
            mlflow_tracking_uri=config.mlflow_tracking_uri
        )
        return model_evaluation_config


In [7]:
import os
import pandas as pd
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix
)
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn
import numpy as np
import joblib

In [9]:
class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig):
        self.config = config

    
    def eval_metrics(self, actual, pred):
        
        """Calculate classification metrics for Forest Cover Type dataset"""
        accuracy = accuracy_score(actual, pred)
        precision = precision_score(actual, pred, average='weighted', zero_division=0)
        recall = recall_score(actual, pred, average='weighted', zero_division=0)
        
        f1 = f1_score(actual, pred, average='weighted', zero_division=0)
              
        return accuracy, precision, recall, f1

    def log_into_mlflow(self):
        """Log all models into MLflow"""
        mlflow.set_registry_uri(self.config.mlflow_tracking_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        
        # Find all model files in the model directory
        model_files = []
        model_dir = Path(self.config.model_path).parent
        
        # Look for different model files
        for file_path in model_dir.glob("*.joblib"):
            if "RandomForest" in file_path.name:
                model_files.append(("RandomForest", str(file_path)))
            elif "XGBoost" in file_path.name:
                model_files.append(("XGBoost", str(file_path)))
        
        # If no specific model files found, use the default path
        if not model_files:
            model_files = [("DefaultModel", self.config.model_path)]
        
        all_results = {}
        
        for model_name, model_path in model_files:
            print(f"\n>>> Evaluating {model_name} <<<")
            
            with mlflow.start_run(run_name=f"{model_name}_evaluation"):
                try:
                    # Load test data and model
                    test_data = pd.read_csv(self.config.test_data_path)
                    model = joblib.load(model_path)

                    test_x = test_data.drop([self.config.target_column], axis=1)
                    test_y = test_data[self.config.target_column]

                    # Make predictions
                    predictions = model.predict(test_x)
                    

                    # Calculate metrics
                    accuracy, precision, recall, f1 = self.eval_metrics(test_y, predictions)
                    
                    # Store results
                    metrics = {
                        "model_name": model_name,
                        "accuracy": accuracy,
                        "precision": precision,
                        "recall": recall,
                        "f1_score": f1,
                    }
                    
                    all_results[model_name] = metrics
                    
                    # Log parameters if available
                    if hasattr(self.config, 'all_params') and self.config.all_params:
                        mlflow.log_params(self.config.all_params)
                    
                    # Log metrics to MLflow
                    mlflow.log_metric("accuracy", accuracy)
                    mlflow.log_metric("precision", precision)
                    mlflow.log_metric("recall", recall)
                    mlflow.log_metric("f1_score", f1)
                    
                    
                    # Log classification report as artifact
                    class_report = classification_report(test_y, predictions, output_dict=True)
                    class_report_path = f"{model_name}_classification_report.json"
                    with open(class_report_path, 'w') as f:
                        json.dump(class_report, f, indent=2)
                    mlflow.log_artifact(class_report_path)
                    os.remove(class_report_path)  # Clean up
                    
                    # Log model to MLflow
                    if tracking_url_type_store != "file":
                        mlflow.sklearn.log_model(
                            model, 
                            "model", 
                            registered_model_name=f"{model_name}ForestCoverModel"
                        )
                    else:
                        mlflow.sklearn.log_model(model, "model")
                    
                    # Print results
                    print(f"{model_name} Results:")
                    print(f"  Accuracy: {accuracy:.4f}")
                    print(f"  Precision: {precision:.4f}")
                    print(f"  Recall: {recall:.4f}")
                    print(f"  F1-Score: {f1:.4f}")
                    
                except Exception as e:
                    print(f"Error evaluating {model_name}: {str(e)}")
                    continue
        
        # Save all results locally
        if all_results:
            results_path = Path(self.config.metric_file_name)
            save_json(path=results_path, data=all_results)
            
            # Print comparison summary
            self.print_comparison(all_results)
        
        return all_results
    
    

    


In [12]:
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    model_evaluation.log_into_mlflow()
except Exception as e:
    raise e

[2025-09-06 20:44:49,424: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-09-06 20:44:49,521: INFO: common: yaml file: params.yaml loaded successfully]
[2025-09-06 20:44:49,692: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-09-06 20:44:49,725: INFO: common: created directory at: artifacts]


BoxKeyError: "'ConfigBox' object has no attribute 'elasticnet'"