In [1]:
import os

In [2]:
%pwd

'd:\\SAMITH\\Github\\Heart_Rate_Anomaly_Detector\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'd:\\SAMITH\\Github\\Heart_Rate_Anomaly_Detector'

In [5]:
from dataclasses import dataclass
from pathlib import Path
from typing import Dict

@dataclass
class ModelEvaluationConfig:
    """Typed settings bundle for the model-evaluation stage.

    Every dictionary field is keyed by a model key; the evaluation code looks
    up one entry per model in each mapping.
    """

    # Directory that holds the evaluation outputs.
    root_dir: Path
    # Model key -> location of that model's test data.
    test_data_path: Dict[str, Path]
    # Model key -> location of the serialized model artifacts.
    model_path: Dict[str, Path]
    # Model key -> destination of the JSON evaluation report.
    report_path: Dict[str, Path]
    # Model key -> name of the target column in the test data.
    target_columns: Dict[str, str]

In [6]:
from Heart_Rate_Anomaly_Detector.constants import *
from Heart_Rate_Anomaly_Detector.utils.common import read_yaml, create_directories
from pathlib import Path
from typing import Dict

In [7]:
class ConfigurationManager:
    """Load the project YAML files and build stage-specific config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH
    ):
        """Read the config, params and schema YAML files.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Make sure the top-level artifacts directory is in place up front.
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the ``ModelEvaluationConfig`` from the ``model_evaluation`` section.

        Returns:
            ModelEvaluationConfig: ``root_dir`` and the per-model path mappings
            converted to ``Path`` objects, plus the target-column mapping
            copied into a plain dict.
        """
        section = self.config.model_evaluation
        create_directories([section.root_dir])

        def _as_paths(mapping):
            # Convert every value of a key -> path-string mapping into a Path.
            return {key: Path(value) for key, value in mapping.items()}

        return ModelEvaluationConfig(
            root_dir=Path(section.root_dir),
            test_data_path=_as_paths(section.test_data_path),
            model_path=_as_paths(section.model_path),
            report_path=_as_paths(section.report_path),
            target_columns=dict(section.target_columns.items()),
        )


In [8]:
import os
import json
import joblib
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from pathlib import Path
from typing import Dict
from Heart_Rate_Anomaly_Detector import logger

In [None]:


class ModelEvaluation:
    """Evaluate saved regression models on test data and write JSON reports.

    ``config`` must expose ``root_dir`` and the mappings ``model_path``,
    ``test_data_path``, ``report_path`` and ``target_columns``; each mapping is
    indexed by model key.
    """

    def __init__(self, config):
        """Store the configuration and initialise per-model result caches."""
        self.config = config
        # Filled by evaluate_model(): model key -> predictions / ground truth.
        self.predictions: Dict[str, np.ndarray] = {}
        self.actuals: Dict[str, np.ndarray] = {}

    def load_model_and_artifacts(self, model_key: str):
        """Load the joblib artifact bundle registered for ``model_key``.

        Returns:
            The object stored in the joblib file. The rest of this class
            treats it as a dict with a ``"model"`` entry and optional metadata
            (``model_type``, ``feature_columns``, ...).

        Raises:
            FileNotFoundError: If the configured model file does not exist.
        """
        model_path = self.config.model_path[model_key]
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model file not found: {model_path}")
        # NOTE: joblib.load unpickles arbitrary objects; only load trusted artifacts.
        model_artifacts = joblib.load(model_path)
        logger.info(f"Loaded model artifacts for {model_key}: {model_artifacts.get('model_type', 'Unknown')}")
        return model_artifacts

    def load_test_data(self, model_key: str):
        """Load the test features and target for ``model_key``.

        Supported inputs (extension matched case-insensitively):
          * ``.csv``    - read with pandas; the configured target column is split off.
          * ``.joblib`` - either a dict holding ``'X_test'`` and ``'y_test'``
            (returned as-is) or a DataFrame handled like the CSV case.

        Returns:
            tuple: ``(X_test, y_test)``.

        Raises:
            FileNotFoundError: If the test file does not exist.
            ValueError: For unsupported formats/payloads or a missing target column.
        """
        test_path = self.config.test_data_path[model_key]
        if not os.path.exists(test_path):
            raise FileNotFoundError(f"Test data not found: {test_path}")

        # Lower-case once so "DATA.CSV" is accepted just like "data.csv".
        lowered_path = str(test_path).lower()
        if lowered_path.endswith(".csv"):
            df = pd.read_csv(test_path)
        elif lowered_path.endswith(".joblib"):
            # NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
            payload = joblib.load(test_path)
            if isinstance(payload, dict):
                X_test = payload.get('X_test')
                y_test = payload.get('y_test')
                if X_test is None or y_test is None:
                    raise ValueError("Joblib test data must contain 'X_test' and 'y_test'")
                return X_test, y_test
            if not isinstance(payload, pd.DataFrame):
                # Fail with a clear message instead of an AttributeError on `.columns`.
                raise ValueError(
                    f"Unsupported joblib test data type: {type(payload).__name__}"
                )
            df = payload
        else:
            raise ValueError(f"Unsupported test file format: {test_path}")

        target_col = self.config.target_columns[model_key]
        if target_col not in df.columns:
            raise ValueError(f"Target column '{target_col}' not found in test data")
        X_test = df.drop(columns=[target_col])
        y_test = df[target_col]
        return X_test, y_test

    def validate_feature_compatibility(self, X_test, expected_features):
        """Align ``X_test`` columns with the feature list the model expects.

        Columns are reordered to ``expected_features``; features absent from
        ``X_test`` are created and filled with 0, and columns that are not
        expected are dropped. Both situations are logged as warnings so silent
        train/test schema drift is visible.

        Returns:
            The re-indexed frame.
        """
        expected = list(expected_features)
        expected_lookup = set(expected)
        available = set(X_test.columns)

        missing = [col for col in expected if col not in available]
        extra = [col for col in X_test.columns if col not in expected_lookup]
        if missing:
            logger.warning(f"{len(missing)} expected feature(s) missing from test data; filled with 0: {missing}")
        if extra:
            logger.warning(f"{len(extra)} unexpected column(s) dropped from test data: {extra}")

        X_test = X_test.reindex(columns=expected, fill_value=0)
        logger.info(f"Feature validation completed. Final shape: {X_test.shape}")
        return X_test

    @staticmethod
    def _to_metric_array(values) -> np.ndarray:
        """Convert targets/predictions to a float array, flattening (n, 1) columns."""
        arr = np.asarray(values, dtype=float)
        if arr.ndim == 2 and arr.shape[1] == 1:
            arr = arr.ravel()
        return arr

    def calculate_metrics(self, y_true, y_pred):
        """Compute regression metrics and summary statistics.

        Inputs are converted to float numpy arrays first, so lists, pandas
        objects (regardless of index) and single-column 2-D inputs all work
        and are compared position by position.

        Returns:
            dict: ``mae``, ``rmse``, ``r2``, ``mape`` (percent), residual mean
            and std, ``range_coverage`` (predicted range as a percentage of
            the actual range, 0 when the actual range is not positive),
            ``n_samples`` and min/max/mean/std of predictions and actuals.
        """
        y_true = self._to_metric_array(y_true)
        y_pred = self._to_metric_array(y_pred)

        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        r2 = r2_score(y_true, y_pred)
        # Zero targets are replaced by a tiny denominator to avoid division by zero.
        mape = np.mean(np.abs((y_true - y_pred) / np.where(y_true == 0, 1e-8, y_true))) * 100

        residuals = y_true - y_pred
        mean_residual = np.mean(residuals)
        std_residual = np.std(residuals)

        pred_range = y_pred.max() - y_pred.min()
        actual_range = y_true.max() - y_true.min()
        range_coverage = (pred_range / actual_range * 100) if actual_range > 0 else 0

        metrics = {
            "mae": float(mae),
            "rmse": float(rmse),
            "r2": float(r2),
            "mape": float(mape),
            "mean_residual": float(mean_residual),
            "std_residual": float(std_residual),
            "range_coverage": float(range_coverage),
            "n_samples": int(len(y_true)),
            "prediction_stats": {
                "mean": float(np.mean(y_pred)),
                "std": float(np.std(y_pred)),
                "min": float(np.min(y_pred)),
                "max": float(np.max(y_pred)),
            },
            "actual_stats": {
                "mean": float(np.mean(y_true)),
                "std": float(np.std(y_true)),
                "min": float(np.min(y_true)),
                "max": float(np.max(y_true)),
            }
        }
        return metrics

    def save_results(self, model_key: str, metrics, model_artifacts):
        """Write the evaluation report for ``model_key`` as indented JSON.

        The report combines model metadata taken from ``model_artifacts``,
        the supplied ``metrics`` and the evaluation timestamp. Parent
        directories are created when needed.
        """
        os.makedirs(self.config.root_dir, exist_ok=True)

        result_path = Path(self.config.report_path[model_key])
        os.makedirs(result_path.parent, exist_ok=True)

        # `feature_columns` may be absent or explicitly None; both count as 0 features.
        feature_columns = model_artifacts.get("feature_columns")
        feature_count = len(feature_columns) if feature_columns is not None else 0

        full_results = {
            "model_key": model_key,
            "model_info": {
                "model_type": model_artifacts.get("model_type", "Unknown"),
                "target_column": model_artifacts.get("target_column", "Unknown"),
                "timestamp": model_artifacts.get("timestamp", "Unknown"),
                "feature_count": feature_count
            },
            "metrics": metrics,
            "evaluation_timestamp": datetime.now().isoformat()
        }

        with open(result_path, "w", encoding="utf-8") as f:
            # default=str keeps non-JSON-native metadata (e.g. a datetime
            # timestamp) from aborting the report after evaluation succeeded.
            json.dump(full_results, f, indent=4, default=str)

        logger.info(f"Saved evaluation results for {model_key} at {result_path}")

    def evaluate_model(self, model_key: str):
        """Run the full evaluation for one model and return its metrics dict.

        Loads the artifacts and test data, aligns features when the artifacts
        list ``feature_columns``, predicts, caches predictions/actuals on the
        instance, computes metrics and saves the report.
        """
        logger.info(f"Evaluating model: {model_key}")
        model_artifacts = self.load_model_and_artifacts(model_key)
        model = model_artifacts["model"]

        X_test, y_test = self.load_test_data(model_key)
        expected_features = model_artifacts.get("feature_columns")
        # Explicit None/length check: truth-testing a pandas Index or numpy
        # array raises "truth value is ambiguous".
        if expected_features is not None and len(expected_features) > 0:
            X_test = self.validate_feature_compatibility(X_test, expected_features)

        logger.info("Generating predictions...")
        y_pred = model.predict(X_test)

        self.predictions[model_key] = y_pred
        self.actuals[model_key] = y_test

        metrics = self.calculate_metrics(y_test, y_pred)
        self.save_results(model_key, metrics, model_artifacts)
        logger.info(f"Evaluation completed for {model_key}")
        return metrics

    def evaluate_all(self):
        """Evaluate every model listed in ``config.model_path``.

        Returns:
            dict: model key -> metrics dict from ``evaluate_model``.
        """
        results = {}
        for model_key in self.config.model_path.keys():
            results[model_key] = self.evaluate_model(model_key)
        return results


In [10]:
try:
    # Build the evaluation config from the project YAML files, then evaluate
    # every model listed in it.
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluator = ModelEvaluation(config=model_evaluation_config)
    evaluation_results = model_evaluator.evaluate_all()

except Exception:
    # Record the failure (with traceback) in the project log, then re-raise
    # unchanged so the notebook cell still fails visibly.
    logger.exception("Model evaluation pipeline failed")
    raise

[2025-09-07 23:37:01,495: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-09-07 23:37:01,503: INFO: common: yaml file: params.yaml loaded successfully]
[2025-09-07 23:37:01,512: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-09-07 23:37:01,513: INFO: common: created directory at: artifacts]
[2025-09-07 23:37:01,516: INFO: common: created directory at: artifacts/model_evaluation]
[2025-09-07 23:37:01,519: INFO: 2018659500: Evaluating model: heart_rate]
[2025-09-07 23:37:09,179: INFO: 2018659500: Loaded model artifacts for heart_rate: RandomForestRegressor]
[2025-09-07 23:37:09,314: INFO: 2018659500: Feature validation completed. Final shape: (75600, 30)]
[2025-09-07 23:37:09,316: INFO: 2018659500: Generating predictions...]
[2025-09-07 23:37:14,116: INFO: 2018659500: Saved evaluation results for heart_rate at artifacts\model_evaluation\heart_rate_report.json]
[2025-09-07 23:37:14,116: INFO: 2018659500: Evaluation completed for heart_rate]
[2025-09