In [1]:
import os

In [2]:
# check present working directory
%pwd

'd:\\ML_OPS_BABBY_FULL_STACK_NEW\\End-to-End-wine-quality-ML-Project\\research'

In [3]:
# Move the working directory up one level, from research/ to the project root
# (compare the %pwd outputs before and after this cell). Presumably this is so
# that relative paths such as config/config.yaml resolve -- confirm against the
# constants module.
# NOTE: the path is relative, so every re-run of this cell climbs one more
# level. Run it once per kernel session.
os.chdir('../')

In [4]:
%pwd

'd:\\ML_OPS_BABBY_FULL_STACK_NEW\\End-to-End-wine-quality-ML-Project'

# Creating the entity

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelEvaluationConfig:
    """
    Immutable bundle of settings consumed by the model evaluation step.

    Instances are frozen: assigning to any field after construction raises
    ``dataclasses.FrozenInstanceError``. The annotations below are not
    enforced at runtime.

    Attributes:
        root_dir (Path): Output directory of the evaluation step; the
            configuration manager creates it before building this object.
        test_data_path (Path): CSV file that is read with ``pd.read_csv``
            to obtain the evaluation data.
        model_path (Path): Serialized model that is loaded with
            ``joblib.load``.
        all_params (dict): Hyperparameter mapping, taken from the
            ``ElasticNet`` section of the parameters file.
        metric_file_name (Path): Destination of the JSON file holding the
            computed metrics.
        target_column (str): Name of the label column; it is removed from
            the features and used as the ground truth.
    """
    root_dir: Path
    test_data_path: Path
    model_path: Path
    all_params: dict
    metric_file_name: Path
    target_column: str

# Configuration manager

In [7]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories, save_json

In [8]:
class ConfigurationManager:
    """
    Loads the project's YAML files and builds component configuration objects.

    The main configuration, parameters and schema files are read once in
    ``__init__`` and kept on the instance. ``get_model_evaluation_config``
    then assembles a ``ModelEvaluationConfig`` from them.

    Attributes:
        config: Parsed contents of the main configuration file.
        params: Parsed contents of the parameters file.
        schema: Parsed contents of the schema file.

    Methods:
        get_model_evaluation_config: Returns configuration for the model
            evaluation component.
    """
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """
        Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path to the main configuration file
                (default: CONFIG_FILE_PATH)
            params_filepath: Path to the parameters file
                (default: PARAMS_FILE_PATH)
            schema_filepath: Path to the schema file
                (default: SCHEMA_FILE_PATH)

        Note:
            ``read_yaml`` must return objects that support attribute access,
            because the sections are read as ``self.config.artifacts_root``,
            ``self.params.ElasticNet`` and so on.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """
        Build the configuration object for the model evaluation component.

        Values come from three places:
        1. ``model_evaluation`` section of the main config - file locations
        2. ``ElasticNet`` section of the parameters file - hyperparameters
        3. ``TARGET_COLUMN`` section of the schema file - label column name

        Returns:
            ModelEvaluationConfig: Settings for the evaluation component.
            The four path-like fields are converted to ``pathlib.Path`` so
            they match the dataclass annotations.

        Note:
            Creates the evaluation root directory as a side effect.
        """
        eval_section = self.config.model_evaluation
        elasticnet_params = self.params.ElasticNet
        target_section = self.schema.TARGET_COLUMN

        # Pass the raw value through, exactly as before, so the helper sees
        # the same input it always has.
        create_directories([eval_section.root_dir])

        # The dataclass declares these fields as Path, but the YAML values
        # arrive as plain strings; convert them here so consumers can rely on
        # the declared type.
        return ModelEvaluationConfig(
            root_dir=Path(eval_section.root_dir),
            test_data_path=Path(eval_section.test_data_path),
            model_path=Path(eval_section.model_path),
            all_params=elasticnet_params,
            metric_file_name=Path(eval_section.metric_file_name),
            target_column=target_section.name
        )

In [9]:
import os
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from urllib.parse import urlparse
import numpy as np
import joblib

In [10]:
class ModelEvaluation:
    """
    Scores a previously trained model on the held-out test set.

    The component reads the test CSV and the serialized model named in its
    configuration, computes regression metrics for the predictions, and
    writes those metrics to a JSON file.

    Attributes:
        config (ModelEvaluationConfig): Paths and column name that drive the
                                       evaluation.
    """
    def __init__(self, config: ModelEvaluationConfig):
        """
        Store the evaluation configuration on the instance.

        Args:
            config (ModelEvaluationConfig): Settings for this evaluation run.
        """
        self.config = config


    def eval_metrics(self, actual, pred):
        """
        Compute RMSE, MAE and R² for a set of predictions.

        RMSE is the square root of scikit-learn's ``mean_squared_error``;
        MAE and R² come straight from ``mean_absolute_error`` and
        ``r2_score``.

        Args:
            actual: Ground-truth target values
            pred: Model predictions for the same rows

        Returns:
            tuple: ``(rmse, mae, r2)`` in that order
        """
        squared_error = mean_squared_error(actual, pred)
        return (
            np.sqrt(squared_error),
            mean_absolute_error(actual, pred),
            r2_score(actual, pred),
        )



    def save_results(self):
        """
        Run the evaluation end to end and persist the metrics.

        Steps:
        1. Read the test CSV, then load the model with joblib
        2. Split the frame into features (every column except the target)
           and labels (the target, kept as a single-column DataFrame)
        3. Predict on the features and score the predictions
        4. Write ``rmse``, ``mae`` and ``r2`` to the metrics JSON file

        Returns:
            None: The metrics are only written to disk.
        """
        cfg = self.config
        target = cfg.target_column

        frame = pd.read_csv(cfg.test_data_path)
        estimator = joblib.load(cfg.model_path)

        features = frame.drop([target], axis=1)
        labels = frame[[target]]

        rmse, mae, r2 = self.eval_metrics(labels, estimator.predict(features))

        # Persist the metrics locally as JSON
        save_json(
            path=Path(cfg.metric_file_name),
            data={"rmse": rmse, "mae": mae, "r2": r2},
        )

In [11]:
"""
Model Evaluation Pipeline

This script orchestrates the model evaluation process by initializing the configuration,
creating the model evaluation component, and executing the evaluation process.

The pipeline follows these steps:
1. Initialize the ConfigurationManager to load all configuration parameters
2. Get the specific model evaluation configuration, which combines:
   - Path information from config.yaml
   - Hyperparameters from params.yaml
   - Target column information from schema.yaml
3. Initialize the ModelEvaluation component with the configuration
4. Evaluate the model on test data and save the resulting metrics to a JSON file

The entire process is wrapped in a try-except block to catch and propagate
any exceptions that might occur during execution, ensuring proper error handling.

Note:
- This is the final stage in the ML pipeline, following model training
- It calculates key performance metrics (RMSE, MAE, R²) to assess model quality
- The metrics are saved to a JSON file for future reference and model comparison
- There's a variable naming issue in this code: model_evaluation_config is used
  both for the configuration object and the ModelEvaluation instance
"""

try:
    # Initialize configuration
    config = ConfigurationManager()
    
    # Get component-specific configuration
    model_evaluation_config = config.get_model_evaluation_config()
    
    # Initialize model evaluation component
    # Note: Variable naming issue - reusing model_evaluation_config for the evaluator instance
    model_evaluation_config = ModelEvaluation(config=model_evaluation_config)
    
    # Execute model evaluation and save metrics
    model_evaluation_config.save_results()
    
except Exception as e:
    # Propagate any exceptions for handling at a higher level
    raise e

[2025-05-08 13:06:14,659: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-08 13:06:14,664: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-08 13:06:14,670: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-05-08 13:06:14,672: INFO: common: created directory at: artifacts]
[2025-05-08 13:06:14,672: INFO: common: created directory at: artifacts/model_evaluation]
[2025-05-08 13:06:14,804: INFO: common: json file saved at: artifacts\model_evaluation\metrics.json]
