In [1]:
import os

In [2]:
# check present working directory
%pwd

'd:\\ML_OPS_BABBY_FULL_STACK_NEW\\End-to-End-wine-quality-ML-Project\\research'

In [3]:
# Move the working directory one level up. The %pwd outputs around this cell
# show the change from ...\research to the project root; presumably this is so
# that the relative config paths used later resolve — confirm against the repo layout.
os.chdir('../')

In [4]:
%pwd

'd:\\ML_OPS_BABBY_FULL_STACK_NEW\\End-to-End-wine-quality-ML-Project'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """
    Immutable bundle of settings consumed by the model-training stage.

    Because the dataclass is declared with ``frozen=True``, assigning to any
    field after construction raises ``dataclasses.FrozenInstanceError``.

    Attributes:
        root_dir (Path): Directory the training stage writes into; the fitted
                        model file is saved inside it.
        train_data_path (Path): CSV file holding the training split.
        test_data_path (Path): CSV file holding the test split.
        model_name (str): File name (not a full path) under which the fitted
                         model is stored inside ``root_dir``.
        alpha (float): ElasticNet ``alpha`` - the constant that multiplies the
                      penalty terms.
        l1_ratio (float): ElasticNet mixing parameter; 0 gives a pure L2
                         penalty and 1 gives a pure L1 penalty.
        target_column (str): Name of the column the model learns to predict.

    Note:
        No validation or type coercion is performed here; values are stored
        exactly as they are passed in.
    """
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str
    alpha: float
    l1_ratio: float
    target_column: str

In [6]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """
    Loads the project's YAML files and builds component configuration objects.

    The three YAML documents are read once, at construction time, and kept on
    the instance. Component-specific settings are then assembled on demand.

    Attributes:
        config: Parsed contents of the main configuration file
        params: Parsed contents of the parameters file
        schema: Parsed contents of the schema file

    Methods:
        get_model_trainer_config: Returns configuration for the model trainer component
    """
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """
        Read the three YAML files and prepare the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration file (default: CONFIG_FILE_PATH)
            params_filepath: Location of the parameters file (default: PARAMS_FILE_PATH)
            schema_filepath: Location of the schema file (default: SCHEMA_FILE_PATH)

        Note:
            After loading, the ``artifacts_root`` entry of the main
            configuration is handed to ``create_directories``.
        """
        # Files are read in a fixed order: config, then params, then schema.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])


    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """
        Assemble the settings needed by the model training component.

        Values are drawn from all three loaded documents:
        - ``model_trainer`` section of the main configuration: root_dir,
          train_data_path, test_data_path, model_name
        - ``ElasticNet`` section of the parameters: alpha, l1_ratio
        - ``TARGET_COLUMN`` section of the schema: name

        Returns:
            ModelTrainerConfig: Frozen configuration object populated with
                                the values listed above.

        Note:
            The trainer's ``root_dir`` is passed to ``create_directories``
            before the configuration object is built.
        """
        trainer_section = self.config.model_trainer
        elasticnet_params = self.params.ElasticNet
        target_spec = self.schema.TARGET_COLUMN

        # Make sure the output directory for this stage is in place first.
        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            test_data_path=trainer_section.test_data_path,
            model_name=trainer_section.model_name,
            alpha=elasticnet_params.alpha,
            l1_ratio=elasticnet_params.l1_ratio,
            target_column=target_spec.name,
        )

In [8]:
import pandas as pd
import os
from mlProject import logger
from sklearn.linear_model import ElasticNet
import joblib

In [9]:
class ModelTrainer:
    """
    Model-training component of the ML pipeline.

    Fits an ElasticNet regression model on the training CSV named in the
    configuration and writes the fitted estimator to disk with joblib.
    No performance metrics are computed by this class.

    Attributes:
        config (ModelTrainerConfig): Paths, hyperparameters and target column
                                    name used by :meth:`train`.
    """
    def __init__(self, config: ModelTrainerConfig):
        """
        Keep a reference to the training configuration.

        Args:
            config (ModelTrainerConfig): Settings read later by :meth:`train`.
        """
        self.config = config

    @staticmethod
    def _split_features_target(frame, target_column):
        """
        Split a DataFrame into features and target.

        Args:
            frame: DataFrame that contains ``target_column``.
            target_column: Name of the column to separate out.

        Returns:
            tuple: ``(features, target)`` where ``features`` is ``frame``
                   without ``target_column`` and ``target`` is a
                   single-column DataFrame holding only ``target_column``.
        """
        features = frame.drop([target_column], axis=1)
        target = frame[[target_column]]
        return features, target

    def train(self):
        """
        Fit an ElasticNet model and save it under the configured root directory.

        Steps:
        1. Read the training and test CSV files
        2. Split each into features (every column except the target) and target
        3. Build an ElasticNet with the configured alpha and l1_ratio
        4. Fit it on the training features/target
        5. Dump the fitted model to ``<root_dir>/<model_name>`` with joblib

        Returns:
            None: The fitted model is only written to disk.

        Note:
            random_state is fixed at 42.
            The test split is read and separated into features/target but is
            not used for fitting or evaluation in this method.
        """
        cfg = self.config

        train_frame = pd.read_csv(cfg.train_data_path)
        test_frame = pd.read_csv(cfg.test_data_path)

        train_x, train_y = self._split_features_target(train_frame, cfg.target_column)
        # Loaded and split for parity with the training data; currently unused.
        test_x, test_y = self._split_features_target(test_frame, cfg.target_column)

        model = ElasticNet(
            alpha=cfg.alpha,
            l1_ratio=cfg.l1_ratio,
            random_state=42,
        )
        model.fit(train_x, train_y)

        model_path = os.path.join(cfg.root_dir, cfg.model_name)
        joblib.dump(model, model_path)

In [10]:
"""
Model Trainer Pipeline

This script orchestrates the model training process by initializing the configuration,
creating the model trainer component, and executing the training process.

The pipeline follows these steps:
1. Initialize the ConfigurationManager to load all configuration parameters
2. Get the specific model trainer configuration, which combines:
   - Path information from config.yaml
   - Hyperparameters from params.yaml
   - Target column information from schema.yaml
3. Initialize the ModelTrainer component with the configuration
4. Train the ElasticNet model and save it to disk

The entire process is wrapped in a try-except block that re-raises any
exception unchanged, so failures surface to whatever runs this script.

Note:
- This is the fourth stage in the ML pipeline, following data transformation
- It trains the prediction model using the prepared training data
- The trained model is saved as a joblib file for use in prediction/inference
- The configuration object (model_trainer_config) and the ModelTrainer
  instance (model_trainer) are bound to two distinct names
"""

try:
    # Initialize configuration
    config = ConfigurationManager()

    # Get component-specific configuration
    model_trainer_config = config.get_model_trainer_config()

    # Initialize model trainer component
    model_trainer = ModelTrainer(config=model_trainer_config)

    # Execute model training
    model_trainer.train()

except Exception:
    # Bare `raise` re-raises the active exception as-is, without adding an
    # extra re-raise frame to the traceback the way `raise e` does.
    raise

[2025-05-08 12:47:21,109: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-08 12:47:21,116: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-08 12:47:21,123: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-05-08 12:47:21,123: INFO: common: created directory at: artifacts]
[2025-05-08 12:47:21,130: INFO: common: created directory at: artifacts/model_trainer]
