In [1]:
# Show the kernel's current working directory.
%pwd

'f:\\GitHub\\NLP-Emotion-Classification-End-to-End-Project\\NLP-Emotion-Classification-End-to-End-Project\\research'

In [2]:
import os
# Step up one directory level. The path is relative to the current working
# directory, so re-running this cell moves up again; run it once per kernel.
os.chdir("../")
%pwd

'f:\\GitHub\\NLP-Emotion-Classification-End-to-End-Project\\NLP-Emotion-Classification-End-to-End-Project'

In [3]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataValidationConfig:
    """
    Immutable bundle of settings consumed by the data validation stage.

    The dataclass is frozen, so fields cannot be reassigned once an instance
    has been built.
    """

    # Directory belonging to the data validation stage.
    root_dir: Path
    # Directory whose contents are checked for the required files.
    data_ingestion_dir: Path
    # Path of the file that receives the validation outcome.
    STATUS_FILE: str
    # File names that must be present inside data_ingestion_dir.
    REQUIRED_FILES: list

In [5]:
from pathlib import Path

from src.emotionClassification.constants import *
from src.emotionClassification.utils.common import read_yaml_file, create_directories


class ConfigurationManager:
    """
    Load the project's YAML configuration files and build stage config objects.
    """

    def __init__(
        self,
        config_file_path: Path = CONFIG_FILE_PATH,
        params_file_path: Path = PARAMS_FILE_PATH,
    ) -> None:
        """
        Read both YAML files and create the artifacts root directory.

        Args:
            config_file_path: Location of the main configuration YAML file.
            params_file_path: Location of the parameters YAML file.
        """
        self.config = read_yaml_file(config_file_path)
        self.params = read_yaml_file(params_file_path)

        create_directories(filepath_list=[self.config.artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        """
        Build the settings object for the data validation stage.

        The stage's root directory is created as a side effect.

        Returns:
            A DataValidationConfig populated from the ``data_validation``
            section of the loaded configuration.
        """
        config = self.config.data_validation

        create_directories(filepath_list=[config.root_dir])

        # DataValidationConfig declares both directories as Path, so convert
        # the raw configuration values instead of passing them through as-is.
        return DataValidationConfig(
            root_dir=Path(config.root_dir),
            data_ingestion_dir=Path(config.data_ingestion_dir),
            STATUS_FILE=config.STATUS_FILE,
            REQUIRED_FILES=config.REQUIRED_FILES,
        )


In [16]:
import os
from pathlib import Path

from src.emotionClassification.logging import logger


class DataValidation:
    """
    Check that the data ingestion directory contains every required file.
    """

    def __init__(self, config: "DataValidationConfig") -> None:
        """
        Store the configuration used by the validation checks.

        Args:
            config: Settings providing ``data_ingestion_dir``, ``STATUS_FILE``
                and ``REQUIRED_FILES``.
        """
        self.config = config

    def _write_status(self, status: bool) -> None:
        """Overwrite the status file with the textual form of ``status``."""
        with open(self.config.STATUS_FILE, 'w') as status_file:
            status_file.write(f"{status}")

    def validate_all_required_files_exist(self) -> bool:
        """
        Validate that every required file is present in the ingestion directory.

        The outcome is written to the status file on every path, including
        when an unexpected error interrupts the check, so a result left over
        from an earlier run cannot be mistaken for the current one.

        Returns:
            True when the directory exists and contains all required files,
            False otherwise (including when an error occurs).
        """
        try:
            ingestion_dir = self.config.data_ingestion_dir

            if not os.path.exists(ingestion_dir):
                logger.error(f"{ingestion_dir} does not exist.")
                self._write_status(False)
                return False

            present_files = set(os.listdir(ingestion_dir))
            missing_files = [
                file for file in self.config.REQUIRED_FILES
                if file not in present_files
            ]

            # Report every missing file, not only the first, so a single run
            # shows everything that has to be fixed.
            for file in missing_files:
                logger.error(f"Required file {file} not found in {ingestion_dir}")

            status = not missing_files
            self._write_status(status)
            return status
        except Exception as e:
            logger.exception(f"Error occurred during data validation: {e}")
            # Best effort: the status file itself may be what failed, so a
            # second write error is logged rather than raised.
            try:
                self._write_status(False)
            except OSError as write_error:
                logger.error(
                    f"Could not record validation status in "
                    f"{self.config.STATUS_FILE}: {write_error}"
                )
            return False

In [17]:
class DataValidationTrainingPipeline:
    """Pipeline stage that wires the configuration into the data validation step."""

    def __init__(self) -> None:
        """Create the pipeline; there is no state to set up."""

    def main(self) -> bool:
        """
        Run the data validation step and report its outcome.

        Returns:
            The status returned by the validation step.
        """
        validation_config = ConfigurationManager().get_data_validation_config()
        validator = DataValidation(config=validation_config)

        val_status = validator.validate_all_required_files_exist()
        print(f"Validation Status: {val_status}")
        return val_status


In [19]:
from src.emotionClassification.logging import logger

STAGE_NAME = "Data Validation"

# Run the stage end to end, logging a banner before and after it.
try:
    logger.info(f">>>> Stage {STAGE_NAME} Started <<<<")
    data_validation = DataValidationTrainingPipeline()
    val_status = data_validation.main()
    # A falsy status means the stage ran but validation did not pass; this is
    # logged as an error and does not raise.
    if not val_status:
        logger.error(f">>>> Stage {STAGE_NAME} Failed <<<<")
    else:
        logger.info(f">>>> Stage {STAGE_NAME} Completed Successfully <<<<")
except Exception as e:
    # Record the traceback in the log, then let the failure surface.
    logger.exception(e)
    raise e


[2024-08-29 15:46:13,797: INFO: 3615634054: >>>> Stage Data Validation Started <<<<]
[2024-08-29 15:46:13,800: INFO: common: YAML file config\config.yaml loaded successfully!]
[2024-08-29 15:46:13,805: INFO: common: YAML file params.yaml loaded successfully!]
[2024-08-29 15:46:13,810: INFO: common: Directory artifacts already exists!]
[2024-08-29 15:46:13,812: INFO: common: Directory artifacts/data_validation already exists!]
Validation Status: True
[2024-08-29 15:46:13,815: INFO: 3615634054: >>>> Stage Data Validation Completed Successfully <<<<]
