In [1]:
import os

In [2]:
# Show the kernel's current working directory before it is changed.
%pwd

'/home/suganya/Documents/Text-Summarization-Project/research'

In [3]:
# The notebook starts inside research/; step up one level to its parent
# (presumably so the project's relative paths resolve from the repo root).
# Guarded so that re-running this cell does not climb a further level each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
# Confirm the working directory after the chdir above.
%pwd

'/home/suganya/Documents/Text-Summarization-Project'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable settings consumed by the data-validation stage.

    The field order is part of the generated constructor signature and is
    kept as-is so positional construction keeps working.
    """

    # Directory created for this stage by ConfigurationManager.
    root_dir: Path
    # Path of the text file the validation verdict is written to.
    STATUS_FILE: str
    # Names that must all be present inside DATA_DIR.
    ALL_REQUIRED_FILES: list
    # Directory whose listing is checked against ALL_REQUIRED_FILES.
    DATA_DIR: str

In [6]:
from textSummarizer.constants import *
from textSummarizer.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Loads the project's YAML settings and hands out per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and create the top-level artifacts directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # create_directories comes from textSummarizer.utils.common.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        """Build the data-validation settings from the ``data_validation`` section.

        The stage's ``root_dir`` is created as a side effect.

        Returns:
            A DataValidationConfig populated from the loaded configuration.
        """
        section = self.config.data_validation
        create_directories([section.root_dir])

        return DataValidationConfig(
            root_dir=section.root_dir,
            STATUS_FILE=section.STATUS_FILE,
            ALL_REQUIRED_FILES=section.ALL_REQUIRED_FILES,
            DATA_DIR=section.DATA_DIR,
        )


In [9]:
import os
from textSummarizer.logging import logger

In [14]:
class DataValidation:
    """Checks that every required file is present in the configured data directory."""

    # The annotation is quoted so the class can be defined even when
    # DataValidationConfig is not yet in the namespace (cell-order independent).
    def __init__(self, config: "DataValidationConfig"):
        """Store the settings.

        Only ``DATA_DIR``, ``STATUS_FILE`` and ``ALL_REQUIRED_FILES`` are read
        from ``config``.
        """
        self.config = config

    def _write_status(self, validation_status: bool, detail: str) -> None:
        """Overwrite STATUS_FILE with the verdict line followed by ``detail``."""
        # Explicit encoding: the detail may contain arbitrary file names and the
        # platform default encoding is not guaranteed to be able to encode them.
        with open(self.config.STATUS_FILE, "w", encoding="utf-8") as f:
            f.write(f"validation_status: {validation_status}\n")
            f.write(detail)

    def validate_all_file_exist(self) -> bool:
        """Verify that DATA_DIR contains every entry of ALL_REQUIRED_FILES.

        The verdict and a short explanation are written to STATUS_FILE on
        every call, replacing any previous content.

        Returns:
            True when DATA_DIR is a directory holding all required names,
            False when it is missing, is not a directory, or lacks any of them.

        Raises:
            OSError: If DATA_DIR cannot be listed or STATUS_FILE cannot be
                written; such errors propagate unchanged.
        """
        data_dir = self.config.DATA_DIR
        required = self.config.ALL_REQUIRED_FILES

        if not os.path.exists(data_dir):
            self._write_status(False, f"Error: Directory {data_dir} does not exist")
            return False

        if not os.path.isdir(data_dir):
            # os.listdir would raise NotADirectoryError on a regular file;
            # report it as a failed validation instead of crashing.
            self._write_status(False, f"Error: {data_dir} is not a directory")
            return False

        all_files = os.listdir(data_dir)
        validation_status = all(name in all_files for name in required)

        if validation_status:
            detail = f"All required files are present: {required}"
        else:
            detail = f"Missing files. Found: {all_files}\nRequired: {required}"
        self._write_status(validation_status, detail)

        return validation_status


In [15]:
# Run the validation stage end to end. Any exception propagates unchanged,
# so no try/except wrapper is needed just to re-raise it.
config = ConfigurationManager()
data_validation_config = config.get_data_validation_config()
data_validation = DataValidation(config=data_validation_config)

# Kept as the cell's last expression so the notebook displays the verdict.
data_validation.validate_all_file_exist()

[2025-10-08 16:17:04,360: INFO: common: yaml file: /home/suganya/Documents/Text-Summarization-Project/config/config.yaml loaded successfully]
[2025-10-08 16:17:04,363: INFO: common: yaml file: /home/suganya/Documents/Text-Summarization-Project/params.yaml loaded successfully]
[2025-10-08 16:17:04,364: INFO: common: created directory: artifacts]
[2025-10-08 16:17:04,366: INFO: common: created directory: artifacts/data_validation]
