In [1]:
import os
# IPython magic: display the kernel's current working directory so the
# starting location is visible before it is changed in the next cell.
%pwd


'e:\\TextSummarization\\research'

In [2]:
os.chdir('../')
%pwd

'e:\\TextSummarization'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable bundle of settings consumed by the data-validation stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory belonging to the validation stage.
    root_dir: Path
    # Path of the text file the validation step writes its status message to.
    STATUS_FILE: str
    # File names that are compared against the dataset directory listing.
    ALL_REQUIRED_FILES: list

In [4]:
from src.TextSummarizer.constants import *
from src.TextSummarizer.utils.common import read_yaml, create_directories

In [5]:
class ConfigManager:
    """Loads the project's YAML settings and hands out per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH,params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and create the top-level artifacts directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        # Config is read before params, matching the order the log lines appear in.
        self.config = read_yaml(Path(config_filepath))
        self.params = read_yaml(Path(params_filepath))

        artifacts_dirs = [self.config.artifacts_root]
        create_directories(artifacts_dirs)

    def get_data_validation(self)->DataValidationConfig:
        """Build the settings object for the data-validation stage.

        Creates the stage's ``root_dir`` via ``create_directories`` before
        returning.

        Returns:
            DataValidationConfig populated from the ``data_validation``
            section of the loaded configuration.
        """
        section = self.config.data_validation
        create_directories([section.root_dir])

        stage_settings = {
            "root_dir": Path(section.root_dir),
            "STATUS_FILE": section.STATUS_FILE,
            "ALL_REQUIRED_FILES": section.ALL_REQUIRED_FILES,
        }
        return DataValidationConfig(**stage_settings)

In [6]:
import os 
import urllib.request as request
from zipfile import ZipFile
from src.TextSummarizer.utils.common import get_size
from src.TextSummarizer.logging import logger

In [7]:
class DataValidation:
    """Checks that the ingested dataset directory contains every required file."""

    # Directory the check was originally hard-coded to; kept as the default so
    # existing zero-argument calls behave as before.
    DEFAULT_DATA_DIR = os.path.join('artifacts', 'data_ingestion', 'samsum_dataset')

    def __init__(self, config: DataValidationConfig):
        self.config = config

    def validate_all_files(self, data_dir=None):
        """Verify that every entry of ``config.ALL_REQUIRED_FILES`` exists in ``data_dir``.

        The outcome is computed once over the whole directory listing, so it
        does not depend on the order in which ``os.listdir`` returns entries.
        Entries that are present but not required are logged and recorded in
        the status file, but do not fail validation.

        Args:
            data_dir: Directory to inspect. Defaults to ``DEFAULT_DATA_DIR``.

        Returns:
            bool: True when no required file is missing, False otherwise.

        Raises:
            Exception: Any error raised while listing the directory or
                writing the status file is logged and re-raised unchanged.
        """
        try:
            logger.info("Validating all files")

            if data_dir is None:
                data_dir = self.DEFAULT_DATA_DIR

            required = list(self.config.ALL_REQUIRED_FILES or [])
            present = set(os.listdir(data_dir))

            # Required names that are absent decide the result; extras are
            # only reported.
            missing = [name for name in required if name not in present]
            unexpected = sorted(present.difference(required))
            validation_status = not missing

            for name in unexpected:
                logger.info(f"File {name} is not in the required files list")
            for name in missing:
                logger.info(f"Required file {name} is missing")

            # Write one report for the whole run instead of overwriting the
            # status file once per directory entry.
            report = [f"Validation status: {validation_status}"]
            report.extend(f"Required file {name} is missing" for name in missing)
            report.extend(
                f"File {name} is not in the required files list" for name in unexpected
            )
            with open(self.config.STATUS_FILE, 'w') as f:
                f.write("\n".join(report) + "\n")

            return validation_status
        except Exception as e:
            logger.exception(e)
            raise

    

In [8]:
# Run the data-validation stage: load configuration, build the stage settings,
# then run the file check.
try:
    config = ConfigManager()
    data_validation_config = config.get_data_validation()
    data_validation = DataValidation(config=data_validation_config)
    data_validation.validate_all_files()
except Exception as e:
    # Print a short summary, then let the original exception propagate so the
    # cell still fails visibly.
    print(f"An error occurred: {e}")
    raise



2025-07-08 02:13:49,527 - TextSummarizerLogger - INFO - YAML file config\config.yaml loaded successfully.
2025-07-08 02:13:49,530 - TextSummarizerLogger - INFO - YAML file params\params.yaml loaded successfully.
2025-07-08 02:13:49,533 - TextSummarizerLogger - INFO - Created Directory at : artifacts
2025-07-08 02:13:49,536 - TextSummarizerLogger - INFO - Created Directory at : artifacts/data_validation
2025-07-08 02:13:49,537 - TextSummarizerLogger - INFO - Validating all files
2025-07-08 02:13:49,538 - TextSummarizerLogger - INFO - File dataset_dict.json is not in the required files list
