In [1]:
from dataclasses import dataclass
from pathlib import Path


@dataclass
class DataValidationConfig:
    """Settings for the data-validation stage, populated from the `data_validation` config section."""
    # Stage directory; created by ConfigurationManager.get_data_validation_config.
    root_dir: Path
    # CSV file that DataValidation.validate_data reads and checks.
    validation_input: Path
    # Column count carried over from the config; the validation code in this
    # notebook does not reference it.
    num_of_columns: int
    # Mapping of column name -> expected dtype name; validate_data recognises
    # 'datetime', 'float' and 'int'.
    validation_schema: dict
    # Text file that receives the overall and per-column validation report.
    validation_report_path: Path
    # Destination the input data is written to when every column passes.
    validation_output_path: Path

In [2]:
import os
# Switch the working directory to the project root (presumably so the
# `src.` imports and the relative artifact paths used below resolve).
# Set the STOCKPREDICTOR_ROOT environment variable to point at your own
# checkout instead of editing this cell; the original path stays the default.
project_root = os.environ.get("STOCKPREDICTOR_ROOT") or r'D:\repositories\StockPredictorApp'
os.chdir(project_root)
print(os.getcwd())  # Verify the working directory


D:\repositories\StockPredictorApp


In [3]:
from src.stockpredictor.utils.common import create_directories
from src.stockpredictor.constants.__init import *
from src.stockpredictor.utils.common import read_yaml, create_directories
class ConfigurationManager:
    """Reads the project configuration file and builds per-stage config objects."""

    def __init__(self, 
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH) -> None:
        """Load the YAML configuration.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Accepted so the constructor signature stays
                stable; the params file is not loaded by this class.
        """
        self.config = read_yaml(config_filepath)

    def get_data_validation_config(self) -> DataValidationConfig:
        """Build the data-validation settings from the `data_validation` section.

        Creates the stage's root directory as a side effect.

        Returns:
            DataValidationConfig: settings whose path-like fields are
            converted to `Path`, matching the dataclass annotations.
        """
        config = self.config.data_validation

        create_directories([config.root_dir])

        # The YAML loader hands back raw values; wrap the path fields so the
        # dataclass really holds Path objects as its annotations promise.
        data_validation_config = DataValidationConfig(
            root_dir=Path(config.root_dir),
            validation_input=Path(config.validation_input),
            num_of_columns=config.num_of_columns,
            validation_schema=config.validation_schema,
            validation_report_path=Path(config.validation_report_path),
            validation_output_path=Path(config.validation_output_path),
        )

        return data_validation_config
    
# Build the data-validation settings from the default configuration file.
data_val_config = (
    ConfigurationManager()
    .get_data_validation_config()
)

[2024-11-23 11:27:51,890: INFO: common: yaml file: D:\repositories\StockPredictorApp\config\config.yaml loaded successfully]
[2024-11-23 11:27:51,891: INFO: common: Directory artifacts/data_validation created successfully]


In [7]:
import os
from pathlib import Path
import yfinance as yf
from src.stockpredictor.utils.common import save_as_csv
from src.stockpredictor.logging.coustom_log import logger
from src.stockpredictor.entity import DataValidationConfig
import pandas as pd

class DataValidation:
    """Checks an input CSV against the configured column/dtype schema.

    The config object supplies `validation_input` (CSV to read),
    `validation_schema` (column name -> 'datetime' | 'float' | 'int'),
    `validation_report_path` (text report to write) and
    `validation_output_path` (where the data is written when it passes).
    """

    def __init__(self, config: "DataValidationConfig") -> None:
        self.config = config

    @staticmethod
    def _dtype_matches(actual_dtype, expected_dtype):
        """Return True when `actual_dtype` satisfies the schema type name `expected_dtype`."""
        if expected_dtype == 'datetime':
            return pd.api.types.is_datetime64_any_dtype(actual_dtype)
        if expected_dtype == 'float':
            return pd.api.types.is_float_dtype(actual_dtype)
        if expected_dtype == 'int':
            return pd.api.types.is_integer_dtype(actual_dtype)
        # Unknown type names in the schema can never be satisfied.
        return False

    def validate_data(self):
        """Validate the input CSV, write the report and save the data on success.

        Every column listed in the schema must be present in the file and have
        the expected dtype. The report file is always written; the data is
        written to `validation_output_path` only when all columns pass.

        Returns:
            bool: True when every schema column passed, False otherwise.

        Raises:
            Any exception raised while reading the CSV or writing the outputs
            propagates unchanged.
        """
        # 'Date' is parsed up front so a 'datetime' schema entry can match it.
        raw_data = pd.read_csv(self.config.validation_input, parse_dates=['Date'])

        # Per-column result: present in the file AND of the expected dtype.
        # NOTE(review): the configured num_of_columns is not enforced here.
        validation_status = {}
        for column, expected_dtype in self.config.validation_schema.items():
            validation_status[column] = (
                column in raw_data.columns
                and self._dtype_matches(raw_data[column].dtype, expected_dtype)
            )

        overall_status = all(validation_status.values())

        # Prepare the content to write to the report file
        output_lines = [
            f"Overall Validation Status: {'Success' if overall_status else 'Failure'}\n",
            "Column-wise Validation Status:\n",
        ]
        output_lines.extend(
            f" - {col}: {'Passed' if status else 'Failed'}\n"
            for col, status in validation_status.items()
        )

        output_file = self.config.validation_report_path
        with open(output_file, "w") as f:
            f.writelines(output_lines)

        if overall_status:
            logger.info(f"Data validation successful. Report saved to {output_file}")
            # NOTE(review): to_csv also writes the DataFrame index as an
            # unnamed first column - confirm downstream stages expect that.
            raw_data.to_csv(self.config.validation_output_path)
        else:
            # Make failures visible in the log, not only in the report file.
            failed_columns = [col for col, status in validation_status.items() if not status]
            logger.warning(
                f"Data validation failed for columns {failed_columns}. Report saved to {output_file}"
            )
        print(f"Validation status written to {output_file}")

        return overall_status
        
# Run the validation stage with the settings built earlier in the notebook.
# validate_data writes the report file and, when every column passes, also
# writes the input data to the configured output path.
data_val = DataValidation(data_val_config)
data_val.validate_data()

[2024-11-23 11:43:56,946: INFO: 3381893042: Data validation successful. Report saved to artifacts/data_validation/validation_report.txt]


Validation status written to artifacts/data_validation/validation_report.txt
