In [1]:
import os

In [2]:
# Show the kernel's current working directory (the notebook starts inside `research/`).
%pwd


'c:\\Users\\RICH-FILES\\Desktop\\ml\\AI-powered-Bank-Product-Recommender-Chatbot\\research'

In [3]:
# Step up one level, from the notebook folder to its parent directory.
# Not idempotent: re-running this cell climbs one more level each time.
os.chdir(os.pardir)

In [4]:
# Confirm the working directory after the move to the parent folder.
%pwd

'c:\\Users\\RICH-FILES\\Desktop\\ml\\AI-powered-Bank-Product-Recommender-Chatbot'

In [5]:
# Pin the working directory to the project root so relative config/artifact paths resolve.
# The absolute path below only exists on the original author's machine; on any other
# checkout fall back to the current directory (already moved up from the notebook folder
# by the earlier `os.chdir` cell) instead of failing with FileNotFoundError.
project_dir = "C:/Users/RICH-FILES/Desktop/ml/AI-powered-Bank-Product-Recommender-Chatbot"
if not os.path.isdir(project_dir):
    project_dir = os.getcwd()
    print(f"Hard-coded project directory not found; using current directory: {project_dir}")
os.chdir(project_dir)

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TablesConfig:
    """Immutable pair of database table names, carried in ``DataGenerationConfig.table``."""
    # Name of the customers table; compared against the tables found in the SQLite file.
    customers: str
    # Name of the products table; compared against the tables found in the SQLite file.
    products: str

@dataclass(frozen=True)
class DataGenerationConfig:
    """Immutable settings built from the ``data_generation`` section of the project config."""
    # Copied from the config as-is; presumably the number of customers to generate (TODO confirm).
    num_customers: int
    # Base directory: relative CSV paths are resolved against it during validation.
    gen_root_dir: Path
    # NOTE(review): not referenced by the visible cells; purpose to be confirmed.
    data_dir: Path
    # NOTE(review): not referenced by the visible cells; purpose to be confirmed.
    output_dir: Path
    # Customers CSV location (absolute, or relative to gen_root_dir).
    customers_filename: Path
    # Products CSV location (absolute, or relative to gen_root_dir).
    products_filename: Path
    # SQLite database file opened when the expected tables are checked.
    db_file: Path
    # Names of the tables expected to exist in db_file.
    table: TablesConfig


In [7]:
from BankProducts.constants import *
from BankProducts.utils.common import read_yaml, create_directories

In [8]:
# create configuration manager 
class ConfigurationManager:
    """Load the project's YAML files and expose typed, per-stage configuration objects.

    Args:
        config_filepath: Location of the main config YAML (defaults to ``CONFIG_FILE_PATH``).
        params_filepath: Location of the params YAML (defaults to ``PARAMS_FILE_PATH``).

    Side effects:
        Creates the directory named by ``artifacts_root`` in the config on construction.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_generation_config(self) -> DataGenerationConfig:
        """Build the ``DataGenerationConfig`` from the ``data_generation`` config section.

        Every field is converted to the type declared on the dataclass: path-like
        entries (including the two CSV filenames) are wrapped in ``Path`` and the
        table names are packed into a ``TablesConfig``.

        Returns:
            DataGenerationConfig: frozen settings for the data-generation stage.
        """
        section = self.config.data_generation
        tables = section.table

        # artifacts_root was already created in __init__, so it is not created again here.
        return DataGenerationConfig(
            num_customers=section.num_customers,
            gen_root_dir=Path(section.gen_root_dir),
            data_dir=Path(section.data_dir),
            output_dir=Path(section.output_dir),
            customers_filename=Path(section.customers_filename),
            products_filename=Path(section.products_filename),
            db_file=Path(section.db_file),
            table=TablesConfig(
                customers=tables.customers,
                products=tables.products,
            ),
        )
        
        
        

In [9]:
import pandas as pd
import sqlite3
from pathlib import Path
from BankProducts import  logger

In [10]:
class DataValidation:
    """Checks that the data-generation outputs (CSV files and SQLite tables) are in place.

    Each check raises on failure and prints a one-line confirmation on success.
    """

    def __init__(self, config: "DataGenerationConfig"):
        """Keep the configuration whose paths and table names are validated."""
        self.config = config

    def _resolve_path(self, path) -> Path:
        """Return ``path`` as a ``Path``; relative paths are anchored at ``config.gen_root_dir``."""
        resolved = Path(path)
        if not resolved.is_absolute():
            resolved = self.config.gen_root_dir / resolved
        return resolved

    def validate_file_exists(self, path: Path, name: str):
        """Ensure the file exists.

        Args:
            path: Absolute path, or a path relative to ``config.gen_root_dir``.
            name: Human-readable label used in messages.

        Raises:
            FileNotFoundError: if nothing exists at the resolved path.
        """
        path = self._resolve_path(path)
        if not path.exists():
            raise FileNotFoundError(f"{name} not found at: {path}")
        print(f" {name} exists at {path}")

    def validate_csv_not_empty(self, path: Path, name: str):
        """Ensure the CSV parses and contains at least one data row.

        Args:
            path: Absolute path, or a path relative to ``config.gen_root_dir``.
            name: Human-readable label used in messages.

        Raises:
            ValueError: if the file is zero-byte/blank or holds a header but no rows.
        """
        path = self._resolve_path(path)
        try:
            df = pd.read_csv(path)
        except pd.errors.EmptyDataError as exc:
            # A blank file makes pandas raise "No columns to parse from file";
            # report it with the same message as a header-only file.
            raise ValueError(f"{name} is empty.") from exc
        if df.empty:
            raise ValueError(f"{name} is empty.")
        print(f" {name} is not empty with {len(df)} rows")

    def validate_database_tables(self):
        """Ensure the SQLite file contains the customers and products tables.

        Raises:
            FileNotFoundError: if ``config.db_file`` does not exist.
            ValueError: if one or both expected tables are absent.
        """
        expected = self.config.table
        db_file = Path(self.config.db_file)

        # sqlite3.connect() would create a missing file, leaving an empty database
        # behind as a side effect of validation; refuse to do that.
        if not db_file.exists():
            raise FileNotFoundError(f"Database not found at: {db_file}")

        # `with sqlite3.connect(...)` only scopes the transaction and leaves the
        # connection open, so close it explicitly.
        conn = sqlite3.connect(db_file)
        try:
            rows = conn.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
        finally:
            conn.close()
        actual_tables = {row[0] for row in rows}

        missing = [
            table
            for table in (expected.customers, expected.products)
            if table not in actual_tables
        ]
        if missing:
            raise ValueError(f"Missing tables: {missing}")
        print(f" All expected tables exist in the DB: {expected.customers}, {expected.products}")
        logger.info("Data validation completed successfully.")


In [11]:
try:
    config = ConfigurationManager()
    data_gen_config = config.get_data_generation_config()
    data_validation = DataValidation(config=data_gen_config)

    # CSV checks: each file must exist before it is parsed for emptiness
    # (customers first, then products).
    for csv_path, label in (
        (data_gen_config.customers_filename, "Customers CSV"),
        (data_gen_config.products_filename, "Products CSV"),
    ):
        data_validation.validate_file_exists(csv_path, label)
        data_validation.validate_csv_not_empty(csv_path, label)

    # Database check: the expected tables must be present.
    data_validation.validate_database_tables()
except Exception as e:
    # Any failure is reported as a single line; the cell itself does not raise.
    print(f"Validation failed: {e}")
# End-to-end run: load the configuration, then validate the CSV files and the database tables.

[2025-05-25 19:06:03,992: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-25 19:06:03,997: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-25 19:06:03,999: INFO: common: created directory at: artifacts]
[2025-05-25 19:06:04,001: INFO: common: created directory at: artifacts]
 Customers CSV exists at artifacts\data_generation\bank_customers.csv
 Customers CSV is not empty with 15000 rows
 Products CSV exists at artifacts\data_generation\product_catalog.csv
 Products CSV is not empty with 5 rows
 All expected tables exist in the DB: customers, products
[2025-05-25 19:06:04,064: INFO: 3303433552: Data validation completed successfully.]
