In [1]:
import os

In [2]:
%pwd

'c:\\Users\\RICH-FILES\\Desktop\\ml\\AI-powered-Bank-Product-Recommender-Chatbot\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\RICH-FILES\\Desktop\\ml\\AI-powered-Bank-Product-Recommender-Chatbot'

In [5]:
project_dir = "C:/Users/RICH-FILES/Desktop/ml/AI-powered-Bank-Product-Recommender-Chatbot"
os.chdir(project_dir)

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TablesConfig:
    customers: str
    products: str

@dataclass(frozen=True)
class DataGenerationConfig:
    num_customers: int
    output_dir: Path
    customers_filename: str
    products_filename: str
    gen_root_dir: Path
    data_dir: Path
    db_file: Path
    table: TablesConfig
    
    
   
    

In [7]:
from BankProducts.constants import *
from BankProducts.utils.common import read_yaml, create_directories

In [8]:
# create configuration manager 
class ConfigurationManager: 
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH, 
        #schema_filepath = SCHEMA_FILE_PATH,
        ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        #self.schema = read_yaml(schema_filepath)
        
        create_directories([self.config.artifacts_root])
        
    def get_data_generation_config(self)-> DataGenerationConfig:
        """
         This method is responsible for creating the data generation configuration"""
    
        config = self.config.data_generation
        
        
        create_directories([self.config.artifacts_root])
        
        data_generation_config = DataGenerationConfig(
            num_customers = config.num_customers,
            output_dir = Path(config.output_dir),
            customers_filename = config.customers_filename,
            products_filename = config.products_filename,
            gen_root_dir = Path(config.gen_root_dir),
            data_dir  = Path(config.data_dir),
            db_file = Path(config.db_file),
            table = config.table        )
        
        return data_generation_config
        
        
        

In [9]:
import pandas as pd
import sqlite3
from pathlib import Path
from BankProducts import logger
from faker import Faker
import pandas as pd
import random
from sqlalchemy import create_engine
from pathlib import Path

In [10]:
fake =Faker()

# Define product catalog
PRODUCT_CATALOG = [
    {
        "product_name": "Savings Account",
        "description": "A basic savings account with competitive interest rates.",
        "eligibility": "All customers above 18 years old"
    },
    {
        "product_name": "Credit Card",
        "description": "A credit card with cashback and reward points.",
        "eligibility": "Credit score above 650 and income above $20,000"
    },
    {
        "product_name": "Home Loan",
        "description": "Flexible home loan with low interest rates.",
        "eligibility": "Credit score above 700 and income above $50,000"
    },
    {
        "product_name": "Education Loan",
        "description": "Loan for students pursuing higher education.",
        "eligibility": "Age below 35 and enrollment in a valid institution"
    },
    {
        "product_name": "Fixed Deposit",
        "description": "Investment with fixed returns over a chosen term.",
        "eligibility": "Minimum deposit of $1,000"
    }
    ]

In [11]:
class DataGeneration:
    def __init__(self, config: DataGenerationConfig):
        self.config = config
        

    def generate_customer_data(self, num_records=40000):
        data = []
        products = [p["product_name"] for p in PRODUCT_CATALOG]
        goals = ["Home Ownership", "Education", "Savings", "Travel", "Retirement"]
        
        logger.info(f"Generating {num_records} fake customer records...")
        for _ in range(num_records):
            data.append({
                "customer_id": fake.uuid4(),
                "name": fake.name(),
                "age": random.randint(18, 70),
                "gender": random.choice(["Male", "Female"]),
                "occupation": fake.job(),
                "annual_income": round(random.uniform(15000, 200000), 2),
                "marital_status": random.choice(["Single", "Married", "Divorced"]),
                "credit_score": random.randint(300, 850),
                "existing_products": ', '.join(random.sample(products, k=random.randint(0, 3))),
                "financial_goals": random.choice(goals)
            })
        
        customers_df = pd.DataFrame(data)
        products_df = pd.DataFrame(PRODUCT_CATALOG)
        
        logger.info("Customer and product data generated.")
        return customers_df, products_df

    def save_to_csv(self, customers_df, products_df, output_dir):
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        
        customers_path = output_dir / "bank_customers.csv"
        products_path = output_dir / "product_catalog.csv"
        
        customers_df.to_csv(customers_path, index=False)
        products_df.to_csv(products_path, index=False)
        
        logger.info(f"Saved customers to {customers_path}")
        logger.info(f"Saved products to {products_path}")
        
        return customers_path, products_path

    def save_to_db(self, customers_path: str, products_path: str, db_file):
        try:
            # Check if files exist
            if not os.path.exists(customers_path):
                logger.error(f"Customer file not found: {customers_path}")
                raise FileNotFoundError(f"Customer file not found: {customers_path}")
            if not os.path.exists(products_path):
                logger.error(f"Product file not found: {products_path}")
                raise FileNotFoundError(f"Product file not found: {products_path}")
            
            # Load CSV files
            customers = pd.read_csv(customers_path)
            products = pd.read_csv(products_path)

            # Create SQLite engine
            engine = create_engine(f"sqlite:///{db_file}")

            # Write to database
            customers.to_sql("customers", con=engine, if_exists="replace", index=False)
            products.to_sql("products", con=engine, if_exists="replace", index=False)

            logger.info(f"Data saved to SQLite database at {db_file}")
            return db_file

        except Exception as e:
            logger.exception(f"Failed to save data to the database: {e}")
            raise







In [12]:

try:
    config = ConfigurationManager()
    data_generation_config = config.get_data_generation_config()
    data_gen = DataGeneration(config=data_generation_config)

    # Generate data
    customers_df, products_df = data_gen.generate_customer_data()

    # Save to CSV
    customers_path, products_path = data_gen.save_to_csv(customers_df, products_df, data_generation_config.output_dir)
    logger.info(f"Customers data saved to {customers_path}")

    # Save to DB
    data_gen.save_to_db(customers_path, products_path, data_generation_config.db_file)
    logger.info(f"Data saved to SQLite database at {data_generation_config.db_file}")

    print("Data Generation Complete")
except Exception as e:
    raise e
    
    
    
  

[2025-05-24 19:11:18,432: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-24 19:11:18,434: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-24 19:11:18,436: INFO: common: created directory at: artifacts]
[2025-05-24 19:11:18,438: INFO: common: created directory at: artifacts]
[2025-05-24 19:11:18,439: INFO: 2849690336: Generating 40000 fake customer records...]
[2025-05-24 19:11:26,386: INFO: 2849690336: Customer and product data generated.]
[2025-05-24 19:11:26,591: INFO: 2849690336: Saved customers to artifacts\data_generation\bank_customers.csv]
[2025-05-24 19:11:26,592: INFO: 2849690336: Saved products to artifacts\data_generation\product_catalog.csv]
[2025-05-24 19:11:26,593: INFO: 2744110690: Customers data saved to artifacts\data_generation\bank_customers.csv]
[2025-05-24 19:11:27,500: INFO: 2849690336: Data saved to SQLite database at artifacts\data_generation\bank_products_recommender.db]
[2025-05-24 19:11:27,504: INFO: 2744110690: D