In [1]:
from faker import Faker
import pandas as pd
import random
from sqlalchemy import create_engine
from pathlib import Path

# Module-level Faker instance; generate_customer_data() uses it for the
# customer_id (uuid4), name and occupation (job) fields.
fake = Faker()

# Banking product catalog, declared as one (name, description, eligibility)
# row per product and expanded into a list of dicts keyed by the field names.
_PRODUCT_FIELDS = ("product_name", "description", "eligibility")
_PRODUCT_ROWS = (
    (
        "Savings Account",
        "A basic savings account with competitive interest rates.",
        "All customers above 18 years old",
    ),
    (
        "Credit Card",
        "A credit card with cashback and reward points.",
        "Credit score above 650 and income above $20,000",
    ),
    (
        "Home Loan",
        "Flexible home loan with low interest rates.",
        "Credit score above 700 and income above $50,000",
    ),
    (
        "Education Loan",
        "Loan for students pursuing higher education.",
        "Age below 35 and enrollment in a valid institution",
    ),
    (
        "Fixed Deposit",
        "Investment with fixed returns over a chosen term.",
        "Minimum deposit of $1,000",
    ),
)
product_catalog = [dict(zip(_PRODUCT_FIELDS, row)) for row in _PRODUCT_ROWS]

# Function to generate fake customer data
def generate_customer_data(num_records=7000, seed=None):
    """Generate a synthetic customer table plus the product catalog table.

    Each customer row holds a UUID, name and occupation from Faker, and
    randomly drawn values for age (18-70), gender, annual income
    (15,000-200,000, rounded to 2 decimals), marital status, credit score
    (300-850), up to three already-owned products (comma-separated string,
    possibly empty) and one financial goal.

    Args:
        num_records: Number of customer rows to generate. Zero (or a
            negative value) yields an empty table that still has all columns.
        seed: Optional seed. When given, both the ``random`` module and the
            module-level Faker instance are seeded, so repeated calls with
            the same seed are intended to return the same data (for a given
            Faker version). When ``None`` the current random state is used,
            as before.

    Returns:
        A ``(customers_df, products_df)`` tuple of pandas DataFrames.
    """
    if seed is not None:
        # Two independent sources of randomness are used below; seed both.
        random.seed(seed)
        fake.seed_instance(seed)

    products = [p["product_name"] for p in product_catalog]
    goals = ["Home Ownership", "Education", "Savings", "Travel", "Retirement"]
    # random.sample raises ValueError if asked for more items than exist,
    # so cap the "owned products" count at the catalog size.
    max_owned = min(3, len(products))

    data = [
        {
            "customer_id": fake.uuid4(),
            "name": fake.name(),
            "age": random.randint(18, 70),
            "gender": random.choice(["Male", "Female"]),
            "occupation": fake.job(),
            "annual_income": round(random.uniform(15000, 200000), 2),
            "marital_status": random.choice(["Single", "Married", "Divorced"]),
            "credit_score": random.randint(300, 850),
            "existing_products": ', '.join(
                random.sample(products, k=random.randint(0, max_owned))
            ),
            "financial_goals": random.choice(goals),
        }
        for _ in range(num_records)
    ]

    # Declare the columns explicitly so an empty result still carries the
    # schema; a column-less frame would be written as an empty CSV that
    # pd.read_csv cannot parse. Must stay in sync with the record keys above.
    columns = [
        "customer_id",
        "name",
        "age",
        "gender",
        "occupation",
        "annual_income",
        "marital_status",
        "credit_score",
        "existing_products",
        "financial_goals",
    ]
    customers_df = pd.DataFrame(data, columns=columns)
    products_df = pd.DataFrame(product_catalog)
    return customers_df, products_df

# Function to save to CSV
def save_to_csv(customers_df, products_df, output_dir):
    """Write both DataFrames as CSV files (without the index) into output_dir.

    The directory, including missing parents, is created if needed.

    Args:
        customers_df: DataFrame written to ``bank_customers.csv``.
        products_df: DataFrame written to ``product_catalog.csv``.
        output_dir: Target directory (string or path-like).

    Returns:
        A ``(customers_path, products_path)`` tuple of ``Path`` objects.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    exports = (
        (customers_df, "bank_customers.csv"),
        (products_df, "product_catalog.csv"),
    )
    written = []
    for frame, filename in exports:
        destination = target_dir / filename
        frame.to_csv(destination, index=False)
        written.append(destination)

    customers_path, products_path = written
    return customers_path, products_path

# Function to save to SQLite DB
def save_to_db(customers_path, products_path, db_path="bank_recommender.db"):
    """Load the two CSV files and store them as tables in a SQLite database.

    The ``customers`` and ``products`` tables are replaced if they already
    exist. Note that ``pd.read_csv`` reads empty CSV fields back as NaN, so
    they are stored as NULL rather than as empty strings.

    Args:
        customers_path: Path of the customers CSV file.
        products_path: Path of the product catalog CSV file.
        db_path: SQLite database file to write to.

    Returns:
        ``db_path`` unchanged.
    """
    customers = pd.read_csv(customers_path)
    products = pd.read_csv(products_path)

    engine = create_engine(f"sqlite:///{db_path}")
    try:
        customers.to_sql("customers", con=engine, if_exists="replace", index=False)
        products.to_sql("products", con=engine, if_exists="replace", index=False)
    finally:
        # Release the engine's pooled connection so the database file is not
        # left open after this function returns (or raises).
        engine.dispose()

    return db_path

# Main script runner: generate the data, export it to CSV, then load those
# CSV files into a SQLite database and report where everything was written.
if __name__ == "__main__":
    # Uses the default record count of generate_customer_data().
    customers_df, products_df = generate_customer_data()
    
    # Adjust this path as needed
    # NOTE: hard-coded, machine-specific folder; save_to_csv() creates it if missing.
    csv_output_dir = "C:/Users/RICH-FILES/Desktop/Datasets"
    customers_path, products_path = save_to_csv(customers_df, products_df, csv_output_dir)
    
    # No db_path is passed, so save_to_db() falls back to its default file name.
    db_file = save_to_db(customers_path, products_path)
    
    print(f"Data saved to:\nCSV Folder: {csv_output_dir}\nSQLite DB: {db_file}")


Data saved to:
CSV Folder: C:/Users/RICH-FILES/Desktop/Datasets
SQLite DB: bank_recommender.db
