# 📚 Accounting Synthetic Data Generator v1.0
## Leveraging Snowflake Native Synthetic Data Generation

### Overview
This notebook demonstrates how to generate synthetic accounting datasets using Snowflake's native `GENERATE_SYNTHETIC_DATA`. It includes:

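- Legal entities and chart of accounts (seed data)
- Customers, vendors, invoices, payments (seed data; synthetic generation covers customers, vendors, and invoices)
- Journals and GL lines (table schemas only; these tables are not yet populated)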

### Data Architecture
```
📊 SEED DATA (Manual) → 🤖 SYNTHETIC DATA (Snowflake AI)
├── ENTITIES (5) → (5)
├── CHART_OF_ACCOUNTS (250) → (250)
├── CUSTOMERS (200) → (20,000+)
├── VENDORS (200) → (20,000+)
├── INVOICES (1,000) → (1,000,000+)
├── PAYMENTS (1,000) → (1,000,000+)
├── JOURNALS (2,000) → (2,000,000+)
└── GL_LINES (10,000) → (20,000,000+)
```

### Requirements
- Snowflake Enterprise Edition or higher
- Medium Snowpark-optimized warehouse
- Anaconda terms accepted

In [None]:
# 📦 SETUP AND CONFIGURATION (Accounting)

import pandas as pd
import numpy as np
import random
import string
import json
import datetime as dt
from datetime import timedelta
from typing import List, Dict, Any, Optional

from snowflake.snowpark import Session, functions as F
from snowflake.snowpark.types import *

# `get_active_session` is provided by `snowflake.snowpark.context`. It is
# imported here because the import cell above only brings in `Session` and
# `functions`, which would leave the call below failing with a NameError.
from snowflake.snowpark.context import get_active_session

# Snowpark session shared by every cell below.
session = get_active_session()

# Fixed seeds so the `random` / NumPy driven seed rows are repeatable.
random.seed(42)
np.random.seed(42)

# Central notebook settings, read by the functions in the later cells.
CONFIG = {
    # Where seed tables and synthetic output tables live.
    'database': 'ACCT_SYNTH_DB',
    'schema': 'SEED_DATA',
    'synth_schema': 'SYNTHETIC_DATA',
    # Filled in by setup_database_environment() once a warehouse is resolved.
    'warehouse': None,

    # Seed row counts.
    'seed_entities': 5,
    'seed_coa': 250,
    'seed_customers': 200,
    'seed_vendors': 200,
    'seed_invoices': 1000,
    'seed_payments': 1000,

    # Row-count goals for the synthetic tables.
    'target_customers': 20000,
    'target_invoices': 1000000,

    # Passed to GENERATE_SYNTHETIC_DATA as `similarity_filter` and
    # `replace_output_tables` respectively.
    'enable_privacy_filter': True,
    'replace_output_tables': True
}

print("🚀 Accounting Synthetic Data Generator v1.0")
print(f"📊 Database: {CONFIG['database']}")
print(f"🌱 Seed: {CONFIG['schema']}  🤖 Synth: {CONFIG['synth_schema']}")
print("✅ Using Snowflake native GENERATE_SYNTHETIC_DATA")


In [None]:
# 🏗️ ENVIRONMENT SETUP (Accounting)

def setup_database_environment():
    """Resolve a warehouse and create/activate the database and schemas.

    Uses the session's current warehouse when there is one; otherwise
    switches to the first warehouse returned by ``SHOW WAREHOUSES``. The
    resolved name is stored in ``CONFIG['warehouse']``. The database and both
    schemas named in ``CONFIG`` are then created if missing, and the seed
    schema is made the active schema.

    Returns:
        bool: True when every step succeeded, False otherwise. Failures are
        printed rather than raised so the notebook cell itself never errors.
    """
    print("🏗️ Setting up Snowflake environment...")
    try:
        current_wh = session.sql("SELECT CURRENT_WAREHOUSE()").collect()[0][0]
        if current_wh:
            print(f"   ✅ Using warehouse: {current_wh}")
            CONFIG['warehouse'] = current_wh
        else:
            warehouses = session.sql("SHOW WAREHOUSES").collect()
            if not warehouses:
                raise RuntimeError("No warehouses available")
            wh_name = warehouses[0]['name']
            # SHOW WAREHOUSES reports the name exactly as stored, so it must
            # be double-quoted (with embedded quotes doubled) to resolve when
            # it is mixed-case or contains special characters.
            quoted_wh = '"' + wh_name.replace('"', '""') + '"'
            session.sql(f"USE WAREHOUSE {quoted_wh}").collect()
            CONFIG['warehouse'] = wh_name
            print(f"   🔄 Switched to warehouse: {wh_name}")

        print(f"   🏗️ Creating database: {CONFIG['database']}")
        session.sql(f"CREATE DATABASE IF NOT EXISTS {CONFIG['database']}").collect()
        session.sql(f"USE DATABASE {CONFIG['database']}").collect()

        # Schema names are unqualified: they resolve against the database
        # activated just above.
        print("   📁 Creating schemas...")
        session.sql(f"CREATE SCHEMA IF NOT EXISTS {CONFIG['schema']}").collect()
        session.sql(f"CREATE SCHEMA IF NOT EXISTS {CONFIG['synth_schema']}").collect()
        session.sql(f"USE SCHEMA {CONFIG['schema']}").collect()

        # Read the context back from Snowflake to report what is really active.
        current_db = session.sql("SELECT CURRENT_DATABASE()").collect()[0][0]
        current_schema = session.sql("SELECT CURRENT_SCHEMA()").collect()[0][0]
        current_wh = session.sql("SELECT CURRENT_WAREHOUSE()").collect()[0][0]
        print("✅ Environment ready:")
        print(f"   📋 Database: {current_db}")
        print(f"   📋 Active Schema: {current_schema}")
        print(f"   📋 Warehouse: {current_wh}")
        return True
    except Exception as e:
        print(f"❌ Environment setup failed: {e}")
        return False

if setup_database_environment():
    print("🎯 Ready to create seed data!")
else:
    print("💥 Cannot proceed without proper environment setup")


In [None]:
# 🗃️ CREATE SEED DATA TABLES (Accounting)

def create_seed_tables():
    """Drop and recreate the eight accounting seed tables.

    All statements use unqualified table names, so the tables are created in
    whatever database/schema is active on the session.
    """
    print("🗃️ Creating accounting seed data table schemas...")

    def _execute(statement):
        # Run one SQL statement to completion on the shared session.
        session.sql(statement).collect()

    table_names = [
        'ENTITIES',
        'CHART_OF_ACCOUNTS',
        'CUSTOMERS',
        'VENDORS',
        'INVOICES',
        'PAYMENTS',
        'JOURNALS',
        'GL_LINES',
    ]

    # Start from a clean slate so the CREATE TABLE statements cannot collide.
    for table_name in table_names:
        _execute(f"DROP TABLE IF EXISTS {table_name}")

    # Legal entities: the root that most other tables reference.
    _execute("""
        CREATE TABLE ENTITIES (
            ENTITY_ID STRING PRIMARY KEY,
            ENTITY_NAME STRING NOT NULL,
            COUNTRY STRING,
            INDUSTRY STRING,
            FISCAL_YEAR_END STRING,
            BASE_CURRENCY STRING,
            CREATED_DATE DATE DEFAULT CURRENT_DATE()
        )
    """)

    # Chart of accounts, one set of accounts per entity.
    _execute("""
        CREATE TABLE CHART_OF_ACCOUNTS (
            ACCOUNT_ID STRING PRIMARY KEY,
            ENTITY_ID STRING NOT NULL,
            ACCOUNT_NUMBER STRING NOT NULL,
            ACCOUNT_NAME STRING NOT NULL,
            ACCOUNT_TYPE STRING, -- Asset, Liability, Equity, Revenue, Expense
            CREATED_DATE DATE DEFAULT CURRENT_DATE(),
            FOREIGN KEY (ENTITY_ID) REFERENCES ENTITIES(ENTITY_ID)
        )
    """)

    # Counterparties: customers and vendors, each owned by an entity.
    _execute("""
        CREATE TABLE CUSTOMERS (
            CUSTOMER_ID STRING PRIMARY KEY,
            ENTITY_ID STRING NOT NULL,
            CUSTOMER_NAME STRING NOT NULL,
            COUNTRY STRING,
            SEGMENT STRING,
            EMAIL STRING,
            PHONE STRING,
            CREATED_DATE DATE DEFAULT CURRENT_DATE(),
            FOREIGN KEY (ENTITY_ID) REFERENCES ENTITIES(ENTITY_ID)
        )
    """)

    _execute("""
        CREATE TABLE VENDORS (
            VENDOR_ID STRING PRIMARY KEY,
            ENTITY_ID STRING NOT NULL,
            VENDOR_NAME STRING NOT NULL,
            COUNTRY STRING,
            CATEGORY STRING,
            EMAIL STRING,
            PHONE STRING,
            CREATED_DATE DATE DEFAULT CURRENT_DATE(),
            FOREIGN KEY (ENTITY_ID) REFERENCES ENTITIES(ENTITY_ID)
        )
    """)

    # Invoice headers; CUSTOMER_ID and VENDOR_ID are both nullable.
    _execute("""
        CREATE TABLE INVOICES (
            INVOICE_ID STRING PRIMARY KEY,
            ENTITY_ID STRING NOT NULL,
            CUSTOMER_ID STRING,
            VENDOR_ID STRING,
            INVOICE_DATE DATE NOT NULL,
            DUE_DATE DATE,
            AMOUNT DECIMAL(12,2) NOT NULL,
            CURRENCY STRING,
            STATUS STRING,
            CREATED_DATE DATE DEFAULT CURRENT_DATE(),
            FOREIGN KEY (ENTITY_ID) REFERENCES ENTITIES(ENTITY_ID),
            FOREIGN KEY (CUSTOMER_ID) REFERENCES CUSTOMERS(CUSTOMER_ID),
            FOREIGN KEY (VENDOR_ID) REFERENCES VENDORS(VENDOR_ID)
        )
    """)

    # Payments applied against invoices.
    _execute("""
        CREATE TABLE PAYMENTS (
            PAYMENT_ID STRING PRIMARY KEY,
            INVOICE_ID STRING NOT NULL,
            PAYMENT_DATE DATE NOT NULL,
            AMOUNT DECIMAL(12,2) NOT NULL,
            METHOD STRING,
            CREATED_DATE DATE DEFAULT CURRENT_DATE(),
            FOREIGN KEY (INVOICE_ID) REFERENCES INVOICES(INVOICE_ID)
        )
    """)

    # Journal headers and their debit/credit lines.
    _execute("""
        CREATE TABLE JOURNALS (
            JOURNAL_ID STRING PRIMARY KEY,
            ENTITY_ID STRING NOT NULL,
            JOURNAL_DATE DATE NOT NULL,
            DESCRIPTION STRING,
            CREATED_DATE DATE DEFAULT CURRENT_DATE(),
            FOREIGN KEY (ENTITY_ID) REFERENCES ENTITIES(ENTITY_ID)
        )
    """)

    _execute("""
        CREATE TABLE GL_LINES (
            GL_LINE_ID STRING PRIMARY KEY,
            JOURNAL_ID STRING NOT NULL,
            ACCOUNT_ID STRING NOT NULL,
            DEBIT DECIMAL(12,2) DEFAULT 0.00,
            CREDIT DECIMAL(12,2) DEFAULT 0.00,
            CREATED_DATE DATE DEFAULT CURRENT_DATE(),
            FOREIGN KEY (JOURNAL_ID) REFERENCES JOURNALS(JOURNAL_ID),
            FOREIGN KEY (ACCOUNT_ID) REFERENCES CHART_OF_ACCOUNTS(ACCOUNT_ID)
        )
    """)

    print("✅ Accounting seed table schemas created")
    print(f"   📊 Created {len(table_names)} tables")

create_seed_tables()


In [None]:
# 🌱 POPULATE SEED DATA (Accounting)

class SeedDataGenerator:
    """Lookup values and ID formatting shared by the seed-table builders."""

    def __init__(self):
        # Pools of values that the seed builders draw from at random.
        self.industries = [
            'Software',
            'Healthcare',
            'Manufacturing',
            'Retail',
            'Financial Services',
            'Energy',
        ]
        self.countries = [
            'USA',
            'UK',
            'Germany',
            'France',
            'Canada',
            'Japan',
            'India',
            'Australia',
        ]
        self.entity_names = [
            'Acme Corp',
            'Globex LLC',
            'Initech Inc',
            'Umbrella Co',
            'Hooli LLC',
            'Vandelay Industries',
        ]
        # (account number, account name, account type) triples.
        self.coa_templates = [
            ('1000', 'Cash', 'Asset'),
            ('1100', 'Accounts Receivable', 'Asset'),
            ('1200', 'Inventory', 'Asset'),
            ('2000', 'Accounts Payable', 'Liability'),
            ('2100', 'Accrued Expenses', 'Liability'),
            ('3000', 'Common Stock', 'Equity'),
            ('3100', 'Retained Earnings', 'Equity'),
            ('4000', 'Revenue', 'Revenue'),
            ('5000', 'COGS', 'Expense'),
            ('5100', 'Operating Expenses', 'Expense'),
        ]
        self.customer_segments = ['SMB', 'Mid-Market', 'Enterprise', 'Consumer']
        self.vendor_categories = [
            'Raw Materials',
            'Services',
            'Logistics',
            'IT',
            'Facilities',
        ]

    def generate_id(self, prefix: str, counter: int) -> str:
        """Return ``prefix`` followed by ``counter`` zero-padded to 6 digits."""
        return "{}{:06d}".format(prefix, counter)


# Single shared instance used by the seed builder functions below.
seed_gen = SeedDataGenerator()
print("✅ Accounting seed data generator initialized")


def create_entities_seed():
    """Build ``CONFIG['seed_entities']`` random entity rows and load ENTITIES.

    Existing table contents are replaced (``overwrite=True``).

    Returns:
        pandas.DataFrame: the rows that were written.
    """
    fiscal_year_ends = ['12-31','03-31','06-30','09-30']
    currencies = ['USD','EUR','GBP','JPY','CAD']

    # Dict values are evaluated top to bottom, so the sequence of random
    # draws per row is: name, country, industry, fiscal year end, currency.
    records = [
        {
            'ENTITY_ID': seed_gen.generate_id('ENT', seq),
            'ENTITY_NAME': random.choice(seed_gen.entity_names) + f" {seq}",
            'COUNTRY': random.choice(seed_gen.countries),
            'INDUSTRY': random.choice(seed_gen.industries),
            'FISCAL_YEAR_END': random.choice(fiscal_year_ends),
            'BASE_CURRENCY': random.choice(currencies),
        }
        for seq in range(1, CONFIG['seed_entities'] + 1)
    ]

    frame = pd.DataFrame(records)
    session.write_pandas(frame, 'ENTITIES', auto_create_table=False, overwrite=True)
    print(f"   ✅ Created {len(frame)} entities")
    return frame


def create_coa_seed():
    """Load CHART_OF_ACCOUNTS with every COA template for every entity.

    Reads entity IDs from the ENTITIES table and emits one account per
    (entity, template) pair, numbered sequentially across all entities.
    Existing table contents are replaced.

    Returns:
        pandas.DataFrame: the rows that were written.
    """
    entity_ids = session.table('ENTITIES').to_pandas()['ENTITY_ID'].tolist()

    # Entity-major ordering: all templates for the first entity, then the next.
    pairs = [
        (entity_id, template)
        for entity_id in entity_ids
        for template in seed_gen.coa_templates
    ]

    records = [
        {
            'ACCOUNT_ID': seed_gen.generate_id('ACC', seq),
            'ENTITY_ID': entity_id,
            'ACCOUNT_NUMBER': number,
            'ACCOUNT_NAME': name,
            'ACCOUNT_TYPE': account_type,
        }
        for seq, (entity_id, (number, name, account_type)) in enumerate(pairs, start=1)
    ]

    frame = pd.DataFrame(records)
    session.write_pandas(frame, 'CHART_OF_ACCOUNTS', auto_create_table=False, overwrite=True)
    print(f"   ✅ Created {len(frame)} chart of accounts rows")
    return frame


def create_customers_seed():
    """Load CUSTOMERS with an equal number of customers for each entity.

    Each entity receives ``CONFIG['seed_customers'] // <entity count>``
    customers (at least one). Existing table contents are replaced.

    Returns:
        pandas.DataFrame: the rows that were written.
    """
    entity_ids = session.table('ENTITIES').to_pandas()['ENTITY_ID'].tolist()

    # Guard the division: with no entities there is nothing to generate.
    if entity_ids:
        per_entity = max(1, CONFIG['seed_customers'] // len(entity_ids))
    else:
        per_entity = 0

    # One slot per customer, grouped by owning entity.
    owners = [entity_id for entity_id in entity_ids for _ in range(per_entity)]

    records = []
    for seq, entity_id in enumerate(owners, start=1):
        records.append({
            'CUSTOMER_ID': seed_gen.generate_id('CUST', seq),
            'ENTITY_ID': entity_id,
            'CUSTOMER_NAME': f"Customer {seq}",
            'COUNTRY': random.choice(seed_gen.countries),
            'SEGMENT': random.choice(seed_gen.customer_segments),
            'EMAIL': f"customer{seq}@example.com",
            'PHONE': f"({random.randint(200,999)}) {random.randint(200,999)}-{random.randint(1000,9999)}",
        })

    frame = pd.DataFrame(records)
    session.write_pandas(frame, 'CUSTOMERS', auto_create_table=False, overwrite=True)
    print(f"   ✅ Created {len(frame)} customers")
    return frame


def create_vendors_seed():
    """Load VENDORS with an equal number of vendors for each entity.

    Each entity receives ``CONFIG['seed_vendors'] // <entity count>`` vendors
    (at least one). Existing table contents are replaced.

    Returns:
        pandas.DataFrame: the rows that were written.
    """
    entity_ids = session.table('ENTITIES').to_pandas()['ENTITY_ID'].tolist()

    # Guard the division: with no entities there is nothing to generate.
    if entity_ids:
        per_entity = max(1, CONFIG['seed_vendors'] // len(entity_ids))
    else:
        per_entity = 0

    # One slot per vendor, grouped by owning entity.
    owners = [entity_id for entity_id in entity_ids for _ in range(per_entity)]

    records = []
    for seq, entity_id in enumerate(owners, start=1):
        records.append({
            'VENDOR_ID': seed_gen.generate_id('VEND', seq),
            'ENTITY_ID': entity_id,
            'VENDOR_NAME': f"Vendor {seq}",
            'COUNTRY': random.choice(seed_gen.countries),
            'CATEGORY': random.choice(seed_gen.vendor_categories),
            'EMAIL': f"vendor{seq}@example.com",
            'PHONE': f"({random.randint(200,999)}) {random.randint(200,999)}-{random.randint(1000,9999)}",
        })

    frame = pd.DataFrame(records)
    session.write_pandas(frame, 'VENDORS', auto_create_table=False, overwrite=True)
    print(f"   ✅ Created {len(frame)} vendors")
    return frame


def create_invoices_seed():
    """Load INVOICES with ``CONFIG['seed_invoices']`` random invoice headers.

    Each invoice is either receivable (has a CUSTOMER_ID) or payable (has a
    VENDOR_ID), chosen with roughly equal probability. The invoice's
    ENTITY_ID is taken from the chosen customer/vendor and its CURRENCY from
    that entity's BASE_CURRENCY, so an invoice never references a
    counterparty that belongs to a different legal entity. Existing table
    contents are replaced.

    Returns:
        pandas.DataFrame: the rows that were written.
    """
    entities_df = session.table('ENTITIES').to_pandas()
    customers_df = session.table('CUSTOMERS').to_pandas()
    vendors_df = session.table('VENDORS').to_pandas()

    # Entity -> base currency lookup, built once instead of per invoice.
    currency_by_entity = dict(zip(entities_df['ENTITY_ID'], entities_df['BASE_CURRENCY']))

    data = []
    for i in range(1, CONFIG['seed_invoices'] + 1):
        is_ar = random.random() > 0.5
        if is_ar:
            party = customers_df.sample(1).iloc[0]
            cust_id, vend_id = party['CUSTOMER_ID'], None
        else:
            party = vendors_df.sample(1).iloc[0]
            cust_id, vend_id = None, party['VENDOR_ID']

        # Derive the owning entity from the counterparty rather than sampling
        # it independently, which would mismatch entity and counterparty.
        entity_id = party['ENTITY_ID']

        amount = round(random.uniform(100.0, 100000.0), 2)
        # Day is capped at 28 so the date is valid in every month.
        inv_date = dt.date(random.randint(2018, 2024), random.randint(1, 12), random.randint(1, 28))
        due_date = inv_date + dt.timedelta(days=random.choice([15, 30, 45, 60]))
        data.append({
            'INVOICE_ID': seed_gen.generate_id('INV', i),
            'ENTITY_ID': entity_id,
            'CUSTOMER_ID': cust_id,
            'VENDOR_ID': vend_id,
            'INVOICE_DATE': inv_date,
            'DUE_DATE': due_date,
            'AMOUNT': amount,
            'CURRENCY': currency_by_entity.get(entity_id),
            'STATUS': random.choice(['Open','Paid','Partially Paid','Overdue'])
        })
    df = pd.DataFrame(data)
    session.write_pandas(df, 'INVOICES', auto_create_table=False, overwrite=True)
    print(f"   ✅ Created {len(df)} invoices")
    return df


def create_payments_seed():
    """Load PAYMENTS with ``CONFIG['seed_payments']`` random payments.

    Every payment targets one invoice sampled from the INVOICES table, pays
    between 20% and 100% of that invoice's amount, and is dated a fixed
    number of days after the invoice date. Existing table contents are
    replaced.

    Returns:
        pandas.DataFrame: the rows that were written.
    """
    invoice_pool = session.table('INVOICES').to_pandas()
    lag_day_options = [10, 20, 30, 45, 60]
    payment_methods = ['ACH','Wire','Check','Card']

    records = []
    for seq in range(1, CONFIG['seed_payments'] + 1):
        invoice = invoice_pool.sample(1).iloc[0]
        # Draw order matters for reproducibility: fraction, lag, then method.
        paid_fraction = random.uniform(0.2, 1.0)
        lag_days = random.choice(lag_day_options)
        records.append({
            'PAYMENT_ID': seed_gen.generate_id('PAY', seq),
            'INVOICE_ID': invoice['INVOICE_ID'],
            'PAYMENT_DATE': invoice['INVOICE_DATE'] + dt.timedelta(days=lag_days),
            'AMOUNT': round(float(invoice['AMOUNT']) * paid_fraction, 2),
            'METHOD': random.choice(payment_methods),
        })

    frame = pd.DataFrame(records)
    session.write_pandas(frame, 'PAYMENTS', auto_create_table=False, overwrite=True)
    print(f"   ✅ Created {len(frame)} payments")
    return frame


# Create Accounting seeds.
# Order matters: each builder reads tables written by an earlier one
# (COA/customers/vendors read ENTITIES; invoices read ENTITIES, CUSTOMERS and
# VENDORS; payments read INVOICES).
# NOTE: JOURNALS and GL_LINES have no builder here and stay empty.
entities_df = create_entities_seed()
coa_df = create_coa_seed()
customers_df = create_customers_seed()
vendors_df = create_vendors_seed()
invoices_df = create_invoices_seed()
payments_df = create_payments_seed()

# One-line summary of how many rows each seed builder produced.
print(f"🎯 Accounting seed complete: {len(entities_df)} entities, {len(coa_df)} COA, {len(customers_df)} customers, {len(vendors_df)} vendors, {len(invoices_df)} invoices, {len(payments_df)} payments")


In [None]:
# 🤖 SNOWFLAKE SYNTHETIC DATA GENERATION (Accounting)

def _generate_synthetic_table(table_name, join_keys):
    """Run GENERATE_SYNTHETIC_DATA for one seed table; return the output row count.

    Args:
        table_name: Unqualified seed table name (e.g. ``'CUSTOMERS'``). The
            output table is ``<table_name>_SYNTHETIC`` in the synth schema.
        join_keys: Column names to flag as ``join_key`` in the call.
    """
    source = f"{CONFIG['database']}.{CONFIG['schema']}.{table_name}"
    target = f"{CONFIG['database']}.{CONFIG['synth_schema']}.{table_name}_SYNTHETIC"
    columns_spec = ', '.join(f"'{key}': {{'join_key': true}}" for key in join_keys)
    session.sql(f"""
        CALL SNOWFLAKE.DATA_PRIVACY.GENERATE_SYNTHETIC_DATA({{
            'datasets': [{{
                'input_table': '{source}',
                'output_table': '{target}',
                'columns': {{ {columns_spec} }}
            }}],
            'consistency_secret': SYSTEM$REFERENCE('SECRET', 'ACCT_CONSISTENCY_SECRET', 'SESSION', 'READ')::STRING,
            'replace_output_tables': {str(CONFIG['replace_output_tables']).lower()},
            'similarity_filter': {str(CONFIG['enable_privacy_filter']).lower()}
        }});
    """).collect()
    return session.sql(f"SELECT COUNT(*) FROM {target}").collect()[0][0]


def generate_synthetic_data():
    """Generate synthetic CUSTOMERS, VENDORS and INVOICES tables.

    Creates (or replaces) the ``ACCT_CONSISTENCY_SECRET`` symmetric-key
    secret, then runs Snowflake's ``GENERATE_SYNTHETIC_DATA`` once per seed
    table with the table's own ID and ``ENTITY_ID`` flagged as join keys.
    Output tables are written to the synth schema as ``<TABLE>_SYNTHETIC``.

    Returns:
        bool: True on success, False on any failure (the error is printed,
        not raised).
    """
    print("🤖 Generating synthetic data for accounting core entities...")
    try:
        # The same secret is passed as `consistency_secret` to every call.
        session.sql("""
            CREATE OR REPLACE SECRET ACCT_CONSISTENCY_SECRET
            TYPE = SYMMETRIC_KEY
            ALGORITHM = GENERIC
        """).collect()

        cust = _generate_synthetic_table('CUSTOMERS', ('CUSTOMER_ID', 'ENTITY_ID'))
        vend = _generate_synthetic_table('VENDORS', ('VENDOR_ID', 'ENTITY_ID'))
        # Invoices (structure replicated synthetically; joins later for lines/payments)
        inv = _generate_synthetic_table('INVOICES', ('INVOICE_ID', 'ENTITY_ID'))

        print(f"✅ Synthetic Customers: {cust:,}  Vendors: {vend:,}  Invoices: {inv:,}")
        return True
    except Exception as e:
        print(f"❌ Synthetic data generation failed: {e}")
        return False

print("🌱 Ready to generate synthetic accounting data...")
generate_synthetic_data()


In [None]:
# 🚀 MULTI-RUN SCALING (Accounting)

def scale_customers_invoices():
    """Grow CUSTOMERS_SYNTHETIC toward ``CONFIG['target_customers']`` rows.

    Runs ``GENERATE_SYNTHETIC_DATA`` on the seed CUSTOMERS table into
    temporary ``CUSTOMERS_SYNTHETIC_BATCH_<i>`` tables (at most 10 batches
    per call), then rebuilds CUSTOMERS_SYNTHETIC from its existing rows plus
    all batches, renumbering CUSTOMER_ID as ``CUST000001``, ``CUST000002``, ...
    so IDs are unique across batches. Batch tables are dropped afterwards.

    Invoices are not scaled here (see the note near the end of the body).

    Returns:
        bool: True on success, False on any failure (the error is printed,
        not raised).
    """
    max_batches = 10  # upper bound on generation runs per call
    print("🚀 Scaling customers and invoices via batch runs...")
    try:
        seed_table = f"{CONFIG['database']}.{CONFIG['schema']}.CUSTOMERS"
        target_table = f"{CONFIG['database']}.{CONFIG['synth_schema']}.CUSTOMERS_SYNTHETIC"

        # Customers scaling
        seed_size = session.sql(f"SELECT COUNT(*) FROM {seed_table}").collect()[0][0]
        try:
            current = session.sql(f"SELECT COUNT(*) FROM {target_table}").collect()[0][0]
            target_exists = True
        except Exception:
            # Most likely the synthetic table has not been generated yet.
            current = 0
            target_exists = False

        needed = max(0, CONFIG['target_customers'] - current)
        # Ceiling division: zero batches when the target is already met.
        wanted = 0 if seed_size == 0 else -(-needed // seed_size)
        iterations = min(max_batches, wanted)
        if wanted > iterations:
            print(f"   ⚠️ {wanted} batches needed to reach target; capped at {iterations} per run")

        # Track the batch tables by name rather than rediscovering them with
        # SHOW TABLES, which depends on listing order and can match leftovers.
        batch_tables = [f"{target_table}_BATCH_{i}" for i in range(1, iterations + 1)]
        for batch_table in batch_tables:
            session.sql(f"""
                CALL SNOWFLAKE.DATA_PRIVACY.GENERATE_SYNTHETIC_DATA({{
                    'datasets': [{{
                        'input_table': '{seed_table}',
                        'output_table': '{batch_table}',
                        'columns': {{ 'CUSTOMER_ID': {{'join_key': true}}, 'ENTITY_ID': {{'join_key': true}} }}
                    }}],
                    'consistency_secret': SYSTEM$REFERENCE('SECRET', 'ACCT_CONSISTENCY_SECRET', 'SESSION', 'READ')::STRING,
                    'replace_output_tables': true,
                    'similarity_filter': false
                }});
            """).collect()

        if batch_tables:
            # Existing rows were counted toward the target, so keep them.
            sources = ([target_table] if target_exists else []) + batch_tables
            union_sql = " UNION ALL ".join(f"SELECT * FROM {name}" for name in sources)
            # Renumber in a SELECT: Snowflake rejects window functions in the
            # SET clause of an UPDATE. One pass over the union also keeps IDs
            # unique without assuming every batch has exactly seed_size rows.
            session.sql(f"""
                CREATE OR REPLACE TABLE {target_table} AS
                SELECT
                    CONCAT('CUST', LPAD((ROW_NUMBER() OVER (ORDER BY src.CUSTOMER_ID))::STRING, 6, '0')) AS CUSTOMER_ID,
                    src.* EXCLUDE (CUSTOMER_ID)
                FROM ({union_sql}) AS src
            """).collect()
            for batch_table in batch_tables:
                session.sql(f"DROP TABLE IF EXISTS {batch_table}").collect()

        final_customers = session.sql(f"SELECT COUNT(*) FROM {target_table}").collect()[0][0]
        print(f"📈 Final customers: {final_customers:,}")

        # Invoices scaling note: Typically scale via multiple synthetic runs or programmatic generation of invoice header rows
        return True
    except Exception as e:
        print(f"❌ Scaling failed: {e}")
        return False

scale_customers_invoices()


## 🎯 Accounting Synthetic Data Generator v1.0 - Complete!

### 🏆 What We've Built

Synthetic accounting data using Snowflake `GENERATE_SYNTHETIC_DATA` for:
- Customers, vendors, and invoices (entities, COA, and payments are created as seed data only)
- Join-key consistency and privacy filters
- Optional scaling for customers

### 📊 Check Counts Printed During Generation
- Customers
- Vendors
- Invoices

### 🔮 Next Steps
1. Add synthetic generation for `JOURNALS` and `GL_LINES`
2. Create invoice line items and payments application logic
3. Add revenue recognition and AP aging examples

### 🚀 Usage
1. Run setup/environment
2. Run seed creation cells
3. Run synthetic generation
4. Optionally run scaling
