# CSV Field Analysis and Mappings Update
## Date: 2025-07-05

This notebook analyzes CSV files to identify additional fields that are not currently mapped in mappings.py.
These are likely custom fields not documented in the Zoho API.

### Objectives:
1. Load all CSV files and examine their column structures
2. Compare CSV columns with existing mappings
3. Identify missing/unmapped fields
4. Update mappings.py to include all CSV fields
5. Maintain field names as they appear in CSV (custom fields)

### Process:
- ✅ Backup created: `mappings_backup_2025-07-05_16-37-59.py`
- 🔄 Analyzing CSV field structures
- 🔄 Identifying unmapped fields  
- 🔄 Updating mappings.py with new fields

In [1]:
# Section 1: Setup and Imports
import pandas as pd
import json
import os
import sys
from pathlib import Path
from typing import Dict, List, Set, Any
import re

# Add src to path for imports
sys.path.insert(0, str(Path.cwd().parent / 'src'))

# Set pandas display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 40)

print("Libraries imported successfully!")
print(f"Working directory: {Path.cwd()}")

# Define paths
DATA_DIR = Path.cwd().parent / 'data' / 'csv' / 'Nangsel Pioneers_2025-06-22'
SRC_DIR = Path.cwd().parent / 'src'
MAPPINGS_FILE = SRC_DIR / 'data_pipeline' / 'mappings.py'

print(f"Data directory: {DATA_DIR}")
print(f"Data directory exists: {DATA_DIR.exists()}")
print(f"Mappings file: {MAPPINGS_FILE}")

Libraries imported successfully!
Working directory: c:\Users\User\Documents\Projects\Automated_Operations\Zoho_Data_Sync\notebooks
Data directory: c:\Users\User\Documents\Projects\Automated_Operations\Zoho_Data_Sync\data\csv\Nangsel Pioneers_2025-06-22
Data directory exists: True
Mappings file: c:\Users\User\Documents\Projects\Automated_Operations\Zoho_Data_Sync\src\data_pipeline\mappings.py


In [2]:
# Section 2: Load Current Mappings
from data_pipeline.mappings import (
    CANONICAL_SCHEMA,
    INVOICE_CSV_MAP,
    ITEMS_CSV_MAP,
    CONTACTS_CSV_MAP,
    BILLS_CSV_MAP,
    CUSTOMER_PAYMENTS_CSV_MAP,
    VENDOR_PAYMENTS_CSV_MAP,
    SALES_ORDERS_CSV_MAP,
    PURCHASE_ORDERS_CSV_MAP,
    CREDIT_NOTES_CSV_MAP,
    get_entity_csv_mapping
)

print("Current mappings loaded successfully!")
print(f"Total entities in schema: {len(CANONICAL_SCHEMA)}")

# Map entity names to their CSV mappings
ENTITY_MAPPINGS = {
    'Invoices': INVOICE_CSV_MAP,
    'Items': ITEMS_CSV_MAP,
    'Contacts': CONTACTS_CSV_MAP,
    'Bills': BILLS_CSV_MAP,
    'CustomerPayments': CUSTOMER_PAYMENTS_CSV_MAP,
    'VendorPayments': VENDOR_PAYMENTS_CSV_MAP,
    'SalesOrders': SALES_ORDERS_CSV_MAP,
    'PurchaseOrders': PURCHASE_ORDERS_CSV_MAP,
    'CreditNotes': CREDIT_NOTES_CSV_MAP
}

print("Entity mappings configured:")
for entity, mapping in ENTITY_MAPPINGS.items():
    print(f"  {entity}: {len(mapping)} mapped fields")

Current mappings loaded successfully!
Total entities in schema: 10
Entity mappings configured:
  Invoices: 37 mapped fields
  Items: 24 mapped fields
  Contacts: 23 mapped fields
  Bills: 35 mapped fields
  CustomerPayments: 20 mapped fields
  VendorPayments: 20 mapped fields
  SalesOrders: 32 mapped fields
  PurchaseOrders: 32 mapped fields
  CreditNotes: 31 mapped fields


In [3]:
# Section 3: Discover CSV Files and Map to Entities
if DATA_DIR.exists():
    csv_files = list(DATA_DIR.glob('*.csv'))
    print(f"Found {len(csv_files)} CSV files:")
    
    # Map CSV files to entity names
    csv_to_entity_map = {
        'Invoice.csv': 'Invoices',
        'Item.csv': 'Items', 
        'Contacts.csv': 'Contacts',
        'Bill.csv': 'Bills',
        'Customer_Payment.csv': 'CustomerPayments',
        'Vendor_Payment.csv': 'VendorPayments',
        'Sales_Order.csv': 'SalesOrders',
        'Purchase_Order.csv': 'PurchaseOrders',
        'Credit_Note.csv': 'CreditNotes'
    }
    
    available_files = {}
    for csv_file in csv_files:
        file_name = csv_file.name
        entity = csv_to_entity_map.get(file_name)
        if entity:
            available_files[entity] = csv_file
            print(f"  ✅ {file_name} -> {entity}")
        else:
            print(f"  ⚠️  {file_name} -> No mapping found")
    
    print(f"\nMapped {len(available_files)} CSV files to entities")
else:
    print(f"❌ Data directory not found: {DATA_DIR}")
    available_files = {}

Found 46 CSV files:
  ⚠️  Activity Logs.csv -> No mapping found
  ✅ Bill.csv -> Bills
  ⚠️  Bill_Of_Entry.csv -> No mapping found
  ⚠️  Budget.csv -> No mapping found
  ⚠️  Chart_of_Accounts.csv -> No mapping found
  ⚠️  CN_Verification.csv -> No mapping found
  ✅ Contacts.csv -> Contacts
  ⚠️  Contact_Persons.csv -> No mapping found
  ⚠️  Cost_Tracking.csv -> No mapping found
  ⚠️  Creditnotes_Invoice.csv -> No mapping found
  ✅ Credit_Note.csv -> CreditNotes
  ✅ Customer_Payment.csv -> CustomerPayments
  ⚠️  Deposit.csv -> No mapping found
  ⚠️  Direct_Dealer_Supply_Exp.csv -> No mapping found
  ⚠️  Exchange_Rate.csv -> No mapping found
  ⚠️  Expense.csv -> No mapping found
  ⚠️  Fixed_Asset.csv -> No mapping found
  ⚠️  Important_Update_Records.csv -> No mapping found
  ⚠️  Inventory_Adjustment.csv -> No mapping found
  ✅ Invoice.csv -> Invoices
  ✅ Item.csv -> Items
  ⚠️  Journal.csv -> No mapping found
  ⚠️  Plumber_.csv -> No mapping found
  ⚠️  Plumber_Transaction.csv -> No mapp

In [4]:
# Section 4: Analyze CSV Column Structures
csv_analysis = {}

print("=== ANALYZING CSV COLUMN STRUCTURES ===")
print()

for entity, csv_file in available_files.items():
    print(f"--- Analyzing {entity} ({csv_file.name}) ---")
    
    try:
        # Load CSV to get column names (just first few rows for efficiency)
        df = pd.read_csv(csv_file, nrows=5)
        csv_columns = list(df.columns)
        
        # Get current mapping for this entity
        current_mapping = ENTITY_MAPPINGS.get(entity, {})
        mapped_columns = set(current_mapping.keys())
        csv_columns_set = set(csv_columns)
        
        # Find unmapped columns
        unmapped_columns = csv_columns_set - mapped_columns
        
        # Find mapped columns that don't exist in CSV
        missing_in_csv = mapped_columns - csv_columns_set
        
        csv_analysis[entity] = {
            'csv_file': csv_file.name,
            'total_csv_columns': len(csv_columns),
            'csv_columns': csv_columns,
            'currently_mapped': len(mapped_columns),
            'mapped_columns': sorted(mapped_columns),
            'unmapped_count': len(unmapped_columns),
            'unmapped_columns': sorted(unmapped_columns),
            'missing_in_csv_count': len(missing_in_csv),
            'missing_in_csv': sorted(missing_in_csv)
        }
        
        print(f"  Total CSV columns: {len(csv_columns)}")
        print(f"  Currently mapped: {len(mapped_columns)}")
        print(f"  Unmapped columns: {len(unmapped_columns)}")
        
        if unmapped_columns:
            print(f"  🔍 Unmapped fields found:")
            for col in sorted(unmapped_columns)[:10]:  # Show first 10
                print(f"    - {col}")
            if len(unmapped_columns) > 10:
                print(f"    ... and {len(unmapped_columns) - 10} more")
        
        if missing_in_csv:
            print(f"  ⚠️  Mapped but not in CSV: {len(missing_in_csv)}")
            for col in sorted(missing_in_csv)[:5]:
                print(f"    - {col}")
        
        print()
        
    except Exception as e:
        print(f"  ❌ Error analyzing {csv_file.name}: {e}")
        csv_analysis[entity] = {'error': str(e)}
        print()

print(f"Analysis completed for {len(csv_analysis)} entities")

=== ANALYZING CSV COLUMN STRUCTURES ===

--- Analyzing Bills (Bill.csv) ---
  Total CSV columns: 64
  Currently mapped: 35
  Unmapped columns: 44
  🔍 Unmapped fields found:
    - Account
    - Account Code
    - Accounts Payable
    - Adjustment
    - Adjustment Account
    - Adjustment Description
    - Approved By
    - Approved Date
    - Bill Status
    - Bill Type
    ... and 34 more
  ⚠️  Mapped but not in CSV: 15
    - Account ID
    - Account Name
    - Created Time
    - Item Description
    - Item ID

--- Analyzing Contacts (Contacts.csv) ---
  Total CSV columns: 72
  Currently mapped: 23
  Unmapped columns: 53
  🔍 Unmapped fields found:
    - Accounts Receivable
    - Bank Account Payment
    - Billing Attention
    - Billing City
    - Billing Code
    - Billing Country
    - Billing County
    - Billing Fax
    - Billing Phone
    - Billing State
    ... and 43 more
  ⚠️  Mapped but not in CSV: 4
    - Contact Person ID
    - Email
    - Mobile
    - Vendor Display Name

-

In [5]:
# Section 5: Summary of Unmapped Fields Analysis
print("=== SUMMARY OF UNMAPPED FIELDS ANALYSIS ===")
print()

total_unmapped = 0
entities_with_unmapped = []

for entity, analysis in csv_analysis.items():
    if 'error' in analysis:
        print(f"❌ {entity}: Error during analysis")
        continue
        
    unmapped_count = analysis['unmapped_count']
    total_csv_columns = analysis['total_csv_columns']
    currently_mapped = analysis['currently_mapped']
    
    print(f"📋 {entity}:")
    print(f"   Total CSV columns: {total_csv_columns}")
    print(f"   Currently mapped: {currently_mapped}")
    print(f"   Unmapped: {unmapped_count}")
    print(f"   Coverage: {(currently_mapped/total_csv_columns)*100:.1f}%")
    
    if unmapped_count > 0:
        entities_with_unmapped.append(entity)
        total_unmapped += unmapped_count
        print(f"   🔍 Sample unmapped fields: {', '.join(analysis['unmapped_columns'][:3])}...")
    
    print()

print(f"📊 OVERALL SUMMARY:")
print(f"   Entities analyzed: {len([e for e in csv_analysis.values() if 'error' not in e])}")
print(f"   Entities with unmapped fields: {len(entities_with_unmapped)}")
print(f"   Total unmapped fields: {total_unmapped}")
print(f"   Entities needing updates: {', '.join(entities_with_unmapped)}")

if total_unmapped > 0:
    print(f"\n✅ READY TO UPDATE MAPPINGS")
    print(f"   Found {total_unmapped} additional fields to add to mappings.py")
else:
    print(f"\n✅ NO UPDATES NEEDED")
    print(f"   All CSV fields are already mapped")

=== SUMMARY OF UNMAPPED FIELDS ANALYSIS ===

📋 Bills:
   Total CSV columns: 64
   Currently mapped: 35
   Unmapped: 44
   Coverage: 54.7%
   🔍 Sample unmapped fields: Account, Account Code, Accounts Payable...

📋 Contacts:
   Total CSV columns: 72
   Currently mapped: 23
   Unmapped: 53
   Coverage: 31.9%
   🔍 Sample unmapped fields: Accounts Receivable, Bank Account Payment, Billing Attention...

📋 CreditNotes:
   Total CSV columns: 87
   Currently mapped: 31
   Unmapped: 74
   Coverage: 35.6%
   🔍 Sample unmapped fields: Account, Account Code, Accounts Receivable...

📋 CustomerPayments:
   Total CSV columns: 29
   Currently mapped: 20
   Unmapped: 18
   Coverage: 69.0%
   🔍 Sample unmapped fields: Amount Applied to Invoice, Branch ID, Branch Name...

📋 Invoices:
   Total CSV columns: 122
   Currently mapped: 37
   Unmapped: 100
   Coverage: 30.3%
   🔍 Sample unmapped fields: 2Checkout, Account, Account Code...

📋 Items:
   Total CSV columns: 41
   Currently mapped: 24
   Unmapped: 29

In [6]:
# Section 6: Sample Entity Analysis - Bills
print("=== SAMPLE ANALYSIS: BILLS ENTITY ===")

if 'Bills' in csv_analysis and 'error' not in csv_analysis['Bills']:
    bills_analysis = csv_analysis['Bills']
    
    print(f"CSV File: {bills_analysis['csv_file']}")
    print(f"Total CSV columns: {bills_analysis['total_csv_columns']}")
    print(f"Currently mapped: {bills_analysis['currently_mapped']}")
    print(f"Unmapped: {bills_analysis['unmapped_count']}")
    print()
    
    if bills_analysis['unmapped_columns']:
        print("🔍 UNMAPPED FIELDS IN BILLS:")
        for i, field in enumerate(bills_analysis['unmapped_columns'], 1):
            print(f"  {i:2d}. {field}")
        
        print(f"\n📝 These {len(bills_analysis['unmapped_columns'])} fields need to be added to BILLS_CSV_MAP")
        
        # Show what the mapping additions would look like
        print(f"\n📋 MAPPING ADDITIONS NEEDED:")
        print("# Add to BILLS_CSV_MAP:")
        for field in bills_analysis['unmapped_columns'][:5]:  # Show first 5
            print(f"    '{field}': '{field}',")
        if len(bills_analysis['unmapped_columns']) > 5:
            print(f"    # ... and {len(bills_analysis['unmapped_columns']) - 5} more")
            
    else:
        print("✅ No unmapped fields found in Bills")
        
else:
    print("❌ Bills analysis not available or had errors")

# Quick check for custom fields pattern
print(f"\n🔧 CUSTOM FIELDS ANALYSIS:")
if 'Bills' in csv_analysis and 'unmapped_columns' in csv_analysis['Bills']:
    custom_field_pattern = []
    for field in csv_analysis['Bills']['unmapped_columns']:
        if 'cf_' in field.lower() or 'custom' in field.lower():
            custom_field_pattern.append(field)
    
    if custom_field_pattern:
        print(f"Found {len(custom_field_pattern)} likely custom fields in Bills:")
        for field in custom_field_pattern:
            print(f"  - {field}")
    else:
        print("No obvious custom field patterns found (cf_, custom, etc.)")
else:
    print("Unable to analyze custom field patterns")

=== SAMPLE ANALYSIS: BILLS ENTITY ===
CSV File: Bill.csv
Total CSV columns: 64
Currently mapped: 35
Unmapped: 44

🔍 UNMAPPED FIELDS IN BILLS:
   1. Account
   2. Account Code
   3. Accounts Payable
   4. Adjustment
   5. Adjustment Account
   6. Adjustment Description
   7. Approved By
   8. Approved Date
   9. Bill Status
  10. Bill Type
  11. Branch ID
  12. Branch Name
  13. CF.ChP Scheme Settlement Period
  14. Created By
  15. Customer Name
  16. Description
  17. Discount
  18. Discount Account
  19. Discount Account Code
  20. Discount Amount
  21. Discount Type
  22. Entity Discount Amount
  23. Entity Discount Percent
  24. Is Billable
  25. Is Discount Before Tax
  26. Is Inclusive Tax
  27. Is Landed Cost
  28. Payment Terms
  29. Payment Terms Label
  30. Product ID
  31. Purchase Order Number
  32. PurchaseOrder
  33. Region
  34. SubTotal
  35. Submitted By
  36. Submitted Date
  37. TDS Amount
  38. TDS Name
  39. TDS Percentage
  40. TDS Type
  41. Tax Amount
  42. Usag

In [7]:
# Section 7: Generate Updated Mappings and Apply Changes
print("=== GENERATING UPDATED MAPPINGS ===")
print()

# Read the current mappings file
with open(MAPPINGS_FILE, 'r', encoding='utf-8') as f:
    mappings_content = f.read()

print(f"Original mappings.py file size: {len(mappings_content)} characters")

# Generate updates for each entity
updates_needed = []
total_new_fields = 0

for entity, analysis in csv_analysis.items():
    if 'error' in analysis or analysis['unmapped_count'] == 0:
        continue
    
    updates_needed.append({
        'entity': entity,
        'new_fields': analysis['unmapped_columns'],
        'count': analysis['unmapped_count']
    })
    total_new_fields += analysis['unmapped_count']

print(f"Entities needing updates: {len(updates_needed)}")
print(f"Total new fields to add: {total_new_fields}")
print()

for update in updates_needed:
    entity = update['entity']
    new_fields = update['new_fields']
    count = update['count']
    
    print(f"📋 {entity}: {count} new fields")
    for field in new_fields[:3]:  # Show first 3
        print(f"   - {field}")
    if count > 3:
        print(f"   ... and {count - 3} more")
    print()

# Proceed with update if we have changes
if updates_needed:
    print("🔄 PROCEEDING WITH MAPPINGS UPDATE...")
    
    # Create updated content
    updated_content = mappings_content
    
    # Add update header comment
    update_timestamp = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
    update_header = f"""
# ============================================================================
# MAPPINGS UPDATE - {update_timestamp}
# ============================================================================
# Added {total_new_fields} additional CSV fields (likely custom fields)
# across {len(updates_needed)} entities. Fields maintain original CSV names.
# Original backup: mappings_backup_2025-07-05_16-37-59.py
# ============================================================================

"""
    
    # Insert header after the existing docstring
    docstring_end = updated_content.find('"""', updated_content.find('"""') + 3) + 3
    updated_content = updated_content[:docstring_end] + update_header + updated_content[docstring_end:]
    
    print("✅ Added update header to file")
    print(f"✅ Ready to update {len(updates_needed)} entity mappings")
    
else:
    print("ℹ️  No updates needed - all CSV fields are already mapped")

PROCEED_WITH_UPDATE = True  # Set to apply changes

=== GENERATING UPDATED MAPPINGS ===

Original mappings.py file size: 28273 characters
Entities needing updates: 9
Total new fields to add: 469

📋 Bills: 44 new fields
   - Account
   - Account Code
   - Accounts Payable
   ... and 41 more

📋 Contacts: 53 new fields
   - Accounts Receivable
   - Bank Account Payment
   - Billing Attention
   ... and 50 more

📋 CreditNotes: 74 new fields
   - Account
   - Account Code
   - Accounts Receivable
   ... and 71 more

📋 CustomerPayments: 18 new fields
   - Amount Applied to Invoice
   - Branch ID
   - Branch Name
   ... and 15 more

📋 Invoices: 100 new fields
   - 2Checkout
   - Account
   - Account Code
   ... and 97 more

📋 Items: 29 new fields
   - Account
   - Account Code
   - CF.Item Location
   ... and 26 more

📋 PurchaseOrders: 64 new fields
   - Account
   - Account Code
   - Address
   ... and 61 more

📋 SalesOrders: 68 new fields
   - Account
   - Account Code
   - Adjustment
   ... and 65 more

📋 VendorPayments: 19 new fields
   - 

In [8]:
# Section 8: Apply Mappings Updates to File
print("=== APPLYING UPDATES TO MAPPINGS.PY ===")
print()

if 'updates_needed' in locals() and updates_needed and PROCEED_WITH_UPDATE:
    
    # For each entity that needs updates, modify the mapping
    for update in updates_needed:
        entity = update['entity']
        new_fields = update['new_fields']
        
        # Find the mapping variable name
        mapping_var_map = {
            'Invoices': 'INVOICE_CSV_MAP',
            'Items': 'ITEMS_CSV_MAP',
            'Contacts': 'CONTACTS_CSV_MAP',
            'Bills': 'BILLS_CSV_MAP',
            'CustomerPayments': 'CUSTOMER_PAYMENTS_CSV_MAP',
            'VendorPayments': 'VENDOR_PAYMENTS_CSV_MAP',
            'SalesOrders': 'SALES_ORDERS_CSV_MAP',
            'PurchaseOrders': 'PURCHASE_ORDERS_CSV_MAP',
            'CreditNotes': 'CREDIT_NOTES_CSV_MAP'
        }
        
        mapping_var = mapping_var_map.get(entity)
        if not mapping_var:
            print(f"⚠️  Skipping {entity} - no mapping variable found")
            continue
        
        print(f"🔄 Updating {mapping_var} for {entity}...")
        
        # Find the mapping in the file content
        mapping_start = updated_content.find(f"{mapping_var} = {{")
        if mapping_start == -1:
            print(f"❌ Could not find {mapping_var} in file")
            continue
        
        # Find the end of the mapping (closing brace)
        brace_count = 0
        mapping_end = mapping_start
        for i, char in enumerate(updated_content[mapping_start:]):
            if char == '{':
                brace_count += 1
            elif char == '}':
                brace_count -= 1
                if brace_count == 0:
                    mapping_end = mapping_start + i
                    break
        
        if mapping_end == mapping_start:
            print(f"❌ Could not find end of {mapping_var}")
            continue
        
        # Insert new fields before the closing brace
        new_mappings = []
        for field in new_fields:
            # Escape single quotes in field names
            escaped_field = field.replace("'", "\\'")
            new_mappings.append(f"    '{escaped_field}': '{escaped_field}',")
        
        new_mappings_text = "\n".join(new_mappings)
        
        # Insert the new mappings
        insert_pos = mapping_end
        updated_content = (
            updated_content[:insert_pos] + 
            "\n    # Additional fields from CSV analysis\n" +
            new_mappings_text + "\n" +
            updated_content[insert_pos:]
        )
        
        print(f"✅ Added {len(new_fields)} fields to {mapping_var}")
    
    # Write the updated file
    try:
        with open(MAPPINGS_FILE, 'w', encoding='utf-8') as f:
            f.write(updated_content)
        
        print(f"\n🎉 SUCCESS! Updated mappings.py file")
        print(f"   File size: {len(updated_content)} characters")
        print(f"   Added {total_new_fields} new field mappings")
        print(f"   Updated {len(updates_needed)} entity mappings")
        
        # Verify the file was written correctly
        with open(MAPPINGS_FILE, 'r', encoding='utf-8') as f:
            verification_content = f.read()
        
        if len(verification_content) == len(updated_content):
            print(f"✅ File verification successful")
        else:
            print(f"⚠️  File size mismatch - verification needed")
            
    except Exception as e:
        print(f"❌ Error writing file: {e}")
        print("File was not modified")
        
else:
    print("ℹ️  No updates applied - either no changes needed or PROCEED_WITH_UPDATE is False")

print(f"\n📋 NEXT STEPS:")
print(f"1. Test the updated mappings with ETL pipeline")
print(f"2. Verify new fields are properly processed")
print(f"3. Update canonical schema if needed")
print(f"4. Commit changes to git")

=== APPLYING UPDATES TO MAPPINGS.PY ===

🔄 Updating BILLS_CSV_MAP for Bills...
✅ Added 44 fields to BILLS_CSV_MAP
🔄 Updating CONTACTS_CSV_MAP for Contacts...
✅ Added 53 fields to CONTACTS_CSV_MAP
🔄 Updating CREDIT_NOTES_CSV_MAP for CreditNotes...
✅ Added 74 fields to CREDIT_NOTES_CSV_MAP
🔄 Updating CUSTOMER_PAYMENTS_CSV_MAP for CustomerPayments...
✅ Added 18 fields to CUSTOMER_PAYMENTS_CSV_MAP
🔄 Updating INVOICE_CSV_MAP for Invoices...
✅ Added 100 fields to INVOICE_CSV_MAP
🔄 Updating ITEMS_CSV_MAP for Items...
✅ Added 29 fields to ITEMS_CSV_MAP
🔄 Updating PURCHASE_ORDERS_CSV_MAP for PurchaseOrders...
✅ Added 64 fields to PURCHASE_ORDERS_CSV_MAP
🔄 Updating SALES_ORDERS_CSV_MAP for SalesOrders...
✅ Added 68 fields to SALES_ORDERS_CSV_MAP
🔄 Updating VENDOR_PAYMENTS_CSV_MAP for VendorPayments...
✅ Added 19 fields to VENDOR_PAYMENTS_CSV_MAP

🎉 SUCCESS! Updated mappings.py file
   File size: 47883 characters
   Added 469 new field mappings
   Updated 9 entity mappings
✅ File verification suc

# CSV Field Analysis and Mappings Update
## Date: 2025-07-05

This notebook analyzes CSV files to identify additional fields that are not currently mapped in mappings.py.
These are likely custom fields not documented in the Zoho API.

### Objectives:
1. Load all CSV files and examine their column structures
2. Compare CSV columns with existing mappings
3. Identify missing/unmapped fields
4. Update mappings.py to include all CSV fields
5. Maintain field names as they appear in CSV (custom fields)

In [None]:
# Section 1: Setup and Imports
import pandas as pd
import json
import os
import sys
from pathlib import Path
from typing import Dict, List, Set, Any
import re

# Add src to path for imports
sys.path.insert(0, str(Path.cwd().parent / 'src'))

# Set pandas display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 40)

print("Libraries imported successfully!")
print(f"Working directory: {Path.cwd()}")

# Define paths
DATA_DIR = Path.cwd().parent / 'data' / 'csv' / 'Nangsel Pioneers_2025-06-22'
SRC_DIR = Path.cwd().parent / 'src'
MAPPINGS_FILE = SRC_DIR / 'data_pipeline' / 'mappings.py'

print(f"Data directory: {DATA_DIR}")
print(f"Data directory exists: {DATA_DIR.exists()}")
print(f"Mappings file: {MAPPINGS_FILE}")

In [None]:
# Section 2: Load Current Mappings
from data_pipeline.mappings import (
    CANONICAL_SCHEMA,
    INVOICE_CSV_MAP,
    ITEMS_CSV_MAP,
    CONTACTS_CSV_MAP,
    BILLS_CSV_MAP,
    CUSTOMER_PAYMENTS_CSV_MAP,
    VENDOR_PAYMENTS_CSV_MAP,
    SALES_ORDERS_CSV_MAP,
    PURCHASE_ORDERS_CSV_MAP,
    CREDIT_NOTES_CSV_MAP,
    get_entity_csv_mapping
)

print("Current mappings loaded successfully!")
print(f"Total entities in schema: {len(CANONICAL_SCHEMA)}")

# Map entity names to their CSV mappings
ENTITY_MAPPINGS = {
    'Invoices': INVOICE_CSV_MAP,
    'Items': ITEMS_CSV_MAP,
    'Contacts': CONTACTS_CSV_MAP,
    'Bills': BILLS_CSV_MAP,
    'CustomerPayments': CUSTOMER_PAYMENTS_CSV_MAP,
    'VendorPayments': VENDOR_PAYMENTS_CSV_MAP,
    'SalesOrders': SALES_ORDERS_CSV_MAP,
    'PurchaseOrders': PURCHASE_ORDERS_CSV_MAP,
    'CreditNotes': CREDIT_NOTES_CSV_MAP
}

print("Entity mappings configured:")
for entity, mapping in ENTITY_MAPPINGS.items():
    print(f"  {entity}: {len(mapping)} mapped fields")

In [None]:
# Section 3: Discover CSV Files and Map to Entities
if DATA_DIR.exists():
    csv_files = list(DATA_DIR.glob('*.csv'))
    print(f"Found {len(csv_files)} CSV files:")
    
    # Map CSV files to entity names
    csv_to_entity_map = {
        'Invoice.csv': 'Invoices',
        'Item.csv': 'Items', 
        'Contacts.csv': 'Contacts',
        'Bill.csv': 'Bills',
        'Customer_Payment.csv': 'CustomerPayments',
        'Vendor_Payment.csv': 'VendorPayments',
        'Sales_Order.csv': 'SalesOrders',
        'Purchase_Order.csv': 'PurchaseOrders',
        'Credit_Note.csv': 'CreditNotes'
    }
    
    available_files = {}
    for csv_file in csv_files:
        file_name = csv_file.name
        entity = csv_to_entity_map.get(file_name)
        if entity:
            available_files[entity] = csv_file
            print(f"  ✅ {file_name} -> {entity}")
        else:
            print(f"  ⚠️  {file_name} -> No mapping found")
    
    print(f"\nMapped {len(available_files)} CSV files to entities")
else:
    print(f"❌ Data directory not found: {DATA_DIR}")
    available_files = {}

In [None]:
# Section 4: Analyze CSV Column Structures
csv_analysis = {}

print("=== ANALYZING CSV COLUMN STRUCTURES ===")
print()

for entity, csv_file in available_files.items():
    print(f"--- Analyzing {entity} ({csv_file.name}) ---")
    
    try:
        # Load CSV to get column names (just first few rows for efficiency)
        df = pd.read_csv(csv_file, nrows=5)
        csv_columns = list(df.columns)
        
        # Get current mapping for this entity
        current_mapping = ENTITY_MAPPINGS.get(entity, {})
        mapped_columns = set(current_mapping.keys())
        csv_columns_set = set(csv_columns)
        
        # Find unmapped columns
        unmapped_columns = csv_columns_set - mapped_columns
        
        # Find mapped columns that don't exist in CSV
        missing_in_csv = mapped_columns - csv_columns_set
        
        csv_analysis[entity] = {
            'csv_file': csv_file.name,
            'total_csv_columns': len(csv_columns),
            'csv_columns': csv_columns,
            'currently_mapped': len(mapped_columns),
            'mapped_columns': sorted(mapped_columns),
            'unmapped_count': len(unmapped_columns),
            'unmapped_columns': sorted(unmapped_columns),
            'missing_in_csv_count': len(missing_in_csv),
            'missing_in_csv': sorted(missing_in_csv)
        }
        
        print(f"  Total CSV columns: {len(csv_columns)}")
        print(f"  Currently mapped: {len(mapped_columns)}")
        print(f"  Unmapped columns: {len(unmapped_columns)}")
        
        if unmapped_columns:
            print(f"  🔍 Unmapped fields found:")
            for col in sorted(unmapped_columns)[:10]:  # Show first 10
                print(f"    - {col}")
            if len(unmapped_columns) > 10:
                print(f"    ... and {len(unmapped_columns) - 10} more")
        
        if missing_in_csv:
            print(f"  ⚠️  Mapped but not in CSV: {len(missing_in_csv)}")
            for col in sorted(missing_in_csv)[:5]:
                print(f"    - {col}")
        
        print()
        
    except Exception as e:
        print(f"  ❌ Error analyzing {csv_file.name}: {e}")
        csv_analysis[entity] = {'error': str(e)}
        print()

print(f"Analysis completed for {len(csv_analysis)} entities")

In [None]:
# Section 5: Detailed Analysis of Unmapped Fields
print("=== DETAILED UNMAPPED FIELDS ANALYSIS ===")
print()

total_unmapped = 0
all_unmapped_fields = {}

for entity, analysis in csv_analysis.items():
    if 'error' in analysis:
        continue
        
    unmapped = analysis['unmapped_columns']
    if unmapped:
        print(f"--- {entity} ({analysis['unmapped_count']} unmapped fields) ---")
        total_unmapped += len(unmapped)
        all_unmapped_fields[entity] = unmapped
        
        # Categorize fields by likely type
        custom_fields = []
        system_fields = []
        unknown_fields = []
        
        for field in unmapped:
            field_lower = field.lower()
            if any(keyword in field_lower for keyword in ['cf_', 'custom', 'field']):
                custom_fields.append(field)
            elif any(keyword in field_lower for keyword in ['id', 'time', 'date', 'status', 'number']):
                system_fields.append(field)
            else:
                unknown_fields.append(field)
        
        if custom_fields:
            print(f"  🔧 Likely Custom Fields ({len(custom_fields)}):")
            for field in custom_fields:
                print(f"    - {field}")
        
        if system_fields:
            print(f"  ⚙️  Likely System Fields ({len(system_fields)}):")
            for field in system_fields:
                print(f"    - {field}")
        
        if unknown_fields:
            print(f"  ❓ Other Fields ({len(unknown_fields)}):")
            for field in unknown_fields:
                print(f"    - {field}")
        
        print()

print(f"📊 SUMMARY:")
print(f"Total entities analyzed: {len([e for e in csv_analysis.values() if 'error' not in e])}")
print(f"Entities with unmapped fields: {len(all_unmapped_fields)}")
print(f"Total unmapped fields across all entities: {total_unmapped}")

In [None]:
# Section 6: Sample Data Analysis for Unmapped Fields
print("=== SAMPLE DATA ANALYSIS FOR UNMAPPED FIELDS ===")
print()

for entity, unmapped_fields in all_unmapped_fields.items():
    if not unmapped_fields:
        continue
        
    csv_file = available_files[entity]
    print(f"--- {entity} Sample Data ---")
    
    try:
        # Load sample data to understand field types and content
        df = pd.read_csv(csv_file, nrows=10)
        
        for field in unmapped_fields[:5]:  # Show first 5 unmapped fields
            if field in df.columns:
                non_null_values = df[field].dropna()
                unique_values = non_null_values.nunique()
                sample_values = non_null_values.head(3).tolist()
                
                print(f"  📋 {field}:")
                print(f"    Non-null values: {len(non_null_values)}/10")
                print(f"    Unique values: {unique_values}")
                print(f"    Sample values: {sample_values}")
                
                # Suggest data type
                if non_null_values.empty:
                    suggested_type = 'TEXT'  # Default for empty
                elif pd.api.types.is_numeric_dtype(df[field]):
                    suggested_type = 'REAL' if any('.' in str(v) for v in sample_values) else 'INTEGER'
                else:
                    suggested_type = 'TEXT'
                
                print(f"    Suggested type: {suggested_type}")
                print()
        
        if len(unmapped_fields) > 5:
            print(f"  ... and {len(unmapped_fields) - 5} more unmapped fields")
        
        print()
        
    except Exception as e:
        print(f"  ❌ Error analyzing sample data: {e}")
        print()

In [None]:
# Section 7: Generate Updated Mappings Structure
print("=== GENERATING UPDATED MAPPINGS STRUCTURE ===")
print()

updated_mappings = {}

for entity, analysis in csv_analysis.items():
    if 'error' in analysis:
        continue
    
    print(f"--- Generating updated mapping for {entity} ---")
    
    # Start with current mapping
    current_mapping = ENTITY_MAPPINGS.get(entity, {}).copy()
    
    # Add unmapped fields (keep same name as CSV)
    unmapped_fields = analysis['unmapped_columns']
    
    for field in unmapped_fields:
        # For unmapped fields, map CSV field name to itself (canonical name = CSV name)
        current_mapping[field] = field
    
    updated_mappings[entity] = current_mapping
    
    print(f"  Original mapped fields: {analysis['currently_mapped']}")
    print(f"  Added unmapped fields: {len(unmapped_fields)}")
    print(f"  Total fields in updated mapping: {len(current_mapping)}")
    
    if unmapped_fields:
        print(f"  Added fields:")
        for field in unmapped_fields[:10]:  # Show first 10
            print(f"    '{field}': '{field}',")
        if len(unmapped_fields) > 10:
            print(f"    ... and {len(unmapped_fields) - 10} more")
    
    print()

print(f"Updated mappings generated for {len(updated_mappings)} entities")

In [None]:
# Section 8: Update Schema with New Fields
print("=== UPDATING CANONICAL SCHEMA WITH NEW FIELDS ===")
print()

updated_schema = {}

for entity in CANONICAL_SCHEMA.keys():
    print(f"--- Updating schema for {entity} ---")
    
    # Copy current schema
    current_schema = CANONICAL_SCHEMA[entity].copy()
    
    # Get unmapped fields for this entity
    if entity in all_unmapped_fields:
        unmapped_fields = all_unmapped_fields[entity]
        
        # Add unmapped fields to both header and line item columns as needed
        header_columns = current_schema['header_columns'].copy()
        line_items_columns = current_schema.get('line_items_columns', {}).copy()
        
        # Analyze which fields belong to header vs line items
        if entity in available_files:
            csv_file = available_files[entity]
            try:
                df = pd.read_csv(csv_file, nrows=10)
                
                # Simple heuristic: fields with repeating values likely belong to line items
                for field in unmapped_fields:
                    if field in df.columns:
                        # Default to TEXT type for new fields
                        field_type = 'TEXT'
                        
                        # Try to infer better type
                        non_null_values = df[field].dropna()
                        if not non_null_values.empty:
                            if pd.api.types.is_numeric_dtype(df[field]):
                                field_type = 'REAL'
                            elif pd.api.types.is_integer_dtype(df[field]):
                                field_type = 'INTEGER'
                        
                        # For now, add all unmapped fields to header columns
                        # (More sophisticated logic would be needed to determine line item fields)
                        header_columns[field] = field_type
                        
                        print(f"  Added to header: {field} ({field_type})")
                
            except Exception as e:
                print(f"  ⚠️  Error processing CSV data: {e}")
                # Fallback: add as TEXT fields
                for field in unmapped_fields:
                    header_columns[field] = 'TEXT'
                    print(f"  Added to header (fallback): {field} (TEXT)")
        
        # Update the schema
        current_schema['header_columns'] = header_columns
        current_schema['line_items_columns'] = line_items_columns
        
        print(f"  Original header columns: {len(CANONICAL_SCHEMA[entity]['header_columns'])}")
        print(f"  Updated header columns: {len(header_columns)}")
        print(f"  Added: {len(unmapped_fields)} new fields")
    else:
        print(f"  No unmapped fields found - schema unchanged")
    
    updated_schema[entity] = current_schema
    print()

print(f"Schema updates completed for {len(updated_schema)} entities")

In [None]:
# Section 9: Generate Updated Mappings.py Content
print("=== GENERATING UPDATED MAPPINGS.PY CONTENT ===")
print()

# Read the original mappings file to preserve structure and comments
with open(MAPPINGS_FILE, 'r', encoding='utf-8') as f:
    original_content = f.read()

print(f"Original mappings.py size: {len(original_content)} characters")

# Create backup content summary
backup_summary = f"""
# ============================================================================
# MAPPINGS UPDATE SUMMARY - {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}
# ============================================================================
# This file was automatically updated to include additional CSV fields
# that were not previously mapped (likely custom fields).
#
# Update Summary:
"""

total_added_fields = 0
for entity, analysis in csv_analysis.items():
    if 'error' not in analysis and analysis['unmapped_count'] > 0:
        total_added_fields += analysis['unmapped_count']
        backup_summary += f"# - {entity}: Added {analysis['unmapped_count']} fields\n"

backup_summary += f"#\n# Total new fields added: {total_added_fields}\n"
backup_summary += f"# Original backup: mappings_backup_2025-07-05_16-37-59.py\n"
backup_summary += f"# ============================================================================\n\n"

print(f"Total new fields to be added: {total_added_fields}")
print("\nUpdate summary prepared for file header")

In [None]:
# Section 10: Write Updated Mappings File
print("=== WRITING UPDATED MAPPINGS.PY FILE ===")
print()

# For this demonstration, let's create the updated content structure
# In practice, you'd want to programmatically update the file

print("📋 IMPLEMENTATION PLAN:")
print("1. ✅ Backup original mappings.py file")
print("2. ✅ Analyze CSV files for unmapped fields")
print("3. ✅ Generate updated mapping structures")
print("4. ✅ Identify new schema requirements")
print("5. 🔄 Update mappings.py with new fields")
print("6. 🔄 Test updated mappings with ETL pipeline")

print("\n📊 SUMMARY OF CHANGES NEEDED:")
for entity, analysis in csv_analysis.items():
    if 'error' not in analysis and analysis['unmapped_count'] > 0:
        print(f"\n{entity}:")
        print(f"  - Current mapped fields: {analysis['currently_mapped']}")
        print(f"  - Fields to add: {analysis['unmapped_count']}")
        print(f"  - New total: {analysis['currently_mapped'] + analysis['unmapped_count']}")
        
        # Show specific fields to add
        if analysis['unmapped_columns']:
            print(f"  - Fields to add: {', '.join(analysis['unmapped_columns'][:5])}{'...' if len(analysis['unmapped_columns']) > 5 else ''}")

print(f"\n🎯 NEXT STEPS:")
print("1. Review the identified unmapped fields above")
print("2. Confirm which fields should be added to mappings")
print("3. Run the update process to modify mappings.py")
print("4. Test the updated mappings with ETL pipeline")

# Set flag for whether to proceed with file update
PROCEED_WITH_UPDATE = True  # Set to True to actually update the file

if PROCEED_WITH_UPDATE:
    print("\n⚠️  READY TO UPDATE MAPPINGS.PY")
    print("The next cell will actually modify the file.")
else:
    print("\n✋ UPDATE PAUSED")
    print("Set PROCEED_WITH_UPDATE = True to update the file.")

In [9]:
# Section 11: Check Specific Target Entities
print("=== CHECKING SPECIFIC TARGET ENTITIES ===")
print()

# List of specific entities to focus on
target_entities = [
    'Invoices',
    'Items', 
    'Contacts',
    'Bills',
    'Organizations',
    'CustomerPayments',
    'VendorPayments',
    'SalesOrders',
    'PurchaseOrders',
    'CreditNotes'
]

print("Target entities to analyze:")
for entity in target_entities:
    print(f"  - {entity}")

print()

# Check which of these entities have CSV files and analysis
target_analysis = {}
total_target_fields = 0

for entity in target_entities:
    if entity in csv_analysis and 'error' not in csv_analysis[entity]:
        analysis = csv_analysis[entity]
        target_analysis[entity] = analysis
        total_target_fields += analysis['unmapped_count']
        
        print(f"📋 {entity}:")
        print(f"   CSV file: {analysis.get('csv_file', 'N/A')}")
        print(f"   Total CSV columns: {analysis['total_csv_columns']}")
        print(f"   Currently mapped: {analysis['currently_mapped']}")
        print(f"   Unmapped fields: {analysis['unmapped_count']}")
        
        if analysis['unmapped_count'] > 0:
            print(f"   Sample unmapped: {', '.join(analysis['unmapped_columns'][:3])}...")
        
        print()
    elif entity in available_files:
        print(f"⚠️  {entity}: CSV file found but analysis failed")
    else:
        print(f"❌ {entity}: No CSV file found")

print(f"📊 SUMMARY FOR TARGET ENTITIES:")
print(f"   Entities with CSV files: {len(target_analysis)}")
print(f"   Total unmapped fields: {total_target_fields}")

if target_analysis:
    print(f"\n📋 BREAKDOWN BY ENTITY:")
    for entity, analysis in target_analysis.items():
        if analysis['unmapped_count'] > 0:
            print(f"   {entity}: {analysis['unmapped_count']} new fields")

=== CHECKING SPECIFIC TARGET ENTITIES ===

Target entities to analyze:
  - Invoices
  - Items
  - Contacts
  - Bills
  - Organizations
  - CustomerPayments
  - VendorPayments
  - SalesOrders
  - PurchaseOrders
  - CreditNotes

📋 Invoices:
   CSV file: Invoice.csv
   Total CSV columns: 122
   Currently mapped: 37
   Unmapped fields: 100
   Sample unmapped: 2Checkout, Account, Account Code...

📋 Items:
   CSV file: Item.csv
   Total CSV columns: 41
   Currently mapped: 24
   Unmapped fields: 29
   Sample unmapped: Account, Account Code, CF.Item Location...

📋 Contacts:
   CSV file: Contacts.csv
   Total CSV columns: 72
   Currently mapped: 23
   Unmapped fields: 53
   Sample unmapped: Accounts Receivable, Bank Account Payment, Billing Attention...

📋 Bills:
   CSV file: Bill.csv
   Total CSV columns: 64
   Currently mapped: 35
   Unmapped fields: 44
   Sample unmapped: Account, Account Code, Accounts Payable...

❌ Organizations: No CSV file found
📋 CustomerPayments:
   CSV file: Customer

In [10]:
# Section 12: Concise Summary for Target Entities
print("=== CONCISE SUMMARY FOR TARGET ENTITIES ===")
print()

target_entities = [
    'Invoices', 'Items', 'Contacts', 'Bills', 'Organizations',
    'CustomerPayments', 'VendorPayments', 'SalesOrders', 
    'PurchaseOrders', 'CreditNotes'
]

entities_found = 0
total_new_fields = 0
breakdown = []

for entity in target_entities:
    if entity in csv_analysis and 'error' not in csv_analysis[entity]:
        analysis = csv_analysis[entity]
        entities_found += 1
        new_fields = analysis['unmapped_count']
        total_new_fields += new_fields
        
        if new_fields > 0:
            breakdown.append(f"{entity}: {new_fields}")

print(f"📊 QUICK SUMMARY:")
print(f"   Target entities with CSV files: {entities_found}/10")
print(f"   Total new fields needed: {total_new_fields}")
print()

if breakdown:
    print("📋 Entities needing field additions:")
    for item in breakdown:
        print(f"   - {item}")
else:
    print("✅ All target entities are fully mapped!")

print(f"\n🎯 RESULT: Need to add {total_new_fields} additional fields across {len(breakdown)} entities")

=== CONCISE SUMMARY FOR TARGET ENTITIES ===

📊 QUICK SUMMARY:
   Target entities with CSV files: 9/10
   Total new fields needed: 469

📋 Entities needing field additions:
   - Invoices: 100
   - Items: 29
   - Contacts: 53
   - Bills: 44
   - CustomerPayments: 18
   - VendorPayments: 19
   - SalesOrders: 68
   - PurchaseOrders: 64
   - CreditNotes: 74

🎯 RESULT: Need to add 469 additional fields across 9 entities


In [11]:
# Section 13: Detailed Entity Status Check
print("=== DETAILED ENTITY STATUS CHECK ===")
print()

target_entities = [
    'Invoices', 'Items', 'Contacts', 'Bills', 'Organizations',
    'CustomerPayments', 'VendorPayments', 'SalesOrders', 
    'PurchaseOrders', 'CreditNotes'
]

print("📋 STATUS FOR EACH TARGET ENTITY:")
print()

for entity in target_entities:
    print(f"--- {entity} ---")
    
    if entity in csv_analysis and 'error' not in csv_analysis[entity]:
        analysis = csv_analysis[entity]
        print(f"   ✅ CSV found: {analysis['csv_file']}")
        print(f"   📊 Total CSV columns: {analysis['total_csv_columns']}")
        print(f"   ✅ Currently mapped: {analysis['currently_mapped']}")
        print(f"   🔍 Need to add: {analysis['unmapped_count']} fields")
        
        # Show coverage percentage
        total_cols = analysis['total_csv_columns']
        mapped_cols = analysis['currently_mapped']
        coverage = (mapped_cols / total_cols * 100) if total_cols > 0 else 0
        print(f"   📈 Current coverage: {coverage:.1f}%")
        
    elif entity in available_files:
        print(f"   ⚠️  CSV found but analysis failed")
    else:
        print(f"   ❌ No CSV file found")
        # Check what CSV files we do have
        print(f"   💡 Available CSV files:")
        for file_entity, csv_path in available_files.items():
            print(f"      - {csv_path.name}")
    
    print()

# Check if Organizations might be named differently
print("🔍 CHECKING FOR ORGANIZATIONS-RELATED FILES:")
all_csv_files = list(DATA_DIR.glob('*.csv')) if DATA_DIR.exists() else []
org_related = [f for f in all_csv_files if 'org' in f.name.lower()]
if org_related:
    print(f"   Found possibly related files: {[f.name for f in org_related]}")
else:
    print("   No organization-related CSV files found")

print(f"\n📊 SUMMARY:")
print(f"   Target entities: {len(target_entities)}")
print(f"   CSV files found: {len([e for e in target_entities if e in csv_analysis])}")
print(f"   Missing CSV files: {len(target_entities) - len([e for e in target_entities if e in csv_analysis])}")
print(f"   Total new fields to add: 469")

=== DETAILED ENTITY STATUS CHECK ===

📋 STATUS FOR EACH TARGET ENTITY:

--- Invoices ---
   ✅ CSV found: Invoice.csv
   📊 Total CSV columns: 122
   ✅ Currently mapped: 37
   🔍 Need to add: 100 fields
   📈 Current coverage: 30.3%

--- Items ---
   ✅ CSV found: Item.csv
   📊 Total CSV columns: 41
   ✅ Currently mapped: 24
   🔍 Need to add: 29 fields
   📈 Current coverage: 58.5%

--- Contacts ---
   ✅ CSV found: Contacts.csv
   📊 Total CSV columns: 72
   ✅ Currently mapped: 23
   🔍 Need to add: 53 fields
   📈 Current coverage: 31.9%

--- Bills ---
   ✅ CSV found: Bill.csv
   📊 Total CSV columns: 64
   ✅ Currently mapped: 35
   🔍 Need to add: 44 fields
   📈 Current coverage: 54.7%

--- Organizations ---
   ❌ No CSV file found
   💡 Available CSV files:
      - Bill.csv
      - Contacts.csv
      - Credit_Note.csv
      - Customer_Payment.csv
      - Invoice.csv
      - Item.csv
      - Purchase_Order.csv
      - Sales_Order.csv
      - Vendor_Payment.csv

--- CustomerPayments ---
   ✅ CSV f