# 🏪 Woodland Play Cafe - BOOKING Transaction vs Tax Report Comparison

**Analysis Period:** August 2025  
**Purpose:** Compare booking transaction data with booking tax report data to identify discrepancies  
**Key Focus:** Missing records and amount differences between booking transactions and booking tax reports


In [None]:
# Load data and filter for booking
trans_file = "/Users/vijayaraghavandevaraj/Downloads/Wood - TransReport.xlsx"
tax_file = "/Users/vijayaraghavandevaraj/Downloads/Wood - Tax Report.xlsx"

WoodTrans = pd.read_excel(trans_file)
WoodTax = pd.read_excel(tax_file)

# Filter for booking data
trans_booking_df = WoodTrans[WoodTrans['Source'].isin(['website', 'crm'])].copy()
tax_booking_df = WoodTax[WoodTax['Module Name'] == 'booking'].copy()

print(f"Transaction BOOKING records: {len(trans_booking_df)}")
print(f"Tax BOOKING records: {len(tax_booking_df)}")


In [None]:
# Simple comparison - Total amounts
trans_total = trans_booking_df['Amount'].sum()
tax_total = tax_booking_df['Total Sum'].sum()

print("💰 BOOKING DATA COMPARISON")
print("="*40)
print(f"Transaction Total: ${trans_total:,.2f}")
print(f"Tax Report Total:  ${tax_total:,.2f}")
print(f"Difference:        ${trans_total - tax_total:,.2f}")
print("="*40)

# Check for missing Order IDs
trans_orders = set(trans_booking_df['Order ID'])
tax_orders = set(tax_booking_df['Order ID'])

missing_in_tax = trans_orders - tax_orders
missing_in_trans = tax_orders - trans_orders

print(f"\n📊 ORDER ID ANALYSIS")
print(f"Transaction Orders: {len(trans_orders)}")
print(f"Tax Orders:         {len(tax_orders)}")
print(f"Missing in Tax:     {len(missing_in_tax)}")
print(f"Missing in Trans:   {len(missing_in_trans)}")

if missing_in_tax:
    print(f"\nMissing Order IDs in Tax Report: {list(missing_in_tax)}")
if missing_in_trans:
    print(f"\nMissing Order IDs in Transaction: {list(missing_in_trans)}")


In [None]:
# Load and examine data files to understand structure
trans_file = "/Users/vijayaraghavandevaraj/Downloads/Wood - TransReport.xlsx"
tax_file = "/Users/vijayaraghavandevaraj/Downloads/Wood - Tax Report.xlsx"

print("📁 Loading Excel files...")
WoodTrans = pd.read_excel(trans_file)
WoodTax = pd.read_excel(tax_file)

print(f"✅ Transaction Report loaded: {len(WoodTrans)} records")
print(f"✅ Tax Report loaded: {len(WoodTax)} records")


📁 Loading Excel files...
✅ Transaction Report loaded: 1836 records
✅ Tax Report loaded: 717 records

📋 TRANSACTION REPORT COLUMNS:
['Location', 'Order ID', 'Transaction Date', 'Amount', 'Source', 'Payment Type', 'Payment Gateway', 'Transaction Type', 'Card Type', 'Currency', 'Payment ID', 'Type']

📋 TAX REPORT COLUMNS:
['Order ID', 'Module Name', 'Location', 'Month', 'Date', 'Year', 'Order Status', 'Payment Status', 'Tip', 'Tax', 'Total Sum', 'Total Sum.1']

📊 TRANSACTION REPORT SAMPLE:
         Location       Order ID Transaction Date  Amount Source  \
0  East Nashville  1757632565342       2025-09-11    2.20    pos   
1  East Nashville  1757631005016       2025-09-11    3.29    pos   
2  East Nashville  1757630762882       2025-09-11    4.39    pos   
3  East Nashville  1757630406239       2025-09-11   12.07    pos   
4  East Nashville  1757628587222       2025-09-11   24.15    pos   

   Payment Type Payment Gateway Transaction Type Card Type Currency  \
0  physicalCard          squ

In [35]:
# Filter data based on actual column names found above
print("🔍 Filtering data for POS module and August 2025...")

# First, let's check what unique values exist in the Module column
print("\n📊 UNIQUE MODULE VALUES:")
print("Transaction Report modules:", WoodTrans['Module'].unique() if 'Module' in WoodTrans.columns else "No 'Module' column found")
print("Tax Report modules:", WoodTax['Module'].unique() if 'Module' in WoodTax.columns else "No 'Module' column found")

# Check for alternative column names
print("\n🔍 CHECKING FOR ALTERNATIVE COLUMN NAMES:")
if 'Module' not in WoodTrans.columns:
    print("Available columns in Transaction Report:", [col for col in WoodTrans.columns if 'module' in col.lower() or 'pos' in col.lower()])
if 'Module' not in WoodTax.columns:
    print("Available columns in Tax Report:", [col for col in WoodTax.columns if 'module' in col.lower() or 'pos' in col.lower()])


🔍 Filtering data for POS module and August 2025...

📊 UNIQUE MODULE VALUES:
Transaction Report modules: No 'Module' column found
Tax Report modules: No 'Module' column found

🔍 CHECKING FOR ALTERNATIVE COLUMN NAMES:
Available columns in Transaction Report: []
Available columns in Tax Report: ['Module Name']


In [None]:
# Filter data for BOOKING module (not POS!)
print("🔍 Filtering data for BOOKING module...")

# Check what values exist in Source column for transactions
print("Transaction Report 'Source' values:", WoodTrans['Source'].unique())

# Check what values exist in Module Name column for tax
print("Tax Report 'Module Name' values:", WoodTax['Module Name'].unique())

# Filter for BOOKING transactions (website and crm sources) and BOOKING tax records
trans_booking_df = WoodTrans[WoodTrans['Source'].isin(['website', 'crm'])].copy()
tax_booking_df = WoodTax[WoodTax['Module Name'] == 'booking'].copy()

print(f"✅ Transaction BOOKING records (website + crm): {len(trans_booking_df)}")
print(f"✅ Tax BOOKING records: {len(tax_booking_df)}")

# Show sample of what we're working with
print("\n📊 Sample Transaction Data (BOOKING - website/crm):")
print(trans_booking_df[['Order ID', 'Transaction Date', 'Amount', 'Source']].head())
print("\n📊 Sample Tax Data (BOOKING):")
print(tax_booking_df[['Order ID', 'Date', 'Total Sum', 'Module Name']].head())


🔍 Filtering data based on actual column structure...
Transaction Report 'Type' values: ['credit' 'debit']
Tax Report 'Module Name' values: ['booking' 'memberships' 'memberships_recurring' 'pos']
✅ Transaction records (all): 1836
✅ Tax POS records: 0

📊 Sample Transaction Data:
        Order ID Transaction Date  Amount    Type
0  1757632565342       2025-09-11    2.20  credit
1  1757631005016       2025-09-11    3.29  credit
2  1757630762882       2025-09-11    4.39  credit
3  1757630406239       2025-09-11   12.07  credit
4  1757628587222       2025-09-11   24.15  credit

📊 Sample Tax Data:
Empty DataFrame
Columns: [Order ID, Date, Total Sum, Module Name]
Index: []


In [None]:
# Convert date columns and filter for August 2025
print("\n📅 Converting dates and filtering for August 2025...")

try:
    # Convert date columns using the correct column names
    trans_booking_df['Transaction Date'] = pd.to_datetime(trans_booking_df['Transaction Date'])
    trans_booking_df['Month'] = trans_booking_df['Transaction Date'].dt.to_period('M')
    print("✅ Using 'Transaction Date' column for transactions")
        
    tax_booking_df['Date'] = pd.to_datetime(tax_booking_df['Date'])
    tax_booking_df['Month'] = tax_booking_df['Date'].dt.to_period('M')
    print("✅ Using 'Date' column for tax data")
        
except Exception as e:
    print(f"❌ Error converting dates: {e}")

# Filter for August 2025
target_month = "2025-08"
try:
    trans_aug = trans_booking_df[trans_booking_df['Month'] == target_month].copy()
    tax_aug = tax_booking_df[tax_booking_df['Month'] == target_month].copy()
    print(f"✅ August 2025 Transaction records: {len(trans_aug)}")
    print(f"✅ August 2025 Tax BOOKING records: {len(tax_aug)}")
except Exception as e:
    print(f"❌ Error filtering for August 2025: {e}")
    print("Using all available data...")
    trans_aug = trans_booking_df.copy()
    tax_aug = tax_booking_df.copy()



📅 Converting dates and filtering for August 2025...
✅ Using 'Transaction Date' column for transactions
✅ Using 'Date' column for tax data
✅ August 2025 Transaction records: 880
✅ August 2025 Tax records: 0


In [38]:
# CRITICAL: Group transaction data by Order ID before comparison
print("⚠️  CRITICAL: Grouping transaction data by Order ID...")
print("   (Transaction report may have multiple line items per order)")

# Group by Order ID and aggregate
trans_aug_grouped = trans_aug.groupby('Order ID').agg({
    'Amount': 'sum',
    'Transaction Date': 'first',
    'Location': 'first', 
    'Payment Type': 'first',
    'Payment Gateway': 'first'
}).reset_index()

print(f"✅ Grouped Transaction records: {len(trans_aug_grouped)}")
print(f"   Original records: {len(trans_aug)}")
print(f"   Unique orders: {len(trans_aug_grouped)}")

# Show sample of grouped data
print("\n📊 Sample of grouped transaction data:")
print(trans_aug_grouped.head())


⚠️  CRITICAL: Grouping transaction data by Order ID...
   (Transaction report may have multiple line items per order)
✅ Grouped Transaction records: 872
   Original records: 880
   Unique orders: 872

📊 Sample of grouped transaction data:
        Order ID  Amount Transaction Date        Location  Payment Type  \
0  1754049486760   29.63       2025-08-01  East Nashville   virtualCard   
1  1754052982142   29.63       2025-08-01  East Nashville   virtualCard   
2  1754054358168   29.63       2025-08-01  East Nashville   virtualCard   
3  1754055269284   16.46       2025-08-01  East Nashville  physicalCard   
4  1754055670784    6.64       2025-08-01  East Nashville  physicalCard   

  Payment Gateway  
0       authorize  
1       authorize  
2       authorize  
3          square  
4          square  


In [39]:
# Find missing records in Tax Report
print("🔍 FINDING MISSING RECORDS IN TAX REPORT...")

# Get Order IDs from both datasets
trans_order_ids = set(trans_aug_grouped['Order ID'])
tax_order_ids = set(tax_aug['Order ID'])

# Find missing in tax report
missing_in_tax = trans_aug_grouped[~trans_aug_grouped['Order ID'].isin(tax_order_ids)].copy()

print(f"📊 MISSING RECORDS ANALYSIS:")
print(f"   • Transaction Orders: {len(trans_order_ids)}")
print(f"   • Tax Report Orders: {len(tax_order_ids)}")
print(f"   • Missing in Tax Report: {len(missing_in_tax)} orders")

if len(missing_in_tax) > 0:
    print(f"   • Missing Amount: ${missing_in_tax['Amount'].sum():,.2f}")
    print(f"\n📋 MISSING ORDER IDs:")
    for i, order_id in enumerate(missing_in_tax['Order ID'], 1):
        amount = missing_in_tax[missing_in_tax['Order ID'] == order_id]['Amount'].iloc[0]
        print(f"   {i}. Order ID: {order_id} - Amount: ${amount:,.2f}")
else:
    print("   ✅ No missing records found!")


🔍 FINDING MISSING RECORDS IN TAX REPORT...
📊 MISSING RECORDS ANALYSIS:
   • Transaction Orders: 872
   • Tax Report Orders: 0
   • Missing in Tax Report: 872 orders
   • Missing Amount: $20,918.64

📋 MISSING ORDER IDs:
   1. Order ID: 1754049486760 - Amount: $29.63
   2. Order ID: 1754052982142 - Amount: $29.63
   3. Order ID: 1754054358168 - Amount: $29.63
   4. Order ID: 1754055269284 - Amount: $16.46
   5. Order ID: 1754055670784 - Amount: $6.64
   6. Order ID: 1754056188512 - Amount: $6.34
   7. Order ID: 1754057020517 - Amount: $7.25
   8. Order ID: 1754057965319 - Amount: $39.53
   9. Order ID: 1754059781644 - Amount: $29.63
   10. Order ID: 1754060825701 - Amount: $0.00
   11. Order ID: 1754061063929 - Amount: $2.20
   12. Order ID: 1754061260315 - Amount: $40.45
   13. Order ID: 1754061923572 - Amount: $0.00
   14. Order ID: 1754062515441 - Amount: $15.46
   15. Order ID: 1754062989793 - Amount: $29.63
   16. Order ID: 1754063239119 - Amount: $6.62
   17. Order ID: 175406389627

In [40]:
# Find amount differences between matching orders
print("\n💰 FINDING AMOUNT DIFFERENCES...")

# Merge the datasets on Order ID
merged_comparison = pd.merge(
    trans_aug_grouped[['Order ID', 'Amount']], 
    tax_aug[['Order ID', 'Total Sum']], 
    on='Order ID', 
    how='inner'
)

# Calculate differences
merged_comparison['Amount_Diff'] = merged_comparison['Amount'] - merged_comparison['Total Sum']
merged_comparison['Abs_Diff'] = abs(merged_comparison['Amount_Diff'])

# Find significant differences (more than $0.01)
significant_mismatches = merged_comparison[merged_comparison['Abs_Diff'] > 0.01].copy()

print(f"📊 AMOUNT DIFFERENCES ANALYSIS:")
print(f"   • Matching Orders: {len(merged_comparison)}")
print(f"   • Orders with Differences: {len(significant_mismatches)}")
print(f"   • Total Difference: ${merged_comparison['Amount_Diff'].sum():,.2f}")

if len(significant_mismatches) > 0:
    print(f"\n📋 ORDERS WITH AMOUNT DIFFERENCES:")
    significant_mismatches_sorted = significant_mismatches.sort_values('Abs_Diff', ascending=False)
    for i, (_, row) in enumerate(significant_mismatches_sorted.iterrows(), 1):
        print(f"   {i}. Order ID: {row['Order ID']}")
        print(f"      Transaction: ${row['Amount']:,.2f} | Tax Report: ${row['Total Sum']:,.2f} | Diff: ${row['Amount_Diff']:+,.2f}")
else:
    print("   ✅ No significant amount differences found!")



💰 FINDING AMOUNT DIFFERENCES...
📊 AMOUNT DIFFERENCES ANALYSIS:
   • Matching Orders: 0
   • Orders with Differences: 0
   • Total Difference: $0.00
   ✅ No significant amount differences found!


In [41]:
# COMPREHENSIVE SUMMARY REPORT
print("\n" + "="*80)
print("📊 COMPREHENSIVE COMPARISON SUMMARY")
print("="*80)

print(f"\n💰 TOTAL AMOUNTS:")
print(f"   • Total Transaction Amount: ${trans_aug_grouped['Amount'].sum():,.2f}")
print(f"   • Total Tax Report Amount: ${tax_aug['Total Sum'].sum():,.2f}")
print(f"   • Total Tips in Tax Report: ${tax_aug['Tip'].sum():,.2f}")
print(f"   • Total Taxes in Tax Report: ${tax_aug['Tax'].sum():,.2f}")

print(f"\n📊 RECORD COUNTS:")
print(f"   • Transaction Records (Original): {len(trans_aug)}")
print(f"   • Transaction Records (Grouped): {len(trans_aug_grouped)}")
print(f"   • Tax Report Records: {len(tax_aug)}")
print(f"   • Matching Orders: {len(merged_comparison)}")

print(f"\n🔍 DISCREPANCY ANALYSIS:")
print(f"   • Missing in Tax Report: {len(missing_in_tax)} orders")
print(f"   • Amount Differences: {len(significant_mismatches)} orders")
print(f"   • Perfect Matches: {len(merged_comparison) - len(significant_mismatches)} orders")

print(f"\n📈 FINANCIAL IMPACT:")
print(f"   • Missing Records Value: ${missing_in_tax['Amount'].sum():,.2f}")
print(f"   • Amount Differences Total: ${merged_comparison['Amount_Diff'].sum():,.2f}")
print(f"   • Net Difference (Trans - Tax): ${trans_aug_grouped['Amount'].sum() - tax_aug['Total Sum'].sum():,.2f}")

print("\n" + "="*80)
print("✅ Analysis Complete!")



📊 COMPREHENSIVE COMPARISON SUMMARY

💰 TOTAL AMOUNTS:
   • Total Transaction Amount: $20,918.64
   • Total Tax Report Amount: $0.00
   • Total Tips in Tax Report: $0.00
   • Total Taxes in Tax Report: $0.00

📊 RECORD COUNTS:
   • Transaction Records (Original): 880
   • Transaction Records (Grouped): 872
   • Tax Report Records: 0
   • Matching Orders: 0

🔍 DISCREPANCY ANALYSIS:
   • Missing in Tax Report: 872 orders
   • Amount Differences: 0 orders
   • Perfect Matches: 0 orders

📈 FINANCIAL IMPACT:
   • Missing Records Value: $20,918.64
   • Amount Differences Total: $0.00
   • Net Difference (Trans - Tax): $20,918.64

✅ Analysis Complete!


In [42]:
# Import required libraries
import pandas as pd
import numpy as np

print("📊 Libraries imported successfully!")

# Load and examine data files to understand structure
trans_file = "/Users/vijayaraghavandevaraj/Downloads/Wood - TransReport.xlsx"
tax_file = "/Users/vijayaraghavandevaraj/Downloads/Wood - Tax Report.xlsx"

print("📁 Loading Excel files...")
WoodTrans = pd.read_excel(trans_file)
WoodTax = pd.read_excel(tax_file)

print(f"✅ Transaction Report loaded: {len(WoodTrans)} records")
print(f"✅ Tax Report loaded: {len(WoodTax)} records")

# Examine column names to understand the structure
print("\n📋 TRANSACTION REPORT COLUMNS:")
print(WoodTrans.columns.tolist())
print("\n📋 TAX REPORT COLUMNS:")
print(WoodTax.columns.tolist())

# Show sample data
print("\n📊 TRANSACTION REPORT SAMPLE:")
print(WoodTrans.head())
print("\n📊 TAX REPORT SAMPLE:")
print(WoodTax.head())


📊 Libraries imported successfully!
📁 Loading Excel files...
✅ Transaction Report loaded: 1836 records
✅ Tax Report loaded: 717 records

📋 TRANSACTION REPORT COLUMNS:
['Location', 'Order ID', 'Transaction Date', 'Amount', 'Source', 'Payment Type', 'Payment Gateway', 'Transaction Type', 'Card Type', 'Currency', 'Payment ID', 'Type']

📋 TAX REPORT COLUMNS:
['Order ID', 'Module Name', 'Location', 'Month', 'Date', 'Year', 'Order Status', 'Payment Status', 'Tip', 'Tax', 'Total Sum', 'Total Sum.1']

📊 TRANSACTION REPORT SAMPLE:
         Location       Order ID Transaction Date  Amount Source  \
0  East Nashville  1757632565342       2025-09-11    2.20    pos   
1  East Nashville  1757631005016       2025-09-11    3.29    pos   
2  East Nashville  1757630762882       2025-09-11    4.39    pos   
3  East Nashville  1757630406239       2025-09-11   12.07    pos   
4  East Nashville  1757628587222       2025-09-11   24.15    pos   

   Payment Type Payment Gateway Transaction Type Card Type Curre

In [43]:
# Load the Excel files
trans_file = "/Users/vijayaraghavandevaraj/Downloads/Wood - TransReport.xlsx"
tax_file = "/Users/vijayaraghavandevaraj/Downloads/Wood - Tax Report.xlsx"

print("📁 Loading Excel files...")
WoodTrans = pd.read_excel(trans_file)
WoodTax = pd.read_excel(tax_file)

print(f"✅ Transaction Report loaded: {len(WoodTrans)} records")
print(f"✅ Tax Report loaded: {len(WoodTax)} records")


📁 Loading Excel files...
✅ Transaction Report loaded: 1836 records
✅ Tax Report loaded: 717 records


In [44]:
# Display column information and first few rows
print("📋 TRANSACTION REPORT COLUMNS:")
print(WoodTrans.columns.tolist())
print(f"\n📋 TAX REPORT COLUMNS:")
print(WoodTax.columns.tolist())

print("\n📊 TRANSACTION REPORT SAMPLE:")
print(WoodTrans.head())

print("\n📊 TAX REPORT SAMPLE:")
print(WoodTax.head())


📋 TRANSACTION REPORT COLUMNS:
['Location', 'Order ID', 'Transaction Date', 'Amount', 'Source', 'Payment Type', 'Payment Gateway', 'Transaction Type', 'Card Type', 'Currency', 'Payment ID', 'Type']

📋 TAX REPORT COLUMNS:
['Order ID', 'Module Name', 'Location', 'Month', 'Date', 'Year', 'Order Status', 'Payment Status', 'Tip', 'Tax', 'Total Sum', 'Total Sum.1']

📊 TRANSACTION REPORT SAMPLE:
         Location       Order ID Transaction Date  Amount Source  \
0  East Nashville  1757632565342       2025-09-11    2.20    pos   
1  East Nashville  1757631005016       2025-09-11    3.29    pos   
2  East Nashville  1757630762882       2025-09-11    4.39    pos   
3  East Nashville  1757630406239       2025-09-11   12.07    pos   
4  East Nashville  1757628587222       2025-09-11   24.15    pos   

   Payment Type Payment Gateway Transaction Type Card Type Currency  \
0  physicalCard          square             sale       NaN      USD   
1  physicalCard          square             sale       NaN

In [46]:
# Filter and prepare data for August 2025
print("🔍 Filtering data for August 2025...")

# Filter transaction data for POS module
trans_pos_module_df = WoodTrans[WoodTrans['Module'] == 'booking'].copy()
print(f"✅ POS Transaction records: {len(trans_pos_module_df)}")

# Filter tax data for POS module  
tax_pos_df = WoodTax[WoodTax['Module'] == 'booking'].copy()
print(f"✅ POS Tax records: {len(tax_pos_df)}")

# Convert date columns
trans_pos_module_df['Transaction Date'] = pd.to_datetime(trans_pos_module_df['Transaction Date'])
tax_pos_df['Date'] = pd.to_datetime(tax_pos_df['Date'])

# Create month column
trans_pos_module_df['Month'] = trans_pos_module_df['Transaction Date'].dt.to_period('M')
tax_pos_df['Month'] = tax_pos_df['Date'].dt.to_period('M')

# Filter for August 2025
target_month = "2025-08"
trans_aug = trans_pos_module_df[trans_pos_module_df['Month'] == target_month].copy()
tax_aug = tax_pos_df[tax_pos_df['Month'] == target_month].copy()

print(f"✅ August 2025 Transaction records: {len(trans_aug)}")
print(f"✅ August 2025 Tax records: {len(tax_aug)}")


🔍 Filtering data for August 2025...


KeyError: 'Module'

In [None]:
# CRITICAL STEP: Group transaction data by Order ID before comparison
print("⚠️  CRITICAL: Grouping transaction data by Order ID...")
print("   (Transaction report may have multiple line items per order)")

# Group by Order ID and sum amounts
trans_aug_grouped = trans_aug.groupby('Order ID').agg({
    'Amount': 'sum',
    'Transaction Date': 'first',
    'Location': 'first', 
    'Payment Type': 'first',
    'Payment Gateway': 'first'
}).reset_index()

print(f"✅ Grouped Transaction records: {len(trans_aug_grouped)}")
print(f"   Original records: {len(trans_aug)}")
print(f"   Unique orders: {len(trans_aug_grouped)}")

# Show sample of grouped data
print("\n📊 Sample of grouped transaction data:")
print(trans_aug_grouped.head())


In [None]:
# Find missing records in Tax Report
print("🔍 FINDING MISSING RECORDS IN TAX REPORT...")

# Get Order IDs from both datasets
trans_order_ids = set(trans_aug_grouped['Order ID'])
tax_order_ids = set(tax_aug['Order ID'])

# Find missing in tax report
missing_in_tax = trans_aug_grouped[~trans_aug_grouped['Order ID'].isin(tax_order_ids)].copy()

print(f"📊 MISSING RECORDS ANALYSIS:")
print(f"   • Transaction Orders: {len(trans_order_ids)}")
print(f"   • Tax Report Orders: {len(tax_order_ids)}")
print(f"   • Missing in Tax Report: {len(missing_in_tax)} orders")

if len(missing_in_tax) > 0:
    print(f"   • Missing Amount: ${missing_in_tax['Amount'].sum():,.2f}")
    print(f"\n📋 MISSING ORDER IDs:")
    for i, order_id in enumerate(missing_in_tax['Order ID'], 1):
        amount = missing_in_tax[missing_in_tax['Order ID'] == order_id]['Amount'].iloc[0]
        print(f"   {i}. Order ID: {order_id} - Amount: ${amount:,.2f}")
else:
    print("   ✅ No missing records found!")


In [None]:
# Find amount differences between matching orders
print("\n💰 FINDING AMOUNT DIFFERENCES...")

# Merge the datasets on Order ID
merged_comparison = pd.merge(
    trans_aug_grouped[['Order ID', 'Amount']], 
    tax_aug[['Order ID', 'Total Sum']], 
    on='Order ID', 
    how='inner'
)

# Calculate differences
merged_comparison['Amount_Diff'] = merged_comparison['Amount'] - merged_comparison['Total Sum']
merged_comparison['Abs_Diff'] = abs(merged_comparison['Amount_Diff'])

# Find significant differences (more than $0.01)
significant_mismatches = merged_comparison[merged_comparison['Abs_Diff'] > 0.01].copy()

print(f"📊 AMOUNT DIFFERENCES ANALYSIS:")
print(f"   • Matching Orders: {len(merged_comparison)}")
print(f"   • Orders with Differences: {len(significant_mismatches)}")
print(f"   • Total Difference: ${merged_comparison['Amount_Diff'].sum():,.2f}")

if len(significant_mismatches) > 0:
    print(f"\n📋 ORDERS WITH AMOUNT DIFFERENCES:")
    significant_mismatches_sorted = significant_mismatches.sort_values('Abs_Diff', ascending=False)
    for i, (_, row) in enumerate(significant_mismatches_sorted.iterrows(), 1):
        print(f"   {i}. Order ID: {row['Order ID']}")
        print(f"      Transaction: ${row['Amount']:,.2f} | Tax Report: ${row['Total Sum']:,.2f} | Diff: ${row['Amount_Diff']:+,.2f}")
else:
    print("   ✅ No significant amount differences found!")


In [None]:
# COMPREHENSIVE SUMMARY REPORT
print("\n" + "="*80)
print("📊 COMPREHENSIVE COMPARISON SUMMARY")
print("="*80)

print(f"\n💰 TOTAL AMOUNTS:")
print(f"   • Total Transaction Amount: ${trans_aug_grouped['Amount'].sum():,.2f}")
print(f"   • Total Tax Report Amount: ${tax_aug['Total Sum'].sum():,.2f}")
print(f"   • Total Tips in Tax Report: ${tax_aug['Tip'].sum():,.2f}")
print(f"   • Total Taxes in Tax Report: ${tax_aug['Tax'].sum():,.2f}")

print(f"\n📊 RECORD COUNTS:")
print(f"   • Transaction Records (Original): {len(trans_aug)}")
print(f"   • Transaction Records (Grouped): {len(trans_aug_grouped)}")
print(f"   • Tax Report Records: {len(tax_aug)}")
print(f"   • Matching Orders: {len(merged_comparison)}")

print(f"\n🔍 DISCREPANCY ANALYSIS:")
print(f"   • Missing in Tax Report: {len(missing_in_tax)} orders")
print(f"   • Amount Differences: {len(significant_mismatches)} orders")
print(f"   • Perfect Matches: {len(merged_comparison) - len(significant_mismatches)} orders")

print(f"\n📈 FINANCIAL IMPACT:")
print(f"   • Missing Records Value: ${missing_in_tax['Amount'].sum():,.2f}")
print(f"   • Amount Differences Total: ${merged_comparison['Amount_Diff'].sum():,.2f}")
print(f"   • Net Difference (Trans - Tax): ${trans_aug_grouped['Amount'].sum() - tax_aug['Total Sum'].sum():,.2f}")

print("\n" + "="*80)
print("✅ Analysis Complete!")
