In [None]:
# Cell 1: Imports and Configuration
from pathlib import Path
import pandas as pd
from PIL import Image
from IPython.display import display
from rich import print as rprint
from rich.console import Console
from rich.table import Table

# Import evaluation system and document-aware field loader
from common.simple_model_evaluator import SimpleModelEvaluator
from common.batch_processor import load_document_field_definitions

console = Console()

# Base directories - everything lives under one shared root.
BASE_DIR = Path('/home/jovyan/nfs_share/tod')
IMAGE_DIR = BASE_DIR / "evaluation_data"
CSV_PATH = IMAGE_DIR / "ground_truth.csv"
UPDATES_CSV_PATH = IMAGE_DIR / "ground_truth_updates.csv"
OUTPUT_DIR = BASE_DIR.joinpath("output", "csv")

# Evaluator first, then the per-document-type field lists (same order as before).
evaluator = SimpleModelEvaluator()
doc_field_definitions = load_document_field_definitions()

# (label for the path line, label for the "exists" line, path)
_path_report = (
    ("Ground Truth CSV", "Ground truth", CSV_PATH),
    ("Updates CSV", "Updates file", UPDATES_CSV_PATH),
    ("Image Directory", "Image dir", IMAGE_DIR),
    ("Output Directory", "Output dir", OUTPUT_DIR),
)

# Print all four locations first, then their existence flags.
for _entry in _path_report:
    print(f"{_entry[0]}: {_entry[2]}")
for _entry in _path_report:
    print(f"{_entry[1]} exists: {_entry[2].exists()}")

print("✅ Evaluation system loaded")
print("✅ Document-aware field definitions loaded:")
for _heading, _doc_key in (
    ("Invoice", "invoice"),
    ("Receipt", "receipt"),
    ("Bank Statement", "bank_statement"),
):
    print(f"   - {_heading}: {len(doc_field_definitions[_doc_key])} fields")

In [2]:
# Cell 2: Load Model Results (Optional - for comparison)
# Collect every batch-results CSV in the output directory.
model_files = [*OUTPUT_DIR.glob("*batch_results*.csv")]

if not model_files:
    # Nothing to compare against; later cells check `model_df is None`.
    print("⚠️ No model result files found in output/csv/")
    print("Comparison feature will be disabled.")
    model_df = None
else:
    print("\nAvailable model result files:")
    for position, results_path in enumerate(model_files, start=1):
        print(f"{position}. {results_path.name}")

    # Default selection: the file with the latest modification time.
    MODEL_RESULTS_FILE = max(model_files, key=lambda candidate: candidate.stat().st_mtime)
    print(f"\nUsing: {MODEL_RESULTS_FILE.name}")

    # Read the selected predictions into a DataFrame.
    model_df = pd.read_csv(MODEL_RESULTS_FILE)
    print(f"Loaded {len(model_df)} model predictions")


Available model result files:
1. internvl3_non_quantized_batch_results_20250930_234054.csv
2. internvl3_batch_results_20250930_234346.csv
3. llama_batch_results_20250930_235426.csv
4. internvl3_non_quantized_batch_results_20251001_014926.csv
5. llama_batch_results_20251001_025145.csv
6. llama_batch_results_20251001_030022.csv
7. internvl3_non_quantized_batch_results_20251001_054744.csv
8. internvl3_non_quantized_batch_results_20251005_233614.csv
9. internvl3_batch_results_20251005_234335.csv
10. internvl3_non_quantized_batch_results_20251006_200849.csv
11. internvl3_non_quantized_batch_results_20251006_213136.csv

Using: internvl3_non_quantized_batch_results_20251006_213136.csv
Loaded 9 model predictions


In [3]:
# Cell 3: Define Image Name
# Set IMAGE_NAME to the file you want to inspect/edit.
IMAGE_NAME = 'image_003.png'

# Resolve the image inside the evaluation data directory and report its status.
image_path = IMAGE_DIR.joinpath(IMAGE_NAME)
print(
    f"Working with: {IMAGE_NAME}",
    f"Image exists: {image_path.exists()}",
    sep="\n",
)

Working with: image_003.png
Image exists: True


In [4]:
# Cell 4: Load CSV and Compare with Model Predictions (Using Document-Aware Evaluation)
#
# Reads the ground-truth CSV, selects the row for IMAGE_NAME, chooses the field
# list for that row's document type and - when a model prediction row is
# available - prints a field-by-field comparison built from the evaluator result.
# Without a prediction row, only the ground-truth values are printed.
gt_df = pd.read_csv(CSV_PATH)

# Bookkeeping columns that are never evaluated as extraction fields. Only used
# when no document-specific field list applies (missing or unrecognised type).
skip_fields = {'image_name', 'image_file', 'document_type', 'DOCUMENT_TYPE', 'processing_time',
               'field_count', 'found_fields', 'field_coverage', 'prompt_used',
               'timestamp', 'overall_accuracy', 'fields_extracted', 'fields_matched',
               'total_fields', 'inference_only'}

# Find the row for this image
gt_row = gt_df[gt_df['image_file'] == IMAGE_NAME]

if gt_row.empty:
    print(f"⚠️ No ground truth row found for {IMAGE_NAME}")
    print("\nAvailable images (first 10):")
    print(gt_df['image_file'].head(10).tolist())
else:
    # Get document type from whichever spelling of the column the CSV uses
    if 'document_type' in gt_row.columns:
        document_type = str(gt_row['document_type'].iloc[0]).upper()
    elif 'DOCUMENT_TYPE' in gt_row.columns:
        document_type = str(gt_row['DOCUMENT_TYPE'].iloc[0]).upper()
    else:
        # Can't determine document type - use all non-bookkeeping columns
        console.print("[yellow]⚠️ No document_type column found in ground truth CSV[/yellow]")
        console.print(f"[yellow]⚠️ Available columns: {list(gt_row.columns)}[/yellow]\n")
        extraction_fields = [col for col in gt_df.columns if col not in skip_fields]
        document_type = "UNKNOWN"

    # Get document-aware field list if we have a valid document type
    if document_type != "UNKNOWN":
        doc_type_lower = document_type.lower()
        if doc_type_lower in doc_field_definitions:
            extraction_fields = doc_field_definitions[doc_type_lower]
            console.print(f"[cyan]📋 Document Type: {document_type}[/cyan]")
            console.print(f"[cyan]📊 Evaluating {len(extraction_fields)} {doc_type_lower}-specific fields[/cyan]\n")
        else:
            # Fallback to all fields if document type not recognized
            extraction_fields = [col for col in gt_df.columns if col not in skip_fields]
            console.print(f"[yellow]⚠️ Unknown document type: {document_type}, using all fields[/yellow]\n")

    # Try to find model prediction
    model_row = None
    if model_df is not None:
        # Compare as text so a non-string column cannot break the `.str` accessor below.
        predicted_names = model_df['image_name'].astype(str)

        # Try exact match first
        model_row = model_df[predicted_names == IMAGE_NAME]

        # Try without extension if exact match fails
        if model_row.empty:
            image_stem = Path(IMAGE_NAME).stem
            # Prefer an exact stem match so 'image_003' cannot pick up 'image_0030.png'.
            stem_mask = predicted_names.map(lambda name: Path(name).stem) == image_stem
            if not stem_mask.any():
                # Last resort: literal substring match. regex=False keeps characters
                # such as '.', '(' or '+' in file names from being read as a pattern.
                stem_mask = predicted_names.str.contains(image_stem, regex=False)
            model_row = model_df[stem_mask]

        if model_row.empty:
            print(f"⚠️ No model prediction found for {IMAGE_NAME}")
            model_row = None
        else:
            if len(model_row) > 1:
                # Make the previously silent tie-break visible.
                print(f"⚠️ {len(model_row)} model rows match {IMAGE_NAME}; using the first")
            model_row = model_row.iloc[0]  # Get first row as Series

    if model_row is not None:
        # Use the evaluation system for proper comparison
        console.rule("[bold blue]Field-by-Field Comparison (Document-Aware Evaluation)[/bold blue]")

        # Prepare ground truth dict - ONLY document-aware fields present in the CSV
        ground_truth = {field: str(gt_row[field].iloc[0]) for field in extraction_fields if field in gt_row.columns}

        # Prepare extracted data dict (from model) - ONLY document-aware fields;
        # fields the model file does not contain are reported as "NOT_FOUND".
        extracted_data = {field: str(model_row[field]) if field in model_row.index else "NOT_FOUND"
                         for field in extraction_fields}

        # Run evaluation
        eval_result = evaluator.evaluate_extraction(extracted_data, ground_truth, IMAGE_NAME)

        # Display comparison table with evaluation results
        table = Table(show_header=True, header_style="bold magenta")
        table.add_column("Field", style="cyan", width=30)
        table.add_column("Ground Truth", style="green", width=35)
        table.add_column("Model Prediction", style="yellow", width=35)
        table.add_column("Match", style="white", width=8)

        # Add rows with evaluation results - ONLY document-aware fields
        for field in extraction_fields:
            if field not in gt_row.columns:
                continue

            gt_value = str(gt_row[field].iloc[0])[:33]  # Truncate for display
            model_value = str(model_row[field])[:33] if field in model_row.index else "NOT_FOUND"

            # Determine match status from the evaluator's missing/incorrect lists
            if field in eval_result.missing_fields:
                match_symbol = "❌ 0.0"
            elif field in eval_result.incorrect_fields:
                # Re-check the untruncated values with the evaluator's own matcher
                # to tell a fuzzy match apart from a plain mismatch.
                # NOTE(review): relies on the private `_values_match` helper.
                full_gt = str(gt_row[field].iloc[0])
                full_extracted = str(model_row[field]) if field in model_row.index else "NOT_FOUND"
                is_match = evaluator._values_match(full_extracted, full_gt)
                match_symbol = "≈ 0.8" if is_match else "❌ 0.0"
            else:
                # Neither missing nor incorrect: shown as an exact match
                match_symbol = "✅ 1.0"

            table.add_row(field, gt_value, model_value, match_symbol)

        console.print(table)

        # Display evaluation summary
        accuracy_pct = eval_result.accuracy * 100
        console.print(f"\n[bold]Overall Accuracy: {accuracy_pct:.1f}%[/bold]")
        console.print(f"Correct Fields: {eval_result.correct_fields}/{eval_result.total_fields}")
        console.print(f"Missing Fields: {len(eval_result.missing_fields)}")
        console.print(f"Incorrect Fields: {len(eval_result.incorrect_fields)}")

        console.print("\n[dim]Legend: ✅ Exact match (1.0) | ≈ Fuzzy match (≥0.8) | ❌ No match (<0.8)[/dim]")
        console.rule()
    else:
        # No model predictions available - just show ground truth
        console.rule("[bold blue]Ground Truth Data (Document-Aware Fields)[/bold blue]")
        print(f"\nGround truth for {IMAGE_NAME} ({document_type}):")
        print("=" * 80)
        for field in extraction_fields:
            if field in gt_row.columns:
                value = gt_row[field].iloc[0]
                print(f"{field:30s}: {value}")
        print("=" * 80)

In [None]:
# Cell 5: Edit Row Data
# Put your corrections in FIELD_UPDATES (column name -> corrected value) based on
# your inspection of the image. Uncomment and edit only the fields you want to
# change; leave the dict empty to make no changes.
FIELD_UPDATES = {
    # 'SUPPLIER_NAME': 'New Value',
    # 'BUSINESS_ADDRESS': 'New Value',
    # 'BUSINESS_ABN': 'New Value',
    # 'INVOICE_DATE': '24/08/2022',
    # 'PAYER_NAME': 'New Value',
    # 'PAYER_ADDRESS': 'New Value',
    # 'STATEMENT_DATE_RANGE': 'New Value',
    # 'GST_AMOUNT': 'New Value',
    # 'IS_GST_INCLUDED': 'New Value',
    # 'TOTAL_AMOUNT': 'New Value',
    # 'LINE_ITEM_DESCRIPTIONS': 'New Value',
    # 'TRANSACTION_DATES': 'New Value',
    # 'LINE_ITEM_PRICES': 'New Value',
    # 'LINE_ITEM_QUANTITIES': 'New Value',
    # 'LINE_ITEM_TOTAL_PRICES': 'New Value',
    # 'TRANSACTION_AMOUNTS_PAID': 'New Value',
    # 'TRANSACTION_AMOUNTS_RECEIVED': 'New Value',
    # 'ACCOUNT_BALANCE': 'New Value',
}

# Mark that this row has been updated (for tracking); the save cell reads this flag.
row_updated = False
# Index label of the ground-truth row for IMAGE_NAME; stays None when there is no such row.
row_idx = None

matching_index = gt_df.index[gt_df['image_file'] == IMAGE_NAME]
# A misspelled column would otherwise be silently created as a brand-new column.
unknown_fields = [field for field in FIELD_UPDATES if field not in gt_df.columns]

if matching_index.empty:
    print(f"⚠️ No ground truth row found for {IMAGE_NAME} - nothing to edit")
else:
    row_idx = matching_index[0]
    if unknown_fields:
        # Refuse the whole edit rather than applying only part of it.
        print(f"❌ Unknown field name(s), no changes applied: {unknown_fields}")
    else:
        for field, new_value in FIELD_UPDATES.items():
            # Columns inferred as numeric cannot hold text; widen them to object
            # first so the assignment does not warn or fail on newer pandas.
            if gt_df[field].dtype != object:
                gt_df[field] = gt_df[field].astype(object)
            gt_df.at[row_idx, field] = new_value
        row_updated = bool(FIELD_UPDATES)

# Display updated row
if row_updated:
    print(f"✅ Updated data for {IMAGE_NAME}:")
    print("=" * 80)
    updated_row = gt_df.loc[row_idx]
    for col in gt_df.columns:
        print(f"{col:30s}: {updated_row[col]}")
    print("=" * 80)
else:
    print(f"⚠️ No changes made to {IMAGE_NAME}")

In [None]:
# Cell 6: Save Updated Row to Updates File
# Save ONLY the updated row to ground_truth_updates.csv. An earlier entry for the
# same image is replaced; entries for other images are carried over unchanged.

if row_updated:
    # Get the updated row (double brackets keep it a one-row DataFrame)
    updated_row_df = gt_df.loc[[row_idx]]

    # Check if updates file exists
    if UPDATES_CSV_PATH.exists():
        # Load existing updates as plain text: letting pandas re-infer dtypes on
        # every save would rewrite earlier corrections (e.g. '0123' -> 123,
        # '45' -> 45.0 once a column contains a blank).
        existing_updates = pd.read_csv(UPDATES_CSV_PATH, dtype=str)

        # Drop any previous entry for this image, then append the fresh row.
        already_recorded = existing_updates['image_file'] == IMAGE_NAME
        updates_df = pd.concat(
            [existing_updates[~already_recorded], updated_row_df],
            ignore_index=True,
        )

        if already_recorded.any():
            print(f"✅ Replaced existing update for {IMAGE_NAME}")
        else:
            print(f"✅ Added new update for {IMAGE_NAME}")
    else:
        # Create new updates file
        updates_df = updated_row_df
        print(f"✅ Created new updates file with {IMAGE_NAME}")

    # Save updates file
    updates_df.to_csv(UPDATES_CSV_PATH, index=False)
    print(f"✅ Saved to: {UPDATES_CSV_PATH}")
    print(f"📊 Total updates in file: {len(updates_df)}")

    print("\n⚠️ Original ground_truth.csv remains UNTOUCHED")
else:
    print("⚠️ No updates to save (row_updated = False)")

In [None]:
# Cell 7: Copy Images of Updated Rows to Separate Location
# Copies images corresponding to all rows in ground_truth_updates.csv

import shutil

# Define destination directory for updated images
UPDATED_IMAGES_DIR = BASE_DIR / "evaluation_data" / "updated_images"

if UPDATES_CSV_PATH.exists():
    # Load all updates
    updates_df = pd.read_csv(UPDATES_CSV_PATH)

    # Only the file names are needed. Blank cells are read as NaN and would
    # crash the path join below, so drop them; repeated names are copied once.
    image_files = updates_df['image_file'].dropna().astype(str).drop_duplicates()
    blank_rows = int(updates_df['image_file'].isna().sum())

    # Create destination directory if it doesn't exist
    UPDATED_IMAGES_DIR.mkdir(parents=True, exist_ok=True)

    print(f"📁 Copying {len(image_files)} updated images to: {UPDATED_IMAGES_DIR}")
    print("=" * 80)

    copied_count = 0
    missing_count = 0

    for image_file in image_files:
        source_path = IMAGE_DIR / image_file
        dest_path = UPDATED_IMAGES_DIR / image_file

        # is_file() rather than exists(): a directory with that name cannot be
        # copied by shutil.copy2 and is reported as missing instead.
        if source_path.is_file():
            shutil.copy2(source_path, dest_path)
            print(f"✅ Copied: {image_file}")
            copied_count += 1
        else:
            print(f"❌ Missing: {image_file}")
            missing_count += 1

    print("=" * 80)
    print("📊 Summary:")
    print(f"   - Copied: {copied_count} images")
    print(f"   - Missing: {missing_count} images")
    if blank_rows:
        print(f"   - Skipped: {blank_rows} rows with no image_file")
    print(f"   - Destination: {UPDATED_IMAGES_DIR}")
else:
    print(f"⚠️ No updates file found at: {UPDATES_CSV_PATH}")
    print("⚠️ Run Cell 6 first to create updates")