In [None]:
#Cell 0
# Imports and path setup for the receipt comparison / ground-truth update notebook.
import pandas as pd
from pathlib import Path
import glob
import sys

# Put the current working directory at the front of sys.path before importing
# from `common` (presumably so the project-local package resolves when the
# notebook is started from the repository root - TODO confirm).
sys.path.insert(0, str(Path.cwd()))
from common.evaluation_metrics import calculate_field_accuracy
# NOTE(review): get_document_type_fields is not referenced in the cells visible here.
from common.config import get_document_type_fields

print("✅ Imports loaded")

In [None]:
#Cell 1
# Input locations: the fixed ground-truth CSV and one Llama batch export.
GROUND_TRUTH_PATH = 'evaluation_data/ground_truth.csv'

# Sort the matching file names and take the lexicographically last one; when
# nothing matches the pattern the results path is left as None.
llama_files = glob.glob('output/csv/llama_batch_results_*.csv')
llama_files.sort()
if llama_files:
    LLAMA_RESULTS_PATH = llama_files[-1]
else:
    LLAMA_RESULTS_PATH = None

print(f"Ground truth: {GROUND_TRUTH_PATH}")
print(f"Llama results: {LLAMA_RESULTS_PATH}")

In [None]:
#Cell 2
# Load the ground truth and the Llama results, then keep only receipt rows.

# Cell 1 leaves LLAMA_RESULTS_PATH as None when no results file matched.
# Fail here with an actionable message instead of letting pandas raise an
# opaque error about an invalid (None) path.
if LLAMA_RESULTS_PATH is None:
    raise FileNotFoundError(
        "LLAMA_RESULTS_PATH is None: no Llama batch results CSV was found "
        "(see Cell 1). Run the batch extraction before this notebook."
    )

# Read every column as text. With default dtype inference pandas converts
# numeric-looking fields to int/float and 'true'/'false' to bool, which
# changes their string form (e.g. an ABN column containing a missing value
# becomes float and renders as '12345678901.0'). That distorts the string
# comparison below and would rewrite untouched values when the ground truth
# is saved again in Cell 12. Empty cells are still loaded as NaN.
gt = pd.read_csv(GROUND_TRUTH_PATH, dtype=str)
llama = pd.read_csv(LLAMA_RESULTS_PATH, dtype=str)

# Note the differing column names: the ground truth uses 'DOCUMENT_TYPE',
# the Llama results use 'document_type'.
gt_receipts = gt[gt['DOCUMENT_TYPE'] == 'RECEIPT'].copy()
llama_receipts = llama[llama['document_type'] == 'RECEIPT'].copy()

print(f"Ground truth receipts: {len(gt_receipts)}")
print(f"Llama receipts: {len(llama_receipts)}")

In [None]:
#Cell 3
# Pair each ground-truth receipt with the Llama result for the same image.
def _image_stem(file_name):
    """Return the file name without its directory and final extension."""
    return Path(file_name).stem


# Join on the extension-less name rather than on the raw 'image_file' value.
gt_receipts['image_stem'] = gt_receipts['image_file'].map(_image_stem)
llama_receipts['image_stem'] = llama_receipts['image_file'].map(_image_stem)

# Inner join: only images present on both sides survive. Column names shared
# by both frames (other than the key) get the '_gt' / '_llama' suffixes.
merged = pd.merge(
    gt_receipts,
    llama_receipts,
    how='inner',
    on='image_stem',
    suffixes=('_gt', '_llama'),
)

print(f"Merged receipts: {len(merged)}")

In [None]:
#Cell 4
# Score every receipt field of every merged image against the ground truth.
RECEIPT_FIELDS = [
    'DOCUMENT_TYPE', 'BUSINESS_ABN', 'SUPPLIER_NAME', 'BUSINESS_ADDRESS',
    'PAYER_NAME', 'PAYER_ADDRESS', 'INVOICE_DATE',
    'LINE_ITEM_DESCRIPTIONS', 'LINE_ITEM_QUANTITIES', 'LINE_ITEM_PRICES', 'LINE_ITEM_TOTAL_PRICES',
    'IS_GST_INCLUDED', 'GST_AMOUNT', 'TOTAL_AMOUNT'
]

# Schema of the comparison table; passed explicitly so the frame keeps its
# columns even when there is nothing to compare.
COMPARISON_COLUMNS = ['image', 'field', 'ground_truth', 'llama', 'accuracy', 'match']


def _as_text(value):
    """Return `value` as a string, mapping missing values (NaN/None) to ''."""
    return str(value) if pd.notna(value) else ''


# The merge in Cell 3 uses suffixes=('_gt', '_llama'), and pandas applies the
# suffixes to BOTH sides of every column name the two frames share. A field is
# therefore comparable exactly when '<field>_gt' and '<field>_llama' both
# exist; an unsuffixed '<field>' column means the field came from only one
# input, so there is nothing to compare it against.
comparable_fields = []
for field in RECEIPT_FIELDS:
    if f'{field}_gt' in merged.columns and f'{field}_llama' in merged.columns:
        comparable_fields.append(field)
    else:
        print(f"⚠️  Skipping {field} - not present in both ground truth and Llama results")

results = []
for _, row in merged.iterrows():
    image = row['image_stem']
    for field in comparable_fields:
        gt_val = _as_text(row[f'{field}_gt'])
        llama_val = _as_text(row[f'{field}_llama'])

        # Argument order is (extracted value, ground truth value, field name).
        accuracy = calculate_field_accuracy(llama_val, gt_val, field)

        results.append({
            'image': image,
            'field': field,
            'ground_truth': gt_val,
            'llama': llama_val,
            'accuracy': accuracy,
            'match': accuracy == 1.0
        })

comparison_df = pd.DataFrame(results, columns=COMPARISON_COLUMNS)
print(f"✅ Compared {len(comparison_df)} field values")

In [None]:
#Cell 5
# Headline numbers: mean field accuracy and share of exact matches.
total_fields = len(comparison_df)

print("="*80)
print("RECEIPT EXTRACTION COMPARISON")
print("="*80)
print(f"Images compared: {len(merged)}")

if total_fields == 0:
    # Nothing was compared (e.g. no image matched between the two inputs).
    # Without this guard the cell fails on the missing 'accuracy' column or
    # on the division by zero in the percentage below.
    overall_accuracy = float('nan')
    exact_matches = 0
    print("⚠️  No field comparisons available - check that ground truth and Llama image names match.")
else:
    overall_accuracy = comparison_df['accuracy'].mean() * 100
    exact_matches = comparison_df['match'].sum()
    print(f"Overall accuracy: {overall_accuracy:.1f}%")
    print(f"Exact matches: {exact_matches}/{total_fields} ({exact_matches/total_fields*100:.1f}%)")

In [None]:
#Cell 6
# Per-field summary: mean accuracy, number of exact matches and row count,
# ordered from the most to the least accurate field.
by_field = comparison_df.groupby('field')

field_accuracy = by_field.agg({'accuracy': 'mean', 'match': 'sum'}).reset_index()
field_accuracy = field_accuracy.assign(
    total=by_field.size().values,
    accuracy_pct=field_accuracy['accuracy'] * 100,
).sort_values('accuracy_pct', ascending=False)

report_columns = ['field', 'match', 'total', 'accuracy_pct']
rule = "=" * 80

print("\n" + rule)
print("PER-FIELD ACCURACY")
print(rule)
print(field_accuracy[report_columns].to_string(index=False))

In [None]:
#Cell 7
# List every compared value whose accuracy is below a perfect 1.0.
mismatches = comparison_df.loc[comparison_df['accuracy'] < 1.0].copy()

if len(mismatches) == 0:
    print("\n✅ All fields match perfectly!")
else:
    rule = "=" * 80
    print("\n" + rule)
    print(f"MISMATCHES ({len(mismatches)} fields)")
    print(rule)
    mismatch_columns = zip(
        mismatches['image'],
        mismatches['field'],
        mismatches['accuracy'],
        mismatches['ground_truth'],
        mismatches['llama'],
    )
    for image, field, score, truth, predicted in mismatch_columns:
        print(f"\n{image} | {field} (accuracy: {score:.2f})")
        # Both values are cut to their first 100 characters for display.
        print(f"  GT:    {truth[:100]}")
        print(f"  Llama: {predicted[:100]}")

In [None]:
#Cell 8
# Numbered list of the image stems that can be chosen in the selection cell.
rule = "=" * 80
print(rule)
print("AVAILABLE RECEIPT IMAGES FOR UPDATE")
print(rule)

available_images = sorted(merged['image_stem'].drop_duplicates())
for position, stem in enumerate(available_images, start=1):
    print(f"{position}. {stem}")

print(f"\nTotal: {len(available_images)} receipt images")

In [None]:
#Cell 9
# USER INPUT: edit the two lists below to choose what gets updated.

# Images to update, given as image stems (file name without extension).
selected_images = [
    # 'image_001',
    # 'image_002',
]

# Fields to update. Valid names:
#   'DOCUMENT_TYPE', 'BUSINESS_ABN', 'SUPPLIER_NAME', 'BUSINESS_ADDRESS',
#   'PAYER_NAME', 'PAYER_ADDRESS', 'INVOICE_DATE',
#   'LINE_ITEM_DESCRIPTIONS', 'LINE_ITEM_QUANTITIES', 'LINE_ITEM_PRICES',
#   'LINE_ITEM_TOTAL_PRICES', 'IS_GST_INCLUDED', 'GST_AMOUNT', 'TOTAL_AMOUNT'
selected_fields = [
    # 'TOTAL_AMOUNT',
    # 'GST_AMOUNT',
]

# Echo both selections so they can be checked before the update cells run.
for label, chosen in (('images', selected_images), ('fields', selected_fields)):
    print(f"Selected {len(chosen)} {label}: {chosen}")

In [None]:
#Cell 10
# Build an updated copy of the ground truth in which the selected fields of
# the selected images are replaced by Llama's values. `gt` itself is not
# modified; every replacement is recorded in `changes` for the preview cell.
gt_updated = gt.copy()
gt_updated['image_stem'] = gt_updated['image_file'].apply(lambda x: Path(x).stem)

# Validate the selected fields once, up front. The merge in Cell 3 renames
# every column shared by both inputs with the '_gt' / '_llama' suffixes, so
# Llama's value for a ground-truth field lives in '<field>_llama', not '<field>'.
updatable_fields = []
for field in selected_fields:
    if field not in gt_updated.columns:
        print(f"⚠️  Skipping field {field} - not a ground truth column")
    elif f'{field}_llama' not in merged.columns:
        print(f"⚠️  Skipping field {field} - no Llama value in merged data")
    elif field not in updatable_fields:
        # Hold the column as generic objects so a Llama value of a different
        # type (e.g. text into an inferred-numeric column) can be stored
        # without pandas' incompatible-dtype warning.
        gt_updated[field] = gt_updated[field].astype(object)
        updatable_fields.append(field)

changes = []
for img in selected_images:
    # Image-level checks are done once per image rather than once per field.
    llama_row = merged[merged['image_stem'] == img]
    if len(llama_row) == 0:
        print(f"⚠️  Skipping {img} - not found in merged data")
        continue

    gt_idx = gt_updated[gt_updated['image_stem'] == img].index
    if len(gt_idx) == 0:
        print(f"⚠️  Skipping {img} - not found in ground truth")
        continue

    for field in updatable_fields:
        # Only the first matching row on each side is used.
        llama_value = llama_row.iloc[0][f'{field}_llama']
        old_value = gt_updated.loc[gt_idx[0], field]

        gt_updated.loc[gt_idx[0], field] = llama_value

        changes.append({
            'image': img,
            'field': field,
            'old_value': old_value,
            'new_value': llama_value
        })

# Explicit columns keep the schema available even when nothing was changed.
changes_df = pd.DataFrame(changes, columns=['image', 'field', 'old_value', 'new_value'])
# The helper column must not end up in the saved CSV.
gt_updated = gt_updated.drop(columns=['image_stem'])

print(f"✅ Prepared {len(changes)} field updates")

In [None]:
#Cell 11
# Show the prepared ground-truth edits so they can be reviewed before saving.
if len(changes_df) == 0:
    print("⚠️  No changes to preview. Check your selections in Cell 9.")
else:
    divider = "=" * 80
    print(divider)
    print("PREVIEW OF CHANGES")
    print(divider)
    # Cap the displayed width of each column value at 80 characters.
    pd.set_option('display.max_colwidth', 80)
    preview_text = changes_df.to_string(index=False)
    print(preview_text)
    print(f"\nTotal changes: {len(changes_df)}")

In [None]:
#Cell 12
# Write the updated ground truth to a separate file; the original CSV is
# left untouched. Nothing is written when there are no changes.
if len(changes_df) > 0:
    output_path = 'evaluation_data/ground_truth_updated.csv'
    gt_updated.to_csv(output_path, index=False)

    # Report what was actually changed. The selection lists may contain
    # entries that were skipped in Cell 10, so they would overstate the result.
    updated_images = changes_df['image'].unique().tolist()
    updated_fields = changes_df['field'].unique().tolist()

    print("="*80)
    print("GROUND TRUTH UPDATED")
    print("="*80)
    print(f"✅ Saved to: {output_path}")
    print(f"✅ Updated {len(changes_df)} fields across {len(updated_images)} images")
    print(f"\nFields updated: {updated_fields}")
    print(f"Images updated: {updated_images}")
else:
    print("⚠️  No updates to save. Select images and fields in Cell 9.")