In [None]:
# ============================================================================
# JPEG2000 Batch Decode Report
# ============================================================================
# This notebook performs batch decoding of JPEG2000 images and generates
# a comprehensive report comparing jpegexp-rs with standard decoders.
# ============================================================================

# Install pandas if not available (required for report generation)
# The import is attempted first so that the install step is skipped entirely
# when pandas is already present in the kernel's environment.
try:
    import pandas as pd
except ImportError:
    # sys is only needed on this fallback path, to locate the interpreter
    # that is running the kernel.
    import sys
    # `!` is IPython shell-escape syntax (this cell is not plain Python).
    # Invoking pip through sys.executable runs it with the kernel's own
    # interpreter rather than whichever `pip` is first on PATH.
    # NOTE(review): the pandas version is not pinned.
    !{sys.executable} -m pip install pandas
    # Retry the import now that the install command has run.
    import pandas as pd
# Show which pandas version the rest of the notebook will use.
print('pandas version:', pd.__version__)

# JPEG2000 Batch Decode Report

## Overview
This notebook performs batch decoding of all JPEG2000 images in the test directory and generates a comprehensive report comparing:
- **jpegexp-rs**: Our Rust-based decoder (via CLI binary)
- **imagecodecs/OpenJPEG**: Standard Python decoder library

## Purpose
- Test decoder compatibility across a large set of JPEG2000 files
- Identify files that cause errors or panics
- Compare success rates between decoders
- Generate a detailed report table for analysis

## Methodology
1. Recursively scan the `tests/test_images/JPEG2000` directory
2. Attempt to decode each file with both decoders
3. Record success/failure status and error messages
4. Generate a pandas DataFrame with results
5. Display results in an HTML table

## Output
A comprehensive table showing:
- File name
- jpegexp-rs decode status (OK, ERROR, PANIC, TIMEOUT, EXCEPTION)
- imagecodecs decode status (OK, ERROR)
- Error messages (when applicable)

In [None]:
# Standard library imports
import os
import subprocess
import tempfile

# Third-party imports
import imagecodecs  # Provides OpenJPEG decoder via imagecodecs.jpeg2k_decode()
import pandas as pd  # For data analysis and report generation
from IPython.display import display, HTML  # For formatted table display

# ============================================================================
# Configuration
# ============================================================================

# Absolute path to test images directory.
# The relative ".." is resolved against the current working directory, so the
# notebook is expected to be run from a folder that sits next to `tests/`.
# NOTE: Update this path if your test images are located elsewhere
TEST_IMAGES_DIR = os.path.abspath(os.path.join("..", "tests", "test_images", "JPEG2000"))

# Path to jpegexp-rs binary (adjust based on build configuration)
# The binary should be built with: cargo build --release
# On Windows (os.name == "nt") the executable carries an ".exe" suffix.
bin_path = os.path.abspath(
    os.path.join("..", "target", "release", "jpegexp.exe" if os.name == "nt" else "jpegexp")
)
JPEGEXP_BIN = bin_path

# Raw decode outputs named `<input>.jpegexp.raw` can be present alongside the
# inputs; they are artifacts, not test images, and must not be fed back into
# the decoders on a re-run.
DECODE_ARTIFACT_SUFFIX = ".jpegexp.raw"

# Display configuration
print(f"Test images directory: {TEST_IMAGES_DIR}")
print(f"jpegexp-rs binary: {JPEGEXP_BIN}")
print(f"Binary exists: {os.path.exists(JPEGEXP_BIN)}")


def find_image_files(root_dir, skip_suffix=DECODE_ARTIFACT_SUFFIX):
    """Return the sorted paths of every input file found under ``root_dir``.

    The directory tree is walked recursively, so files in subdirectories are
    included. Files whose name ends with ``skip_suffix`` are left out.

    Args:
        root_dir: Directory to scan. A missing directory yields an empty list
            (``os.walk`` produces nothing for a non-existent path).
        skip_suffix: File-name suffix to exclude; pass ``""`` or ``None`` to
            keep every file.

    Returns:
        A list of paths (each one ``root_dir`` joined with the relative
        location of the file), sorted so the report order is reproducible
        regardless of filesystem enumeration order.
    """
    found = []
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if skip_suffix and filename.endswith(skip_suffix):
                continue
            found.append(os.path.join(dirpath, filename))
    return sorted(found)


# Recursively find all image files in the test directory
# This includes files in subdirectories
image_files = find_image_files(TEST_IMAGES_DIR)

print(f"Total files found (recursive): {len(image_files)}")

# ============================================================================
# Batch Decoding and Report Generation
# ============================================================================

# Per-file wall-clock limit for the jpegexp-rs CLI, in seconds.
DECODE_TIMEOUT_S = 10


def decode_with_jpegexp(img_path, out_dir):
    """Decode one file with the jpegexp-rs CLI and return a status string.

    The CLI is invoked as ``decode -i <img_path> -o <out> -f raw``. The raw
    output is written inside ``out_dir`` (not next to the input) so the test
    image tree is never modified; only the exit status and stderr are used.

    Args:
        img_path: Path of the file to decode.
        out_dir: Existing, writable directory for the decoder's output file.

    Returns:
        One of ``"OK"``, ``"PANIC: <stderr>"``, ``"ERROR: <stderr>"``,
        ``"TIMEOUT: ..."`` or ``"EXCEPTION: <message>"``. Never raises.
    """
    # A single scratch file is reused: its contents are never inspected.
    out_raw = os.path.join(out_dir, "decoded.raw")
    cmd = [JPEGEXP_BIN, "decode", "-i", img_path, "-o", out_raw, "-f", "raw"]
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Undecodable bytes in stderr must not turn a decoder failure
            # into a Python-side EXCEPTION entry.
            errors="replace",
            # Run with timeout to prevent hanging
            timeout=DECODE_TIMEOUT_S,
        )
    except subprocess.TimeoutExpired:
        return f"TIMEOUT: Decoding took too long (>{DECODE_TIMEOUT_S}s)"
    except Exception as e:
        return f"EXCEPTION: {e}"

    if proc.returncode == 0:
        return "OK"

    stderr = proc.stderr.strip()
    # Check for panics (Rust panics are critical errors)
    if "panicked" in stderr:
        return f"PANIC: {stderr}"
    # A process can exit non-zero without writing to stderr; report the exit
    # code then so the entry is not an empty "ERROR: ".
    message = stderr or f"exit code {proc.returncode}"
    return f"ERROR: {message}"


def decode_with_imagecodecs(img_path):
    """Decode one file with ``imagecodecs.jpeg2k_decode`` and return a status.

    Args:
        img_path: Path of the file to decode.

    Returns:
        ``"OK"`` when decoding succeeds, otherwise ``"ERROR: <message>"`` with
        the text of the exception raised while reading or decoding the file.
    """
    try:
        with open(img_path, "rb") as f:
            # The decoded pixels are not needed; only success/failure matters.
            imagecodecs.jpeg2k_decode(f.read())
    except Exception as e:
        return f"ERROR: {e}"
    return "OK"


results = []

# Validate prerequisites
if not os.path.exists(JPEGEXP_BIN):
    print(f"ERROR: Binary not found at {JPEGEXP_BIN}.")
    print("Please build it with: cargo build --release")
elif not image_files:
    print(f"No images found in {TEST_IMAGES_DIR}")
    print(f"Absolute path: {os.path.abspath(TEST_IMAGES_DIR)}")
else:
    print(f"Processing {len(image_files)} files from {TEST_IMAGES_DIR}")
    print("This may take a while...")

    # Decoder output goes to a scratch directory that is removed afterwards.
    with tempfile.TemporaryDirectory(prefix="jpegexp_decode_") as scratch_dir:
        # Process each image file
        for img_path in image_files:
            results.append({
                # Relative filename for display
                "file": os.path.relpath(img_path, TEST_IMAGES_DIR),
                "jpegexp_rs": decode_with_jpegexp(img_path, scratch_dir),
                "imagecodecs": decode_with_imagecodecs(img_path),
            })

    # ====================================================================
    # Generate and Display Report
    # ====================================================================
    df = pd.DataFrame(results)

    # Lift the row/column-width limits only while rendering this table, so
    # the pandas display settings of the rest of the session are untouched.
    with pd.option_context('display.max_rows', None, 'display.max_colwidth', None):
        # Display as HTML table for better readability
        display(HTML(df.to_html(index=False)))

    # Print summary statistics
    print("\n" + "="*60)
    print("SUMMARY STATISTICS")
    print("="*60)
    # total is > 0 here: this branch only runs when image_files is non-empty.
    total = len(df)
    jpegexp_ok = int((df['jpegexp_rs'] == 'OK').sum())
    imagecodecs_ok = int((df['imagecodecs'] == 'OK').sum())
    print(f"Total files processed: {total}")
    print(f"jpegexp-rs success rate: {jpegexp_ok}/{total} ({100*jpegexp_ok/total:.1f}%)")
    print(f"imagecodecs success rate: {imagecodecs_ok}/{total} ({100*imagecodecs_ok/total:.1f}%)")

## Report Interpretation

### Column Descriptions
- **file**: Relative path to the JPEG2000 image file
- **jpegexp_rs**: Decode status from jpegexp-rs decoder
  - `OK`: Successfully decoded
  - `ERROR: <message>`: Decode failed with error message
  - `PANIC: <message>`: Rust panic occurred (critical error)
  - `EXCEPTION: <message>`: Python exception during execution
  - `TIMEOUT: <message>`: Decoding exceeded the 10-second timeout
- **imagecodecs**: Decode status from imagecodecs/OpenJPEG decoder
  - `OK`: Successfully decoded
  - `ERROR: <message>`: Decode failed with error message

### Analysis Tips
1. **Compare success rates**: Check which decoder handles more files successfully
2. **Identify problematic files**: Files that fail in both decoders may be corrupt
3. **Find jpegexp-rs specific issues**: Files that decode with imagecodecs but fail with jpegexp-rs
4. **Check for panics**: PANIC entries indicate critical bugs that need investigation
5. **Review error messages**: Error messages can provide clues about format compatibility issues

### Next Steps
- Investigate files with PANIC or ERROR status
- Compare error messages to identify patterns
- Test edge cases with files that have known issues