In [None]:
# Cell 1
# Environment setup: extend the import path, pull in dependencies, fix the seed.
import sys
# Put the project checkout at the front of the import search path.
# The `common.*` imports used by this notebook presumably resolve from this
# directory — TODO confirm the package location.
sys.path.insert(0, '/home/jovyan/_LMM_POC')

from pathlib import Path
import random

import numpy as np
import matplotlib.pyplot as plt

import torch
from PIL import Image
from transformers import AutoProcessor, MllamaForConditionalGeneration

# NOTE(review): `random`, `np`, `plt`, `torch`, `AutoProcessor` and
# `MllamaForConditionalGeneration` are not referenced by any cell visible in
# this notebook; they are left in place in case other cells rely on them.

# Project-local helper; imported after the sys.path change above.
from common.reproducibility import set_seed

# Fixed seed value for repeatable runs. Which random number generators
# set_seed actually covers is defined in common.reproducibility and is not
# visible here — verify before relying on it.
set_seed(42)

In [None]:
# Cell 2
# Load the Llama vision model and its processor through the project loader.
from common.llama_model_loader_robust import load_llama_model_robust
from rich import print as rprint

model_path = "/home/jovyan/shared_PTM/Llama-3.2-11B-Vision-Instruct"

rprint("[bold green]Loading Llama model with robust multi-GPU detection...[/bold green]")

# Every option handed to the loader, collected in one place so the call site
# stays short. Quantization is switched off ("No quantization as requested").
# NOTE(review): how the loader uses max_new_tokens is not visible here; the
# generation cell passes its own max_new_tokens value to generate().
loader_options = dict(
    model_path=model_path,
    use_quantization=False,
    device_map='auto',
    max_new_tokens=2000,
    torch_dtype='bfloat16',
    low_cpu_mem_usage=True,
    verbose=True,
)
model, processor = load_llama_model_robust(**loader_options)

# Best-effort weight tying after loading: a failure is reported as a warning
# and the notebook carries on.
try:
    model.tie_weights()
    rprint("[green]✅ Model weights tied successfully[/green]")
except Exception as tie_error:
    rprint(f"[yellow]⚠️ tie_weights() warning (can be ignored): {tie_error}[/yellow]")

rprint("[bold green]✅ Model ready for single-image extraction[/bold green]")

In [None]:
# Cell 3
# Load the bank-statement image that the later cells pass to the model.
# Alternative location kept for reference:
# imageName = "/home/jovyan/nfs_share/tod/LMM_POC/evaluation_data/image_008.png"
imageName = "/home/jovyan/_LMM_POC/evaluation_data/image_008.png"

print("📂 Loading image...")
image = Image.open(imageName)
# Image.open only identifies the file and keeps it open; the pixel data is
# read later, on first use. Forcing the read here means a truncated or corrupt
# file fails in this cell rather than inside the model call, and Pillow can
# release the underlying file handle (for single-frame images) right away.
image.load()
print(f"✅ Image loaded: {image.size}")

In [None]:
# Cell 4
# Prompt text sent to the model together with the statement image.
#
# --- Retired variant (fully commented out, kept for reference only) ---
# basic flat 5 column ["Date", "Description", "Withdrawal", "Credit", "Balance"] transaction table prompt
# NOTE(review): the line above lists a "Credit" column while the retired
# prompt text below names it "Deposit" — confirm which header the statements use.
# prompt_text = """
# You are an expert document analyzer specializing in bank statement extraction.
# Extract structured data from this flat table bank statement for taxpayer expense claims.
#
# CONVERSATION PROTOCOL:
# - Start your response immediately with "DOCUMENT_TYPE: BANK_STATEMENT"
# - Do NOT include conversational text like "I'll extract..." or "Based on the document..."
# - Do NOT use bullet points, numbered lists, asterisks, or markdown formatting (no **, no ##, no 1., no -)
# - Output ONLY the structured extraction data below
# - End immediately after "TRANSACTION_AMOUNTS_PAID:" with no additional text
# - NO explanations, NO comments, NO additional text
#
# CRITICAL:
# - The transaction table in the image has a "Date", a "Description", a "Withdrawal", a "Deposit" and a "Balance" column
# - Specifically, it has a "Date" column, a "Description" column, a "Withdrawal" column, a "Deposit" column and a "Balance" column
#
# ANTI-HALLUCINATION RULES:
# - YOU MUST NOT GUESS values you are unsure of
# - Rows may have missing values
# - Rows NEVER HAVE REPEATED AMOUNTS, SO YOU MUST NOT REPEAT VALUES THAT YOU ARE UNSURE OF
# - If a value is unclear or missing, use "NOT_FOUND" instead of guessing
#
# STEP 1:
# - Extract the Transaction Table formatted as markdown.
#
# """

# --- Active prompt ---
# Written one source line per prompt line so the leading/trailing newlines and
# the two-space indent of the bullet are explicit. The resulting string is the
# same text as the five-line triple-quoted form it replaces.
prompt_text = (
    "\n"
    "You are an expert document analyzer specializing in bank statement extraction.\n"
    "\n"
    "Step 1\n"
    "  - Extract the Transaction Table formatted as markdown.\n"
)

In [None]:
# Cell 5
# Run the vision model on the loaded image, clean the response, and save it.
from common.text_cleaning import clean_llama_response, clean_markdown_table

print("✅ Text cleaning utilities loaded")

# One user turn for the Llama chat template: an image placeholder followed by
# the text prompt.
messageDataStructure = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": prompt_text},
        ],
    }
]

# Render the conversation to a prompt string that ends with the generation prompt.
textInput = processor.apply_chat_template(
    messageDataStructure, add_generation_prompt=True
)

# add_special_tokens=False: the rendered chat template already carries the
# begin-of-text token, so letting the tokenizer add special tokens again would
# prepend a duplicate. This mirrors the published Llama 3.2 Vision usage.
inputs = processor(
    image,
    textInput,
    add_special_tokens=False,
    return_tensors="pt",
).to(model.device)

# Greedy decoding (do_sample=False) for deterministic output; the sampling
# knobs are explicitly unset.
output = model.generate(
    **inputs,
    max_new_tokens=4000,
    do_sample=False,
    temperature=None,
    top_p=None,
)
# NOTE(review): the whole of output[0] is decoded here. Removing any echoed
# prompt text or chat markers is presumably the job of clean_llama_response —
# confirm against common.text_cleaning.
generatedOutput = processor.decode(output[0])

# Clean the response to remove chat and markdown artifacts
cleanedOutput = clean_llama_response(generatedOutput)
# Remove "**" bold markers. The quote-prefixed form ("**) is stripped first,
# exactly as before; the second replace removes the bare ** markers that the
# previous single replace left behind.
cleanedOutput = cleanedOutput.replace("\"**", "").replace("**", "")

# Clean markdown table: replace empty cells with NOT_FOUND
if '|' in cleanedOutput:  # A pipe character is taken as the sign of a table
    cleanedOutput = clean_markdown_table(cleanedOutput)
    print("✅ Empty cells replaced with NOT_FOUND")

print("✅ Response generated successfully!")
print("\n" + "=" * 60)
print("CLEANED EXTRACTION:")
print("=" * 60)
print(cleanedOutput)
print("=" * 60)

# Save the cleaned response to a file (relative to the working directory)
output_path = Path("llama_grouped_bank_statement_output.txt")

with output_path.open("w", encoding="utf-8") as text_file:
    text_file.write(cleanedOutput)

print(f"✅ Response saved to: {output_path}")
print(f"📁 File size: {output_path.stat().st_size} bytes")

In [None]:
# Cell 6
# Turn the cleaned markdown table into the structured extraction text.
from common.table_parser import parse_markdown_table, extract_columns, format_structured_extraction

# Rows recovered from the markdown table held in cleanedOutput.
table_rows = parse_markdown_table(cleanedOutput)
print(f"📊 Parsed {len(table_rows)} transaction rows from markdown table\n")

# Column positions requested from each row: 0=Date, 1=Description, 2=Withdrawal.
# NOTE(review): this assumes the model emitted the columns in that order.
dates, descriptions, withdrawals = extract_columns(table_rows, 0, 1, 2)

# Build the structured text (labelled "STEPS 2-5" by the notebook author).
structured_output = format_structured_extraction(dates, descriptions, withdrawals)

# Banner, body and closing rule emitted in one call, one item per line.
print(
    "=" * 60,
    "STRUCTURED EXTRACTION OUTPUT:",
    "=" * 60,
    structured_output,
    "=" * 60,
    sep="\n",
)

# Persist the structured text next to the notebook.
structured_path = Path("llama_structured_extraction_output.txt")
structured_path.write_text(structured_output, encoding="utf-8")

print(f"\n✅ Structured output saved to: {structured_path}")
print(f"📁 File size: {structured_path.stat().st_size} bytes")

In [None]:
# Cell 7
# Drop the rows flagged NOT_FOUND (per the notebook author: in
# TRANSACTION_AMOUNTS_PAID) and re-emit the structured text.
from common.table_parser import filter_not_found_rows

# The filtering itself lives in the project module; this cell only calls it.
# format_structured_extraction is the name imported by the structured-extraction cell.
filtered_dates, filtered_descriptions, filtered_amounts = filter_not_found_rows(
    dates,
    descriptions,
    withdrawals,
)

print(f"📊 Filtered out {len(dates) - len(filtered_dates)} rows with NOT_FOUND")
print(f"✅ {len(filtered_dates)} transactions with actual withdrawal amounts\n")

# Same formatter as the unfiltered output, applied to the surviving rows.
filtered_structured_output = format_structured_extraction(
    filtered_dates,
    filtered_descriptions,
    filtered_amounts,
)

# Banner, body and closing rule emitted in one call, one item per line.
print(
    "=" * 60,
    "FILTERED STRUCTURED EXTRACTION OUTPUT:",
    "(Rows with NOT_FOUND in withdrawals removed)",
    "=" * 60,
    filtered_structured_output,
    "=" * 60,
    sep="\n",
)

# Persist the filtered text next to the notebook.
filtered_path = Path("llama_filtered_extraction_output.txt")
filtered_path.write_text(filtered_structured_output, encoding="utf-8")

print(f"\n✅ Filtered output saved to: {filtered_path}")
print(f"📁 File size: {filtered_path.stat().st_size} bytes")