In [None]:
#Cell 0: Imports
from pathlib import Path

import torch
import yaml
from PIL import Image
from transformers import AutoProcessor, MllamaForConditionalGeneration

In [None]:
# Cell 1: Load bank statement structure classifier prompt
# The YAML document is parsed into `classifier_config`; the prompt-building
# cell later reads its 'instruction', 'classification_categories' and
# 'output_format' entries.
prompt_path = Path(
    "/home/jovyan/nfs_share/tod/LMM_POC/prompts/bank_statement_structure_classifier.yaml"
)

print("📄 Loading classification prompt...")
with prompt_path.open(mode="r", encoding="utf-8") as prompt_file:
    # safe_load builds plain Python objects only (no arbitrary object construction)
    classifier_config = yaml.safe_load(prompt_file)

# Short summary of what was loaded; each lookup is done right before it is
# printed, so a missing key fails on the line that needs it.
print(f"✅ Loaded classifier: {classifier_config['name']}")
print(f"📋 Version: {classifier_config['version']}")
print(f"🎯 Task: {classifier_config['task']}")
print(
    f"📊 Classification categories: {len(classifier_config['classification_categories'])}"
)

In [None]:
# Cell 2: Load Llama-3.2-Vision model
# Both the model weights and the matching processor are read from the same
# local directory.
model_id = "/home/jovyan/nfs_share/models/Llama-3.2-11B-Vision-Instruct"

print("🔧 Loading Llama-3.2-Vision model...")

# Weights are requested in bfloat16; device placement is delegated to the
# loader through device_map="auto".
model_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
model = MllamaForConditionalGeneration.from_pretrained(model_id, **model_load_options)
processor = AutoProcessor.from_pretrained(model_id)

print("✅ Model loaded successfully!")

In [None]:
# Cell 3: Load test bank statement image
# `image` is the PIL image handed to the processor in the generation cell.
image_path = "/home/jovyan/nfs_share/tod/LMM_POC/evaluation_data/image_003.png"

print("📷 Loading bank statement image...")
image = Image.open(image_path)

# Report the image dimensions and where the image came from, one per line.
print(
    f"✅ Image loaded: {image.size}",
    f"📁 Image path: {image_path}",
    sep="\n",
)

In [None]:
# Cell 4: Build classification prompt from YAML
# The final prompt is: instruction, then one section per category
# (identifier, description, bulleted structural features), then the
# output-format text.
classification_instruction = classifier_config['instruction']

# Render every category as its own text section, each ending in a blank line.
category_sections = []
for category_data in classifier_config['classification_categories'].values():
    section_lines = [
        f"**{category_data['identifier']}**",
        f"{category_data['description']}",
        "Structural features:",
    ]
    section_lines.extend(
        f"- {feature}" for feature in category_data['structural_features']
    )
    category_sections.append("\n".join(section_lines) + "\n\n")

# The category listing starts with two newlines, separating it from the instruction.
categories_text = "\n\n" + "".join(category_sections)

output_format = classifier_config['output_format']

full_prompt = f"{classification_instruction}\n{categories_text}\n{output_format}"

# Preview: overall length plus the first 500 characters between separator rules.
preview_rule = "=" * 60
print("📝 Classification prompt constructed")
print(f"📏 Prompt length: {len(full_prompt)} characters")
print("\n" + preview_rule)
print("CLASSIFICATION PROMPT (first 500 chars):")
print(preview_rule)
print(full_prompt[:500])
print("...")
print(preview_rule)

In [None]:
# Cell 5: Generate classification response
# Single-turn conversation: one user message made of an image placeholder
# followed by the text prompt assembled in the previous cell.
messageDataStructure = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {
                "type": "text",
                "text": full_prompt,
            },
        ],
    }
]

print("🤖 Generating classification with Llama-3.2-Vision...")

# Render the conversation into the model's chat format; add_generation_prompt=True
# appends the assistant header so the model continues with its answer.
textInput = processor.apply_chat_template(
    messageDataStructure, add_generation_prompt=True
)

# The rendered chat template already begins with the <|begin_of_text|> token.
# Tokenising with the default add_special_tokens=True would prepend a second
# one, so it is disabled here (this mirrors the usage shown in the
# Llama-3.2-Vision model card).
inputs = processor(
    image,
    textInput,
    add_special_tokens=False,
    return_tensors="pt",
).to(model.device)

# Generate response (at most 2000 new tokens)
output = model.generate(**inputs, max_new_tokens=2000)

# Decode the full returned sequence with special tokens kept: the display and
# save cells locate the answer by splitting on the assistant header marker.
generatedOutput = processor.decode(output[0])

print("✅ Classification generated successfully!")

In [None]:
# Cell 6: Display classification results
# Shows the raw decoded output first, then (when the assistant header is
# present) only the text that follows it.
separator = "=" * 60
assistant_header = "<|start_header_id|>assistant<|end_header_id|>"

print("\n" + separator)
print("LLAMA-3.2-VISION STRUCTURE CLASSIFICATION:")
print(separator)
print(generatedOutput)
print(separator)

# Extract clean response (remove chat template artifacts)
if assistant_header in generatedOutput:
    # Take the segment right after the first assistant header, then drop the
    # end-of-turn token and surrounding whitespace.
    after_header = generatedOutput.split(assistant_header)[1]
    clean_response = after_header.replace("<|eot_id|>", "").strip()

    print("\n" + separator)
    print("CLEAN CLASSIFICATION RESPONSE:")
    print(separator)
    print(clean_response)
    print(separator)

In [None]:
# Cell 7: Save classification results
# Writes the raw decoded output, and the cleaned response when one was
# extracted, into ./classification_results (created if missing).
output_dir = Path("classification_results")
output_dir.mkdir(exist_ok=True)

# Save full output
output_path = output_dir / "llama_structure_classification.txt"
output_path.write_text(generatedOutput, encoding="utf-8")

# Save clean output
# Same marker test as the display cell, so `clean_response` is only read when
# that cell actually assigned it.
if "<|start_header_id|>assistant<|end_header_id|>" in generatedOutput:
    clean_output_path = output_dir / "llama_structure_classification_clean.txt"
    clean_output_path.write_text(clean_response, encoding="utf-8")
    print(f"✅ Clean response saved to: {clean_output_path}")

print(f"✅ Full response saved to: {output_path}")
print(f"📄 File size: {output_path.stat().st_size} bytes")