In [11]:

# ALTERNATIVE: Simplified client without olmocr toolkit (less accurate)
# This approach doesn't use document anchoring but still works for basic OCR

import base64
from openai import OpenAI
from PIL import Image
import pdf2image
import io
import json

def _page_image_to_png_base64(image, max_dim):
    """Downscale *image* so its longest side is at most *max_dim*, then
    return it PNG-encoded as a base64 ASCII string."""
    width, height = image.size
    longest = max(width, height)
    if longest > max_dim:
        scale = max_dim / longest
        # Clamp each side to >= 1 px: int() truncation would otherwise yield
        # a zero-sized dimension for extremely elongated pages.
        new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
        image = image.resize(new_size, Image.Resampling.LANCZOS)

    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    return base64.b64encode(buffer.getvalue()).decode('utf-8')


def simple_pdf_to_olmocr(
    pdf_path,
    page_num=1,
    *,
    prompt=None,
    model="olmOCR-7B-0225-preview",
    base_url="http://localhost:8000/v1",
    api_key="EMPTY",
    max_dim=1024,
    max_tokens=4096,
    temperature=0.1,
):
    """
    Simple PDF to OLMoCR without document anchoring.

    Renders a single PDF page to an image, downsizes it if needed, and sends
    it together with a text prompt to an OpenAI-compatible chat-completions
    endpoint.

    Note: This is less accurate than the full document-anchoring approach.

    Args:
        pdf_path: Path of the PDF file to read.
        page_num: 1-based number of the page to process.
        prompt: Instruction sent alongside the page image. When None, a
            default prompt asking for property account number, address and
            owner as JSON is used.
        model: Model name passed to the chat-completions endpoint.
        base_url: Base URL of the OpenAI-compatible server.
        api_key: API key passed to the client.
        max_dim: Maximum length in pixels of the image's longest side;
            larger pages are scaled down, smaller pages are left unchanged.
        max_tokens: Completion token limit passed to the endpoint.
        temperature: Sampling temperature passed to the endpoint.

    Returns:
        The message content of the first completion choice.

    Raises:
        ValueError: If page_num or max_dim is smaller than 1, or if the
            requested page could not be converted to an image.
    """
    # Validate up front so bad arguments fail with a clear message before
    # any PDF rendering or network work is attempted.
    if page_num < 1:
        raise ValueError(f"page_num must be >= 1, got {page_num}")
    if max_dim < 1:
        raise ValueError(f"max_dim must be >= 1, got {max_dim}")

    if prompt is None:
        # Simple prompt (not as good as document anchoring)
        prompt = "Extract property account number, address, owner from this document and output the result as JSON"

    # Convert only the requested PDF page to an image
    pages = pdf2image.convert_from_path(pdf_path, first_page=page_num, last_page=page_num)
    if not pages:
        raise ValueError("Could not convert PDF page to image")

    image_base64 = _page_image_to_png_base64(pages[0], max_dim)

    # Send to the OpenAI-compatible server (e.g. vLLM)
    client = OpenAI(api_key=api_key, base_url=base_url)

    response = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}
            ]
        }],
        max_tokens=max_tokens,
        temperature=temperature
    )

    return response.choices[0].message.content

# Example: extract fields from the first page of a PDF and show the model's reply
result = simple_pdf_to_olmocr(pdf_path="somepdf.pdf", page_num=1)
print(result)


{"property_account_number": "O/A/04/01222010", "address": "4TH FLOOR FLAT NO.401, S NO 3/1/2+3+4, WING \" A \" K- SQUARE BEHIND STATE BANK OF INDIA, BALEWADI, PUNE 411045", "owner": "KAMALJEET SINGH & AMARPREET SAINI", "bank_ref_number": "BIC5KN0L2EHX6", "payment_mode": "Bharat QR Code", "amount": "13670", "transaction_date": "17/05/2025"}
