In [2]:
import logging
import os
from pathlib import Path
import requests
from dotenv import load_dotenv
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    ApiVlmOptions,
    ResponseFormat,
    VlmPipelineOptions,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline
from io import BytesIO
from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def ollama_vlm_options(model: str, prompt: str):
    options = ApiVlmOptions(
        url="http://localhost:11434/v1/chat/completions",  # the default Ollama endpoint
        params=dict(
            model=model,
        ),
        prompt=prompt,
        timeout=300,  # Increased timeout to 300 seconds
        scale=1.0,
        response_format=ResponseFormat.MARKDOWN,
    )
    return options

In [4]:
logger = logging.getLogger(__name__)  # Get the logger

In [1]:
# def api_image_request(url: str, prompt: str, image_data: bytes, timeout: int) -> str:
#     """
#     Sends an image and prompt to a VLM API and returns the text response.

#     Args:
#         url: The URL of the VLM API endpoint.
#         prompt: The text prompt to send with the image.
#         image_data: The image data as bytes.
#         timeout: The timeout for the request in seconds.

#     Returns:
#         The text response from the API.

#     Raises:
#         requests.exceptions.HTTPError: If the API returns an HTTP error.
#         Exception: For other errors during the API call.
#     """
#     try:
#         logger.debug(f"api_image_request: Sending request to URL: {url}")  # Log URL
#         logger.debug(f"api_image_request: Prompt: {prompt[:50]}...")  # Log first 50 chars of prompt
#         logger.debug(f"api_image_request: Image data length: {len(image_data)} bytes")  # Log image size

#         r = requests.post(
#             url,
#             headers={"Content-Type": "multipart/form-data"},
#             files={
#                 "image": ("image.jpg", image_data, "image/jpeg"),
#                 "prompt": (None, prompt),
#             },
#             timeout=timeout,
#         )

#         logger.debug(f"api_image_request: Response status code: {r.status_code}")  # Log status code
#         logger.debug(f"api_image_request: Response text: {r.text[:100]}...")  # Log first 100 chars of response

#         r.raise_for_status()
#         return r.text
#     except requests.exceptions.HTTPError as e:
#         logger.error(f"api_image_request: HTTPError: {e}, Response text: {e.response.text}")
#         raise e
#     except Exception as e:
#         logger.error(f"api_image_request: Exception: {e}")
#         raise e

In [10]:
def main(path:str):
    logging.basicConfig(level=logging.INFO)
    input_doc_path = Path(path)
    output_file_path = Path("output.md")  # Define the output file

    pipeline_options = VlmPipelineOptions(
        enable_remote_services=True
    )

    pipeline_options.vlm_options = ollama_vlm_options(
        model="granite3.2-vision:latest",
        prompt="OCR the full page to markdown.",
    )

    doc_converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_options=pipeline_options,
                pipeline_cls=VlmPipeline,
            )
        }
    )
    try:
        result = doc_converter.convert(input_doc_path)
        markdown_content = result.document.export_to_markdown()
        print(markdown_content)

        # Write the markdown content to a file
        with open(output_file_path, "w", encoding="utf-8") as f:
            f.write(markdown_content)
        logging.info(f"Markdown output written to {output_file_path}")
    except Exception as e:
        logging.error(f"An error occurred: {e}")  # catch any error
        print(f"An error occurred: {e}")

In [12]:
path = "C:/Users/jongb/Desktop/임시/invoce_01.png"
result = main(path=path)
result


INFO:docling.document_converter:Going to convert document batch...
INFO:docling.document_converter:Initializing pipeline for StandardPdfPipeline with options hash 70041f74270850b7bedf7c8f5c2dcede
INFO:docling.utils.accelerator_utils:Accelerator device: 'cpu'
INFO:docling.utils.accelerator_utils:Accelerator device: 'cpu'
INFO:docling.utils.accelerator_utils:Accelerator device: 'cpu'
INFO:docling.pipeline.base_pipeline:Processing document invoce_01.png
INFO:docling.document_converter:Finished converting document invoce_01.png in 48.70 sec.
INFO:root:Markdown output written to output.md


## Sample Invoice @ Click to edit

YOUR COMPANY

1234 Your Street California 90210 United States 1-888-123-4567 City,

Billed To

Date |ssued

Invoice Number

Amount Due

Your Client 1234 Clients Street City, California 90210 United States 1-888-123-8910

26/3/2021

INV-10012

$1,699.48

Due Date

25/4/2021

| DESCRIPTION                                    | RATE              | QTY   | AMOUNT    |
|------------------------------------------------|-------------------|-------|-----------|
| Services                                       | 555.00            | 10    | 5550.00   |
| Cost ofvarious services                        | #Tax              |       |           |
| Consulting                                     | 57500             | 15    |           |
| Consultant for your business.                  | +Tax              |       |           |
| Materials                                      | 5123.39           |       | 5123.39   |
| Costof materials and supplies to complete job. | #T