[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pdf-tools/components-code-sample-hub/blob/main/jupyter/pdftools_toolbox/pdftools_toolbox_list_content_bounds.ipynb)

In [None]:
%pip install pdftools_toolbox
%pip install ipython

# List bounds of page content
For each page, list the page size and the rectangular
bounding box of all content on the page in PDF points
(1/72 inch).

In [None]:
import io
from pdftools_toolbox.geometry.real import Point, Rectangle
from pdftools_toolbox.pdf import Document
from pdftools_toolbox.pdf.content import ContentExtractor

In [None]:
def prepare_file(url: str, path: str, timeout: float = 30.0) -> None:
    """
    Download a file from a given URL and save it to the local system.

    Args:
        url: Address of the file to download.
        path: Local path the downloaded bytes are written to. An existing
            file at this path is overwritten.
        timeout: Number of seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status code.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    import requests

    # Without an explicit timeout, requests waits indefinitely for an
    # unresponsive server and the notebook cell would never finish.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    with open(path, 'wb') as f:
        f.write(response.content)

In [None]:
# Set input arguments
#   input_url:       remote location of the sample PDF to download
#   input_file_path: local file name the download is stored under; this same
#                    path is opened later as the input document
input_url = 'https://pdftools-public-downloads-production.s3.eu-west-1.amazonaws.com/samples/testfiles/InvoiceNone.pdf'
input_file_path = 'InvoiceNone.pdf'
# Fetch the sample file so that it is available locally
prepare_file(input_url, input_file_path)

In [None]:
def enlarge(content_box: Rectangle, point: Point):
    """
    Grow ``content_box`` in place so that it also covers ``point``.

    Each edge is only ever pushed outwards: ``left`` and ``bottom`` can
    decrease, ``right`` and ``top`` can increase. Nothing is returned.
    """
    px, py = point.x, point.y

    # Horizontal extent
    content_box.left, content_box.right = (
        min(content_box.left, px),
        max(content_box.right, px),
    )

    # Vertical extent
    content_box.bottom, content_box.top = (
        min(content_box.bottom, py),
        max(content_box.top, py),
    )

In [None]:
def list_content_bounds(input_doc: Document) -> None:
    """
    Print the page size and the content bounding box of every page.

    For each page of ``input_doc`` the page width and height are printed,
    followed by the smallest axis-aligned rectangle that encloses the
    transformed bounding boxes of all content elements on that page.
    A page without any content elements is reported as such instead of
    printing the infinite start values of the box.

    Args:
        input_doc: Opened document whose pages are inspected.
    """
    # Iterate over all pages
    for page_number, page in enumerate(input_doc.pages, start=1):
        print(f"Page {page_number}")

        # Print page size
        size = page.size
        print("  Size:")
        print(f"    Width: {size.width}")
        print(f"    Height: {size.height}")

        # Start with an "inverted" box so that the first point that is
        # merged in defines all four edges.
        content_box = Rectangle(
            left=float("inf"),
            bottom=float("inf"),
            right=float("-inf"),
            top=float("-inf"),
        )
        has_content = False

        # Extract content and compute bounding box
        extractor = ContentExtractor(page.content)
        for element in extractor:
            has_content = True
            tr = element.transform
            box = element.bounding_box

            # The location on the page is given by the transformed points,
            # so every corner is transformed before it is merged in.
            corners = (
                (box.left, box.bottom),
                (box.right, box.bottom),
                (box.right, box.top),
                (box.left, box.top),
            )
            for x, y in corners:
                enlarge(content_box, tr.transform_point(Point(x=x, y=y)))

        print("  Content bounding box:")
        if not has_content:
            # The box still holds its inf/-inf start values, which are not
            # meaningful coordinates.
            print("    (page has no content)")
            continue

        print(f"    Left: {content_box.left}")
        print(f"    Bottom: {content_box.bottom}")
        print(f"    Right: {content_box.right}")
        print(f"    Top: {content_box.top}")

In [None]:
try:
    # The SDK has to be initialized with a license key before it is used.
    # An invalid key raises an exception, which the handler below reports.
    from pdftools_toolbox.sdk import Sdk
    Sdk.initialize("INSERT-LICENSE-KEY", None)

    # Open the input file and the PDF document on top of it; both are
    # closed again when the block is left.
    with io.FileIO(input_file_path, "rb") as pdf_stream, \
            Document.open(pdf_stream, None) as pdf_doc:
        # List page sizes and content bounds
        list_content_bounds(pdf_doc)

    print("Execution successful.")
except Exception as e:
    print(f"An error occurred: {e}")