In [None]:
## Docling examples

import logging
import time
from pathlib import Path
import pandas as pd
from pprint import pprint

## Example 1: Convert a PDF document to Markdown / plain text

from docling.document_converter import DocumentConverter

# Location of the document to convert (a local path or a URL).
source = "docs/sample.pdf"
converter = DocumentConverter()
result = converter.convert(source)

# The Markdown export is one long string. print() renders it as readable
# Markdown; pprint() would instead emit the string's repr, split into quoted
# chunks with literal "\n" escapes.
print(result.document.export_to_markdown())


Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 102857.59it/s]
Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


## Sample Contract

## Contract No.\_\_\_\_\_\_\_\_\_\_\_ PROFESSIONAL SERVICES AGREEMENT

THIS AGREEMENT made and entered into this \_\_\_\_\_\_\_day of , 20 by and between the SANTA CRUZ COUNTY REGIONAL TRANSPORTATION COMMISSION, hereinafter called COMMISSION, and \_\_\_\_\_\_\_\_ \_\_\_\_, hereinafter called CONSULTANT for \_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_ (services/project name).

## 1. DUTIES.

- A. CONSULTANT agrees to exercise special skill to accomplish the following results in a manner reasonably satisfactory to COMMISSION: \_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_, as specified in Exhibit A: Scope of Services, which by this reference is incorporated herein.
- B. CONSULTANT shall provide the personnel listed below to perform the above-specified services, which persons are hereby designated as key personnel under this Agreement.

Name

Firm

Function

Principal in Charge

Project Manager

- C. No person named in paragraph B of this Section, or his or her s

In [None]:
# Show the text export of the document converted above. print() is used
# because pprint() on a long string prints a quoted, chunked repr with literal
# "\n" escapes rather than the text itself.
print(result.document.export_to_text())

Sample Contract

Contract No.\_\_\_\_\_\_\_\_\_\_\_ PROFESSIONAL SERVICES AGREEMENT

THIS AGREEMENT made and entered into this \_\_\_\_\_\_\_day of , 20 by and between the SANTA CRUZ COUNTY REGIONAL TRANSPORTATION COMMISSION, hereinafter called COMMISSION, and \_\_\_\_\_\_\_\_ \_\_\_\_, hereinafter called CONSULTANT for \_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_ (services/project name).

1. DUTIES.

 A. CONSULTANT agrees to exercise special skill to accomplish the following results in a manner reasonably satisfactory to COMMISSION: \_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_, as specified in Exhibit A: Scope of Services, which by this reference is incorporated herein.

 B. CONSULTANT shall provide the personnel listed below to perform the above-specified services, which persons are hereby designated as key personnel under this Agreement.

Name

Firm

Function

Principal in Charge

Project Manager

 C. No person named in paragraph B of this Section, or his or her successor, s

In [None]:
# Pretty-print the complete dict export of the converted document.
contract_doc = result.document
pprint(contract_doc.export_to_dict())

{'schema_name': 'DoclingDocument', 'version': '1.0.0', 'name': 'SampleContract-Shuttle', 'origin': {'mimetype': 'application/pdf', 'binary_hash': 10327854376992172826, 'filename': 'SampleContract-Shuttle.pdf'}, 'furniture': {'self_ref': '#/furniture', 'children': [], 'name': '_root_', 'label': 'unspecified'}, 'body': {'self_ref': '#/body', 'children': [{'$ref': '#/texts/0'}, {'$ref': '#/texts/1'}, {'$ref': '#/texts/2'}, {'$ref': '#/texts/3'}, {'$ref': '#/groups/0'}, {'$ref': '#/texts/6'}, {'$ref': '#/texts/7'}, {'$ref': '#/texts/8'}, {'$ref': '#/texts/9'}, {'$ref': '#/texts/10'}, {'$ref': '#/groups/1'}, {'$ref': '#/texts/12'}, {'$ref': '#/groups/2'}, {'$ref': '#/texts/15'}, {'$ref': '#/texts/16'}, {'$ref': '#/texts/17'}, {'$ref': '#/groups/3'}, {'$ref': '#/texts/23'}, {'$ref': '#/texts/24'}, {'$ref': '#/groups/4'}, {'$ref': '#/texts/28'}, {'$ref': '#/texts/29'}, {'$ref': '#/groups/5'}, {'$ref': '#/texts/39'}, {'$ref': '#/texts/40'}, {'$ref': '#/groups/6'}, {'$ref': '#/texts/43'}, {'$re

In [None]:


# Pretty-print only the 'texts' entry of the document's dict export.
exported = result.document.export_to_dict()
pprint(exported['texts'])

{'schema_name': 'DoclingDocument', 'version': '1.0.0', 'name': 'SampleContract-Shuttle', 'origin': {'mimetype': 'application/pdf', 'binary_hash': 10327854376992172826, 'filename': 'SampleContract-Shuttle.pdf'}, 'furniture': {'self_ref': '#/furniture', 'children': [], 'name': '_root_', 'label': 'unspecified'}, 'body': {'self_ref': '#/body', 'children': [{'$ref': '#/texts/0'}, {'$ref': '#/texts/1'}, {'$ref': '#/texts/2'}, {'$ref': '#/texts/3'}, {'$ref': '#/groups/0'}, {'$ref': '#/texts/6'}, {'$ref': '#/texts/7'}, {'$ref': '#/texts/8'}, {'$ref': '#/texts/9'}, {'$ref': '#/texts/10'}, {'$ref': '#/groups/1'}, {'$ref': '#/texts/12'}, {'$ref': '#/groups/2'}, {'$ref': '#/texts/15'}, {'$ref': '#/texts/16'}, {'$ref': '#/texts/17'}, {'$ref': '#/groups/3'}, {'$ref': '#/texts/23'}, {'$ref': '#/texts/24'}, {'$ref': '#/groups/4'}, {'$ref': '#/texts/28'}, {'$ref': '#/texts/29'}, {'$ref': '#/groups/5'}, {'$ref': '#/texts/39'}, {'$ref': '#/texts/40'}, {'$ref': '#/groups/6'}, {'$ref': '#/texts/43'}, {'$re

In [None]:
## Example 2 : Tables

# Convert a PDF and export every table found in the converted document:
# each table is printed as Markdown and written to a CSV and an HTML file
# inside `output_dir`.
input_doc_path = Path("./docs/table.pdf")
output_dir = Path("scratch")

doc_converter = DocumentConverter()
conv_res = doc_converter.convert(input_doc_path)
# Create the output directory (and any missing parents); no error if it exists.
output_dir.mkdir(parents=True, exist_ok=True)
doc_filename = conv_res.input.file.stem

# Export tables
for table_ix, table in enumerate(conv_res.document.tables):
    table_df: pd.DataFrame = table.export_to_dataframe()
    # NOTE: the printed heading is 0-based, the file names below are 1-based.
    print(f"## Table {table_ix}")
    print(table_df.to_markdown())

    # Save the table as csv
    element_csv_filename = output_dir / f"{doc_filename}-table-{table_ix+1}.csv"
    table_df.to_csv(element_csv_filename)

    # Save the table as html.
    # The encoding is explicit so the result does not depend on the platform's
    # default text encoding (non-ASCII cell content would otherwise fail or be
    # mis-encoded on non-UTF-8 locales).
    element_html_filename = output_dir / f"{doc_filename}-table-{table_ix+1}.html"
    with element_html_filename.open("w", encoding="utf-8") as fp:
        fp.write(table.export_to_html())



Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 96297.80it/s]
Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


## Table 0
|    | Disability.Category   |   Participants. |   Ballots.Completed |   Ballots.Incomplete/  Terminated | Results  Time to.Accuracy   | Results  Time to.complete   |
|---:|:----------------------|----------------:|--------------------:|----------------------------------:|:----------------------------|:----------------------------|
|  0 | Blind                 |               5 |                   1 |                                 4 | 34.5%, n=1                  | 1199 sec, n=1               |
|  1 | Low Vision            |               5 |                   2 |                                 3 | 98.3% n=2                   | 1716 sec, n=3               |
|  2 | Dexterity             |               5 |                   4 |                                 1 | 98.3%, n=4                  | 1672.1 sec, n=4             |
|  3 | Mobility              |               3 |                   3 |                                 0 | 95.4%, n=3                  | 1416 sec, n=3    

In [19]:
## Example 3 : OCR (Scanned Document)

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    EasyOcrOptions,
    OcrMacOptions,
    PdfPipelineOptions,
    TesseractCliOcrOptions,
    TesseractOcrOptions,
)
from docling.document_converter import DocumentConverter, PdfFormatOption

# Run the PDF pipeline with OCR over every full page of a scanned document
# and print the Markdown export of the result.
input_doc = Path("./docs/scanned.pdf")

# OCR engine configuration. EasyOcrOptions is used here; TesseractOcrOptions,
# TesseractCliOcrOptions or OcrMacOptions (Mac only) can be substituted, e.g.
#   TesseractOcrOptions(force_full_page_ocr=True)
ocr_options = EasyOcrOptions(force_full_page_ocr=True)

# Pipeline configuration: OCR and table-structure recovery enabled, with
# cell matching turned on in the table-structure options.
pipeline_options = PdfPipelineOptions(do_ocr=True, do_table_structure=True)
pipeline_options.table_structure_options.do_cell_matching = True
pipeline_options.ocr_options = ocr_options

# Register the pipeline options for PDF inputs only.
pdf_format_option = PdfFormatOption(pipeline_options=pipeline_options)
converter = DocumentConverter(format_options={InputFormat.PDF: pdf_format_option})

doc = converter.convert(input_doc).document
md = doc.export_to_markdown()
print(md)



Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 89240.51it/s]
Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


<!-- image -->

## EXECUIVE qIaECToa

Don Naylor; CAE

## EXECUTIVE CoMMitee

President: Temy M. Weidner (EIU) 44 Heather Drive Charleslon, IL 61920 (217) 345-5320 tmweidner @eiuedu

## Past-President:

David Hilquist (OCC) Cayuga Coun Hawthome Woods, IL 60047 (847) 823-2214 dhilquis @oaklon edu

## Treasurer:

Kalharine Kral (UIUC) 23 Long Grove Drive Monticello, IL 61856 (217) 782-2849 kral@monticellopro.net

Member-at-Large: Bunon Wítthuhn (WIU) 1106 Baybery Lane Macomb, IL 61455 {309) 833-1939 13jose22 @msn.comn

Member-at-Large: Alan Voelker (NIU) 1505 Margaret Lane DaKalb, IL 60115 815756-7447 avoelker@niu.edu

Member-at-Large: Ranc Thomas (LCC) 2480 Wedgwood Dnive West Florissant, MO 63033 (314) 921-7364 rhomas257 @a0l.com

Member-at-Large: Janet Cook (ISU) 17438 Eaat Walden Road Hudson, IL 61748 (309) 726-1664 jmcook @ilsluodu

## STATE UNIVERSITIES ANNUITANTS ASSOCIATION AND FOUNDATION

Sørving retirees; spouses and survivors from the public Universities and Community Colleges