### Export tables

Based on the Docling "Export tables" example: https://ds4sd.github.io/docling/examples/export_tables/

In [4]:
import logging
import time
from pathlib import Path
import pandas as pd
from docling.document_converter import DocumentConverter

In [5]:
# Module-level logger; main() uses it to report the files it writes and the total run time.
_log = logging.getLogger(__name__)

In [3]:
def main(pdf_file: str, output_dir: str) -> None:
    """Convert a document and export each of its tables as CSV and HTML.

    The document at ``pdf_file`` is run through a ``DocumentConverter``. Every
    table found in the result is printed to stdout as markdown and written to
    ``output_dir`` as ``<input stem>-table-<n>.csv`` and
    ``<input stem>-table-<n>.html``.

    Args:
        pdf_file: Path of the document to convert.
        output_dir: Directory that receives the exported files. It is created
            (including parents) if it does not exist.
    """
    logging.basicConfig(level=logging.INFO)

    input_doc_path = Path(pdf_file)
    # Use a separate name so the ``str`` parameter is not rebound to a Path.
    output_path = Path(output_dir)

    doc_converter = DocumentConverter()

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long conversion.
    start_time = time.perf_counter()

    conv_res = doc_converter.convert(input_doc_path)

    output_path.mkdir(parents=True, exist_ok=True)

    doc_filename = conv_res.input.file.stem

    # Export tables
    for table_ix, table in enumerate(conv_res.document.tables):
        table_df: pd.DataFrame = table.export_to_dataframe()
        # NOTE: the printed heading is 0-based while the file names below are
        # 1-based ("## Table 0" corresponds to "...-table-1.csv").
        print(f"## Table {table_ix}")
        print(table_df.to_markdown())

        table_stem = f"{doc_filename}-table-{table_ix+1}"

        # Save the table as csv
        element_csv_filename = output_path / f"{table_stem}.csv"
        _log.info("Saving CSV table to %s", element_csv_filename)
        table_df.to_csv(element_csv_filename)

        # Save the table as html
        element_html_filename = output_path / f"{table_stem}.html"
        _log.info("Saving HTML table to %s", element_html_filename)
        # Explicit UTF-8: the platform default codec (e.g. cp1252 on Windows)
        # can fail on, or corrupt, non-ASCII characters in table text.
        with element_html_filename.open("w", encoding="utf-8") as fp:
            fp.write(table.export_to_html())

    elapsed = time.perf_counter() - start_time

    _log.info(
        "Document converted and tables exported in %.2f seconds.", elapsed
    )




In [6]:
# Input document and destination directory, given relative to the notebook's
# working directory.
source = "../AutoGen_LLM_agent.pdf"
output_dir = "../scratch_example1"
# Convert the document and write one CSV and one HTML file per table.
main(source,output_dir)

INFO:docling.document_converter:Going to convert document batch...
INFO:docling.utils.accelerator_utils:Accelerator device: 'cpu'
INFO:docling.utils.accelerator_utils:Accelerator device: 'cpu'
INFO:docling.utils.accelerator_utils:Accelerator device: 'cpu'
INFO:docling.pipeline.base_pipeline:Processing document AutoGen_LLM_agent.pdf
INFO:docling.document_converter:Finished converting document AutoGen_LLM_agent.pdf in 212.39 sec.
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-1.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-1.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-2.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-2.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-3.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-3.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-4.csv
INFO:__main__:Saving HTML table to ..

## Table 0
|    | 0                                                                                                                                                                                                                                                                                                                                                 |
|---:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|  0 | Vaibhav Adlakha, Parishad BehnamGhader, Xing Han Lu, Nicholas Meade, and Siva Reddy. Evaluating correctness and faithfulness of instruction-following models for question answering. arXiv preprint arXiv:2307.16877 , 2023.                                                                    

INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-9.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-10.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-10.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-11.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-11.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-12.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-12.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-13.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-13.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-14.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-14.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-15.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-15.html

## Table 9
|    | Model         | Two-agent   | Group chat   | Group chat w/ task-based speaker selection   |
|---:|:--------------|:------------|:-------------|:---------------------------------------------|
|  0 | GPT-3.5-turbo | 9.9, 9      | 5.3, 0       | 4, 0                                         |
|  1 | GPT-4         | 6.8, 3      | 4.5, 0       | 4, 0                                         |
## Table 10
|    |                        | Correctness                | Main failure reason                                                                                                                              |
|---:|:-----------------------|:---------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------|
|  0 | click-dialog           | AutoGen : 10/10 RCI: 10/10 | N/A. N/A.                                                                                               

INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-19.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-20.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-20.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-21.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-21.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-22.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-22.html
INFO:__main__:Saving CSV table to ..\scratch\AutoGen_LLM_agent-table-23.csv
INFO:__main__:Saving HTML table to ..\scratch\AutoGen_LLM_agent-table-23.html
INFO:__main__:Document converted and tables exported in 212.89 seconds.


## Table 19
|    |          | BabyAGI                                                                                                                                                                                                                                                                                                                                              |
|---:|:---------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|  0 | /user    | OBJECTIVE=Solve math problems INITIAL TASK=Completely simplify and rationalize the denominator:                                                                                                                                                         