diff --git a/CHANGES.md b/CHANGES.md index 4f040db6..51cd7aa0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,15 @@ # Change Log +## Changes in version 0.0.24 + +### Fixes: + +* [270](https://github.com/pymupdf/RAG/issues/270) - Fix UnboundLocalError for table_strategy in pymupdf_rag.py + +### Other Changes: + + + ## Changes in version 0.0.23 ### Fixes: diff --git a/pdf4llm/setup.py b/pdf4llm/setup.py index 1b0cbd9f..8df1f1e6 100644 --- a/pdf4llm/setup.py +++ b/pdf4llm/setup.py @@ -13,11 +13,11 @@ "Programming Language :: Python :: 3", "Topic :: Utilities", ] -requires = ["pymupdf4llm==0.0.23"] +requires = ["pymupdf4llm==0.0.24"] setuptools.setup( name="pdf4llm", - version="0.0.23", + version="0.0.24", author="Artifex", author_email="support@artifex.com", description="PyMuPDF Utilities for LLM/RAG", diff --git a/pymupdf4llm/pymupdf4llm/__init__.py b/pymupdf4llm/pymupdf4llm/__init__.py index 08571e43..a3f35140 100644 --- a/pymupdf4llm/pymupdf4llm/__init__.py +++ b/pymupdf4llm/pymupdf4llm/__init__.py @@ -1,6 +1,6 @@ from .helpers.pymupdf_rag import IdentifyHeaders, to_markdown -__version__ = "0.0.23" +__version__ = "0.0.24" version = __version__ version_tuple = tuple(map(int, version.split("."))) diff --git a/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py b/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py index af725f1d..510935a7 100644 --- a/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py +++ b/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py @@ -988,12 +988,12 @@ def get_page_output( graphics_count = len([b for b in page.get_bboxlog() if "path" in b[0]]) if GRAPHICS_LIMIT and graphics_count > GRAPHICS_LIMIT: IGNORE_GRAPHICS = True - table_strategy = None # Locate all tables on page parms.written_tables = [] # stores already written tables omitted_table_rects = [] - if table_strategy is None: + if IGNORE_GRAPHICS or not table_strategy: + # do not try to extract tables parms.tabs = None else: parms.tabs = page.find_tables(clip=parms.clip, strategy=table_strategy) @@ -1147,7 +1147,7 @@ def get_page_output( toc = doc.get_toc() # Text extraction flags: - # omit invisible text, collect styles, use accurate bounding boxes + # omit clipped text, collect styles, use accurate bounding boxes textflags = ( 0 | mupdf.FZ_STEXT_CLIP diff --git a/pymupdf4llm/setup.py b/pymupdf4llm/setup.py index 2df69920..4477a13b 100644 --- a/pymupdf4llm/setup.py +++ b/pymupdf4llm/setup.py @@ -17,7 +17,7 @@ setuptools.setup( name="pymupdf4llm", - version="0.0.23", + version="0.0.24", author="Artifex", author_email="support@artifex.com", description="PyMuPDF Utilities for LLM/RAG",