pymupdf · JorjMcKie · Dec 3, 2025 · Dec 3, 2025 · Copilot · Dec 3, 2025
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,5 +1,16 @@
 # Change Log
 
+## Changes in version 0.2.6
+
+### Fixes:
+
+* [Forum](https://forum.mupdf.com/t/bug-pymupdf4llm-list-index-out-of-range-in-document-layout-py-2/216) - Fixed "List index out of range" error when processing tables with no text lines.
+
+### Other Changes:
+
+
+------
+
 ## Changes in version 0.2.5
 
 ### Fixes:

diff --git a/pdf4llm/setup.py b/pdf4llm/setup.py
@@ -6,7 +6,7 @@
 with open(os.path.join(setup_py_cwd, "README.md"), encoding="utf-8") as f:
     readme = f.read()
 
-version = "0.2.4"  # must always equal the pymupdf4llm version
+version = "0.2.6"  # must always equal the pymupdf4llm version
 
 classifiers = [
     "Development Status :: 5 - Production/Stable",

diff --git a/pymupdf4llm/pymupdf4llm/helpers/document_layout.py b/pymupdf4llm/pymupdf4llm/helpers/document_layout.py
@@ -548,7 +548,7 @@ def fallback_text_to_md(textlines, ignore_code: bool = False, clip=None):
     for tl in textlines:
         ltext = "|" + "|".join([s["text"].strip() for s in tl["spans"]]) + "|\n"
         output += ltext
-    output += "**----- End of picture text -----**<br>\n"
+    output += "\n**----- End of picture text -----**<br>\n"
     return output + "\n\n"
 
 
@@ -631,7 +631,7 @@ def to_markdown(
                     continue
 
                 # pictures and formulas: either write image file or embed
-                if btype in ("picture", "formula", "fallback"):
+                if btype in ("picture", "formula", "table-fallback"):
                     if isinstance(box.image, str):
                         output += GRAPHICS_TEXT % box.image + "\n\n"
                     elif isinstance(box.image, bytes):
@@ -650,7 +650,7 @@ def to_markdown(
                                 ignore_code=ignore_code or page.full_ocred,
                                 clip=clip,
                             )
-                        elif btype == "fallback":
+                        elif btype == "table-fallback":
                             output += fallback_text_to_md(
                                 box.textlines,
                                 ignore_code=ignore_code or page.full_ocred,
@@ -741,7 +741,7 @@ def to_text(
                     continue
                 if btype == "page-footer" and footer is False:
                     continue
-                if btype in ("picture", "formula", "fallback"):
+                if btype in ("picture", "formula", "table-fallback"):
                     output += f"==> picture [{clip.width} x {clip.height}] <==\n\n"
                     if box.textlines:
                         if btype == "picture":
@@ -750,7 +750,7 @@ def to_text(
                                 ignore_code=ignore_code or page.full_ocred,
                                 clip=clip,
                             )
-                        elif btype == "fallback":
+                        elif btype == "table-fallback":
                             output += fallback_text_to_text(
                                 box.textlines,
                                 ignore_code=ignore_code or page.full_ocred,
@@ -1018,7 +1018,7 @@ def parse_document(
 
                 except Exception as e:
                     # print(f"table detection error '{e}' on page {page.number+1}")
-                    layoutbox.boxclass = "fallback"
+                    layoutbox.boxclass = "table-fallback"
                     # table structure not detected: treat like an image
                     if document.embed_images or document.write_images:
                         pix = page.get_pixmap(clip=clip, dpi=document.image_dpi)

diff --git a/pymupdf4llm/pymupdf4llm/helpers/get_text_lines.py b/pymupdf4llm/pymupdf4llm/helpers/get_text_lines.py
@@ -101,7 +101,7 @@ def sanitize_spans(line):
             ):
                 continue  # no joining
             # We need to join bbox and text of two consecutive spans
-            # On occasion, spans may also be duplicated.
+            # Sometimes, spans may also be duplicated.
             if s0["text"] != s1["text"] or s0["bbox"] != s1["bbox"]:
                 s0["text"] += s1["text"]
             s0["bbox"] |= s1["bbox"]  # join boundary boxes
@@ -131,7 +131,8 @@ def sanitize_spans(line):
                 continue
             for sno, s in enumerate(line["spans"]):  # the numbered spans
                 sbbox = pymupdf.Rect(s["bbox"])  # span bbox as a Rect
-                if is_white(s["text"]):  # ignore white text
+                if is_white(s["text"]):
+                    # ignore white text if not a Type3 font
                     continue
                 # Ignore invisible text. Type 3 font text is never invisible.
                 if (

diff --git a/pymupdf4llm/pymupdf4llm/helpers/utils.py b/pymupdf4llm/pymupdf4llm/helpers/utils.py
@@ -258,6 +258,10 @@ def clean_tables(page, blocks):
             l for b in blocks if b["type"] == 0 for l in b["lines"] if l["bbox"] in bbox
         ]
         y_vals0 = sorted(set(round(l["bbox"][3]) for l in lines))
+        if not y_vals0:
+            # no text lines in the table bbox
+            page.layout_information[i][4] = "table-fallback"
+            continue
         y_vals = [y_vals0[0]]
         for y in y_vals0[1:]:
             if y - y_vals[-1] > 3:

diff --git a/pymupdf4llm/pymupdf4llm/versions_file.py b/pymupdf4llm/pymupdf4llm/versions_file.py
@@ -1,3 +1,3 @@
 # Generated file - do not edit.
 MINIMUM_PYMUPDF_VERSION = (1, 26, 6)
-VERSION = '0.2.5'
+VERSION = '0.2.6'
diff --git a/pymupdf4llm/setup.py b/pymupdf4llm/setup.py
@@ -11,7 +11,7 @@
     "Topic :: Utilities",
 ]
 
-version = "0.2.5"
+version = "0.2.6"
 pymupdf_version = "1.26.6"
 pymupdf_version_tuple = tuple(int(x) for x in pymupdf_version.split("."))
 requires = [f"pymupdf>={pymupdf_version}", "tabulate"]