From 358f54972772fd8f83ca921016341085d8d2d64c Mon Sep 17 00:00:00 2001
From: "Jorj X. McKie" <jorj.x.mckie@outlook.de>
Date: Wed, 15 Oct 2025 10:15:44 -0400
Subject: [PATCH] Support the PyMuPDF-Layout Feature

---
 src/__init__.py |  70 +++++-
 src/extra.i     | 591 +++++++++++++++++++++++++++++++++---------------
 src/table.py    | 276 ++++++++++++++++------
 3 files changed, 677 insertions(+), 260 deletions(-)

diff --git a/src/__init__.py b/src/__init__.py
index f1cb0a051..012f6506e 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -28,7 +28,7 @@
 import zipfile
 
 from . import extra
-
+import importlib.util
 
 # Set up g_out_log and g_out_message from environment variables.
 #
@@ -333,6 +333,37 @@ def __init__(self):
 
 _globals = _Globals()
 
+_get_layout: typing.Optional[typing.Callable] = None
+
+# global switch ensuring that the recommendation message is shown at most once
+_recommend_layout = True  # must be referred to as "global" everywhere
+
+
+def no_recommend_layout():
+    """Disable the layout recommendation message by clearing the module-level switch."""
+    global _recommend_layout
+    _recommend_layout = False
+
+
+def _warn_layout_once():
+    """Print the layout package recommendation, at most once and only if applicable."""
+    global _recommend_layout
+    hint = """Consider using the pymupdf_layout package for a greatly improved page layout analysis."""
+
+    # Stay silent if the switch is already off (message shown or disabled in code).
+    if not _recommend_layout:
+        return
+    # Stay silent if a layout function is stored in this module.
+    if _get_layout is not None:
+        return
+    # Stay silent if the client disabled the recommendation via the environment.
+    if os.getenv("PYMUPDF_SUGGEST_LAYOUT_ANALYZER") == "0":
+        return
+    # Recommend only if module "pymupdf.layout" cannot be found in this Python.
+    if not importlib.util.find_spec("pymupdf.layout"):
+        print(hint)
+        _recommend_layout = False  # never show the message again
+
 
 # Optionally use MuPDF via cppyy bindings; experimental and not tested recently
 # as of 2023-01-20 11:51:40
@@ -9952,7 +9983,7 @@ def _get_resource_properties(self):
         return rc
 
     def _get_textpage(self, clip=None, flags=0, matrix=None):
-        if g_use_extra:
+        if 1 or g_use_extra:
             ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
             tpage = mupdf.FzStextPage(ll_tpage)
             return tpage
@@ -10781,6 +10812,20 @@ def clip_to_rect(self, rect):
         pclip = JM_rect_from_py(clip)
         mupdf.pdf_clip_page(pdfpage, pclip)
 
+    def get_layout(self):
+        """Compute layout information for this page unless already present.
+
+        The result of the module-level "_get_layout" callable is stored in
+        "self.layout_information". Nothing happens if that attribute is
+        already set or if no callable is available. Always returns None.
+        """
+        already_done = self.layout_information is not None
+        if already_done or not _get_layout:
+            return
+
+        # hand this page to the layout function and keep whatever it returns
+        self.layout_information = _get_layout(self)
+
     @property
     def artbox(self):
         """The ArtBox"""
@@ -11432,7 +11477,7 @@ def get_cdrawings(self, extended=None, callback=None, method=None):
         assert isinstance(page, mupdf.FzPage), f'{self.this=}'
         clips = True if extended else False
         prect = mupdf.fz_bound_page(page)
-        if g_use_extra:
+        if 1 or g_use_extra:
             rc = extra.get_cdrawings(page, extended, callback, method)
         else:
             rc = list()
@@ -12157,7 +12202,7 @@ def get_texttrace(self):
             self.set_rotation(0)
         page = self.this
         rc = []
-        if g_use_extra:
+        if 1 or g_use_extra:
             dev = extra.JM_new_texttrace_device(rc)
         else:
             dev = JM_new_texttrace_device(rc)
@@ -13206,6 +13251,9 @@ def xref(self):
 
     rect = property(bound, doc="page rectangle")
 
+    # any result of layout analysis is stored here
+    layout_information = None
+
 
 class Pixmap:
 
@@ -16391,7 +16439,7 @@ def _textpage_dict(self, raw=False):
 
     def extractBLOCKS(self):
         """Return a list with text block information."""
-        if g_use_extra:
+        if 1 or g_use_extra:
             return extra.extractBLOCKS(self.this)
         block_n = -1
         this_tpage = self.this
@@ -16587,7 +16635,7 @@ def extractTextbox(self, rect):
 
     def extractWORDS(self, delimiters=None):
         """Return a list with text word information."""
-        if g_use_extra:
+        if 1 or g_use_extra:
             return extra.extractWORDS(self.this, delimiters)
         buflen = 0
         last_char_rtl = 0
@@ -18969,7 +19017,7 @@ def JM_color_FromSequence(color):
 
 
 def JM_color_count( pm, clip):
-    if g_use_extra:
+    if 1 or g_use_extra:
         return extra.ll_JM_color_count(pm.m_internal, clip)
     
     rc = dict()
@@ -20469,7 +20517,7 @@ def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
 
 
 def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
-    if g_use_extra:
+    if 1 or g_use_extra:
         return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
     char_list = None
     span_list = []
@@ -20682,7 +20730,7 @@ def JM_make_image_block(block, block_dict):
 
 
 def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
-    if g_use_extra:
+    if 1 or g_use_extra:
         return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
     line_list = []
     block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
@@ -20705,7 +20753,7 @@ def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
 
 
 def JM_make_textpage_dict(tp, page_dict, raw):
-    if g_use_extra:
+    if 1 or g_use_extra:
         return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
     text_buffer = mupdf.fz_new_buffer(128)
     block_list = []
@@ -21356,7 +21404,7 @@ def JM_rotate_page_matrix(page):
 
 
 def JM_search_stext_page(page, needle):
-    if g_use_extra:
+    if 1 or g_use_extra:
         return extra.JM_search_stext_page(page.m_internal, needle)
     
     rect = mupdf.FzRect(page.m_internal.mediabox)
diff --git a/src/extra.i b/src/extra.i
index 9d448dac1..8e4e75efb 100644
--- a/src/extra.i
+++ b/src/extra.i
@@ -1739,29 +1739,6 @@ static const char* JM_font_name(fz_font* font)
     return s + 1;
 }
 
-static int detect_super_script(fz_stext_line *line, fz_stext_char *ch)
-{
-    if (line->wmode == 0 && line->dir.x == 1 && line->dir.y == 0)
-    {
-        return ch->origin.y < line->first_char->origin.y - ch->size * 0.1f;
-    }
-    return 0;
-}
-
-static int JM_char_font_flags(fz_font *font, fz_stext_line *line, fz_stext_char *ch)
-{
-    int flags = 0;
-    if (line && ch)
-    {
-        flags += detect_super_script(line, ch) * TEXT_FONT_SUPERSCRIPT;
-    }
-    flags += mupdf::ll_fz_font_is_italic(font) * TEXT_FONT_ITALIC;
-    flags += mupdf::ll_fz_font_is_serif(font) * TEXT_FONT_SERIFED;
-    flags += mupdf::ll_fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED;
-    flags += mupdf::ll_fz_font_is_bold(font) * TEXT_FONT_BOLD;
-    return flags;
-}
-
 static void jm_trace_text_span(
         jm_tracedraw_device* dev,
         fz_text_span* span,
@@ -2297,37 +2274,64 @@ void JM_append_rune(fz_buffer *buff, int ch);
 // but lines within a block are concatenated by space instead a new-line
 // character (which else leads to 2 new-lines).
 //-----------------------------------------------------------------------------
-void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page)
+void _as_text(fz_stext_block *block, mupdf::FzBuffer& res, mupdf::FzStextPage& page)
 {
+    /*
+    Append the text of 'block' and its following siblings to 'res', entering
+    structure blocks recursively. Characters are output if the page mediabox
+    is infinite or overlaps them. A line break (10) is appended after each
+    line with output that does not already end in one.
+    */
     fz_rect rect = page.m_internal->mediabox;
-
-    for (auto block: page)
+    int last_char;
+    fz_stext_line *line;
+    fz_stext_char *ch;
+    while (block)
     {
-        if (block.m_internal->type == FZ_STEXT_BLOCK_TEXT)
+        switch (block->type)
         {
-            for (auto line: block)
-            {
-                int last_char = 0;
-                for (auto ch: line)
+            case FZ_STEXT_BLOCK_STRUCT:
+                if (block->u.s.down)
+                {
+                    _as_text(block->u.s.down->first_block, res, page);
+                }
+                break;
+
+            case FZ_STEXT_BLOCK_TEXT:
+                last_char = 0;
+                for (line = block->u.t.first_line; line; line = line->next)
                 {
-                    fz_rect chbbox = JM_char_bbox( line, ch);
-                    if (mupdf::ll_fz_is_infinite_rect(rect)
-                            || JM_rects_overlap(rect, chbbox)
-                            )
+                    for (ch = line->first_char; ch; ch = ch->next)
+                    {
+                        fz_rect chbbox = JM_char_bbox( line, ch);
+                        if (mupdf::ll_fz_is_infinite_rect(rect) || JM_rects_overlap(rect, chbbox))
+                        {
+                            last_char = ch->c;
+                            JM_append_rune(res.m_internal, last_char);
+                        }
+                    }
+                    if (last_char != 10 && last_char > 0)
                     {
-                        last_char = ch.m_internal->c;
-                        JM_append_rune(res.m_internal, last_char);
+                        mupdf::ll_fz_append_string(res.m_internal, "\n");
+                        last_char = 10;
                     }
                 }
                 if (last_char != 10 && last_char > 0)
                 {
                     mupdf::ll_fz_append_string(res.m_internal, "\n");
+                    last_char = 10;
                 }
-            }
+                break;
         }
+        block = block->next;
     }
 }
 
+void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page)
+{
+    // Start the recursive text output at the first top-level block of the page.
+    _as_text(page.m_internal->first_block, res, page);
+}
+
 
 
 // path_type is one of:
@@ -3006,6 +3010,25 @@ PyObject* get_cdrawings(mupdf::FzPage& page, PyObject *extended=NULL, PyObject *
 }
 
 
+static int detect_super_script(fz_stext_line *line, fz_stext_char *ch)
+{
+    // Only lines with wmode 0 and direction (1, 0) are examined; all others yield 0.
+    if (line->wmode != 0 || line->dir.x != 1 || line->dir.y != 0)
+        return 0;
+    // 1 if the char's origin.y is smaller than that of the line's first char by more than 10% of the char size.
+    return ch->origin.y < line->first_char->origin.y - ch->size * 0.1f;
+}
+
+static int JM_char_font_flags(fz_font *font, fz_stext_line *line, fz_stext_char *ch)
+{   // Return the TEXT_FONT_* bits for a character; 'line' / 'ch' may be NULL (then no superscript test).
+    int flags = (line && ch) ? detect_super_script(line, ch) * TEXT_FONT_SUPERSCRIPT : 0;
+    flags += mupdf::ll_fz_font_is_italic(font) * TEXT_FONT_ITALIC;
+    flags += mupdf::ll_fz_font_is_serif(font) * TEXT_FONT_SERIFED;
+    flags += mupdf::ll_fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED;
+    flags += mupdf::ll_fz_font_is_bold(font) * TEXT_FONT_BOLD;
+    return flags;
+}
+
 //---------------------------------------------------------------------------
 // APPEND non-ascii runes in unicode escape format to fz_buffer
 //---------------------------------------------------------------------------
@@ -3264,51 +3287,77 @@ int JM_append_word(
     return word_n + 1;  // word counter
 }
 
-PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters)
+int _as_words(fz_stext_block *block, mupdf::FzBuffer& buff, fz_rect tp_rect, PyObject *lines, int block_n, PyObject *delimiters)
 {
-    int block_n = -1;
-    fz_rect wbbox = fz_empty_rect;  // word bbox
-    fz_rect tp_rect = this_tpage.m_internal->mediabox;
-
-    PyObject *lines = NULL;
-    mupdf::FzBuffer buff = mupdf::fz_new_buffer(64);
-    lines = PyList_New(0);
-    for (mupdf::FzStextBlock block: this_tpage)
+    /* 'buff' is intermediate storage for composing a word. Used as parameter only for
+    avoiding repeated allocation of an FzBuffer.*/
+    int line_n;
+    fz_stext_line *line;
+    fz_stext_char *ch;
+    fz_rect wbbox, blockrect;
+    while (block)
     {
-        block_n++;
-        if (block.m_internal->type != FZ_STEXT_BLOCK_TEXT)
+        switch (block->type)
         {
-            continue;
-        }
-        int line_n = -1;
-        for (mupdf::FzStextLine line: block)
-        {
-            line_n++;
-            int word_n = 0;                 // word counter per line
-            mupdf::fz_clear_buffer(buff);   // reset word buffer
-            size_t buflen = 0;              // reset char counter
-            int last_char_rtl = 0;          // was last character RTL?
-            for (mupdf::FzStextChar ch: line)
-            {
-                mupdf::FzRect cbbox = JM_char_bbox(line, ch);
-                if (!JM_rects_overlap(tp_rect, *cbbox.internal()) && !fz_is_infinite_rect(tp_rect))
+            case FZ_STEXT_BLOCK_STRUCT:
+                if (block->u.s.down)
                 {
-                    continue;
+                    block_n = _as_words(block->u.s.down->first_block, buff, tp_rect, lines, block_n, delimiters);
                 }
-                // prevent Unicode ZWJ 0x200d to start a word
-                if (buflen == 0 && ch.m_internal->c == 0x200d)
+                break;
+
+            case FZ_STEXT_BLOCK_TEXT:
+                block_n++;
+                blockrect = block->bbox;
+                wbbox = fz_empty_rect;
+                line_n = -1;
+                for (line = block->u.t.first_line; line; line = line->next)
                 {
-                    continue;
-                }
-                int word_delimiter = JM_is_word_delimiter(ch.m_internal->c, delimiters);
-                int this_char_rtl = JM_is_rtl_char(ch.m_internal->c);
-                if (word_delimiter || this_char_rtl != last_char_rtl)
-                {
-                    if (buflen == 0 && word_delimiter)
+                    line_n++;
+                    int word_n = 0;                 // word counter per line
+                    mupdf::fz_clear_buffer(buff);   // reset word buffer
+                    int last_char_rtl = 0;          // was last character RTL?
+                            for (ch = line->first_char; ch; ch = ch->next)
                     {
-                        continue;  // skip delimiters at line start
+                        mupdf::FzRect cbbox = JM_char_bbox(line, ch);
+                        if (!JM_rects_overlap(tp_rect, *cbbox.internal()) && !fz_is_infinite_rect(tp_rect))
+                        {
+                            continue;
+                        }
+                        // prevent Unicode ZWJ 0x200d to start a word
+                        if (mupdf::fz_buffer_storage(buff, NULL) == 0 && ch->c == 0x200d)
+                        {
+                            continue;
+                        }
+                        int word_delimiter = JM_is_word_delimiter(ch->c, delimiters);
+                        int this_char_rtl = JM_is_rtl_char(ch->c);
+                        if (word_delimiter || this_char_rtl != last_char_rtl)
+                        {
+                            if (mupdf::fz_buffer_storage(buff, NULL) == 0 && word_delimiter)
+                            {
+                                continue;  // skip delimiters at line start
+                            }
+                            if (!fz_is_empty_rect(wbbox))
+                            {
+                                word_n = JM_append_word(
+                                        lines,
+                                        buff.m_internal,
+                                        &wbbox,
+                                        block_n,
+                                        line_n,
+                                        word_n
+                                        );
+                            }
+                            mupdf::fz_clear_buffer(buff);
+                            if (word_delimiter) continue;
+                        }
+                        // append one unicode character to the word
+                        JM_append_rune(buff.m_internal, ch->c);
+                        last_char_rtl = this_char_rtl;
+                        // enlarge word bbox
+                        wbbox = fz_union_rect(wbbox, JM_char_bbox(line, ch));
                     }
-                    if (!fz_is_empty_rect(wbbox))
+                    if (mupdf::fz_buffer_storage(buff, NULL) && !fz_is_empty_rect(wbbox))
                     {
                         word_n = JM_append_word(
                                 lines,
@@ -3320,35 +3369,27 @@ PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters)
                                 );
                     }
                     mupdf::fz_clear_buffer(buff);
-                    buflen = 0;  // reset char counter
-                    if (word_delimiter) continue;
                 }
-                // append one unicode character to the word
-                JM_append_rune(buff.m_internal, ch.m_internal->c);
-                last_char_rtl = this_char_rtl;
-                buflen++;
-                // enlarge word bbox
-                wbbox = fz_union_rect(wbbox, JM_char_bbox(line, ch));
-            }
-            if (buflen && !fz_is_empty_rect(wbbox))
-            {
-                word_n = JM_append_word(
-                        lines,
-                        buff.m_internal,
-                        &wbbox,
-                        block_n,
-                        line_n,
-                        word_n
-                        );
-            }
-            mupdf::fz_clear_buffer(buff);
-            buflen = 0;
+                break;
         }
+        block = block->next;     
     }
-    return lines;
+    return block_n;
 }
 
 
+PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters)
+{
+    // Return a new Python list filled by the recursive helper _as_words().
+    fz_stext_page *tpage = this_tpage.m_internal;
+    // scratch buffer for composing one word, shared by all recursion levels
+    mupdf::FzBuffer word_buff = mupdf::fz_new_buffer(64);
+    PyObject *words = PyList_New(0);
+    mupdf::FzStextBlock first_block = tpage->first_block;
+    (void) _as_words(first_block.m_internal, word_buff, tpage->mediabox, words, -1, delimiters);
+    return words;
+}
+
 
 struct ScopedPyObject
 /* PyObject* wrapper, destructor calls Py_CLEAR() unless `release()` has been
@@ -3385,74 +3426,116 @@ called. */
     PyObject*   m_pyobject = nullptr;
 };
 
-
-PyObject* extractBLOCKS(mupdf::FzStextPage& self)
+int _as_blocks(fz_stext_block *block, fz_rect tp_rect, PyObject *lines, int block_n)
 {
-    fz_stext_page *this_tpage = self.m_internal;
-    fz_rect tp_rect = this_tpage->mediabox;
-    mupdf::FzBuffer res(1024);
-    ScopedPyObject lines( PyList_New(0));
-    int block_n = -1;
-    for (fz_stext_block* block = this_tpage->first_block; block; block = block->next)
+    /*
+    Walk 'block' and its siblings, entering structure blocks recursively.
+    For each text block, and each image / vector block passing the 'tp_rect'
+    test, append a tuple (x0, y0, x1, y1, text, block_n, type) to 'lines'.
+    Returns the last block number used. 'text' is an owned reference that
+    is handed over to the tuple; the tuple itself is dropped after appending.
+    */
+    PyObject *text = NULL;
+    fz_rect blockrect;
+    mupdf::FzBuffer res;
+    while (block)
     {
-        ScopedPyObject text;
-        block_n++;
-        fz_rect blockrect = fz_empty_rect;
-        if (block->type == FZ_STEXT_BLOCK_TEXT)
+        switch (block->type)
         {
-            mupdf::fz_clear_buffer(res);  // set text buffer to empty
-            int line_n = -1;
-            int last_char = 0;
-            (void) line_n;  /* Not actually used, but keeping in the code for now. */
-            for (fz_stext_line* line = block->u.t.first_line; line; line = line->next)
-            {
-                line_n++;
-                fz_rect linerect = fz_empty_rect;
-                for (fz_stext_char* ch = line->first_char; ch; ch = ch->next)
+            case FZ_STEXT_BLOCK_STRUCT:
+                if (block->u.s.down)
+                {
+                    block_n = _as_blocks(block->u.s.down->first_block, tp_rect, lines, block_n);
+                }
+                break;
+
+            case FZ_STEXT_BLOCK_TEXT:
+                blockrect = fz_empty_rect;
+                res = mupdf::fz_new_buffer(1024);
+                int last_char; last_char = 0;  // assigned, not initialized: later case labels jump over this declaration
+                for (fz_stext_line* line = block->u.t.first_line; line; line = line->next)
                 {
-                    fz_rect cbbox = JM_char_bbox(line, ch);
-                    if (!JM_rects_overlap(tp_rect, cbbox) && !fz_is_infinite_rect(tp_rect))
+                    fz_rect linerect = fz_empty_rect;
+                    for (fz_stext_char* ch = line->first_char; ch; ch = ch->next)
                     {
-                        continue;
+                        fz_rect cbbox = JM_char_bbox(line, ch);
+                        if (!JM_rects_overlap(tp_rect, cbbox) && !fz_is_infinite_rect(tp_rect))
+                        {
+                            continue;
+                        }
+                        JM_append_rune(res.m_internal, ch->c);
+                        last_char = ch->c;
+                        linerect = fz_union_rect(linerect, cbbox);
+                    }
+                    if (last_char != 10 && !fz_is_empty_rect(linerect))
+                    {
+                            JM_append_rune(res.m_internal, 10);
                     }
-                    JM_append_rune(res.m_internal, ch->c);
-                    last_char = ch->c;
-                    linerect = fz_union_rect(linerect, cbbox);
+                    blockrect = fz_union_rect(blockrect, linerect);
                 }
-                if (last_char != 10 && !fz_is_empty_rect(linerect))
+                text = JM_EscapeStrFromBuffer(res);
+                break;
+
+            case FZ_STEXT_BLOCK_IMAGE:
+                if (fz_contains_rect(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect))
                 {
-                    mupdf::fz_append_byte(res, 10);
+                    blockrect = block->bbox;
+                    fz_image *img = block->u.i.image;
+                    fz_colorspace *cs = img->colorspace;
+                    text = PyUnicode_FromFormat(
+                                    "<image: %s, width: %d, height: %d, bpc: %d>\n",
+                            mupdf::ll_fz_colorspace_name(cs),
+                            img->w,
+                            img->h,
+                            img->bpc
+                            );
                 }
-                blockrect = fz_union_rect(blockrect, linerect);
-            }
-            text = JM_EscapeStrFromBuffer(res);
+                break;
+
+            case FZ_STEXT_BLOCK_VECTOR:
+                if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect))
+                {
+                    blockrect = block->bbox;
+                    int alpha = (int) (block->u.v.argb >> 24);
+                    int color = (int) (block->u.v.argb & 0xffffff);
+                    text = PyUnicode_FromFormat(
+                            "\n<vector %s, color: #%06x, alpha: %i, is-rect: %s, continues: %s>\n",
+                            (block->u.v.flags & FZ_STEXT_VECTOR_IS_STROKED) ? "stroked" : "filled",
+                            color,
+                            alpha,
+                            (block->u.v.flags & FZ_STEXT_VECTOR_IS_RECTANGLE) ? "true":"false",
+                            (block->u.v.flags & FZ_STEXT_VECTOR_CONTINUES) ? "true":"false");
+                }
+                break;
         }
-        else if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect))
+
+        if (text)
         {
-            fz_image *img = block->u.i.image;
-            fz_colorspace *cs = img->colorspace;
-            text = PyUnicode_FromFormat(
-                    "<image: %s, width: %d, height: %d, bpc: %d>",
-                    mupdf::ll_fz_colorspace_name(cs),
-                    img->w,
-                    img->h,
-                    img->bpc
-                    );
-            blockrect = fz_union_rect(blockrect, block->bbox);
+            block_n += 1;
+            PyObject *litem = PyTuple_New(7);
+            PyTuple_SET_ITEM(litem, 0, Py_BuildValue("f", blockrect.x0));
+            PyTuple_SET_ITEM(litem, 1, Py_BuildValue("f", blockrect.y0));
+            PyTuple_SET_ITEM(litem, 2, Py_BuildValue("f", blockrect.x1));
+            PyTuple_SET_ITEM(litem, 3, Py_BuildValue("f", blockrect.y1));
+            PyTuple_SET_ITEM(litem, 4, text);  // the tuple takes over our reference, so 'text' does not leak
+            PyTuple_SET_ITEM(litem, 5, Py_BuildValue("i", block_n));
+            PyTuple_SET_ITEM(litem, 6, Py_BuildValue("i", block->type));
+            LIST_APPEND_DROP(lines, litem);  // the list holds its own reference; release ours
         }
-        if (!fz_is_empty_rect(blockrect))
-        {
-            ScopedPyObject litem = PyTuple_New(7);
-            PyTuple_SET_ITEM(litem.get(), 0, Py_BuildValue("f", blockrect.x0));
-            PyTuple_SET_ITEM(litem.get(), 1, Py_BuildValue("f", blockrect.y0));
-            PyTuple_SET_ITEM(litem.get(), 2, Py_BuildValue("f", blockrect.x1));
-            PyTuple_SET_ITEM(litem.get(), 3, Py_BuildValue("f", blockrect.y1));
-            PyTuple_SET_ITEM(litem.get(), 4, Py_BuildValue("O", text.get()));
-            PyTuple_SET_ITEM(litem.get(), 5, Py_BuildValue("i", block_n));
-            PyTuple_SET_ITEM(litem.get(), 6, Py_BuildValue("i", block->type));
-            LIST_APPEND(lines.get(), litem.get());
+        text = NULL;
+        block = block->next;
         }
+    return block_n;
     }
+
+PyObject* extractBLOCKS(mupdf::FzStextPage& self)
+{
+    // Return a new Python list filled by the recursive helper _as_blocks(),
+    // which starts at the first top-level block of the text page.
+    // The block counter returned by the helper is not needed here.
+    fz_stext_page *tpage = self.m_internal;
+    ScopedPyObject lines(PyList_New(0));
+    (void) _as_blocks(tpage->first_block, tpage->mediabox, lines.get(), -1);
     return lines.release();
 }
 
@@ -3599,10 +3682,88 @@ void JM_make_image_block(fz_stext_block *block, PyObject *block_dict)
         fz_drop_buffer(ctx, mask_buf);
         fz_drop_buffer(ctx, freebuf);
     }
-    fz_catch(ctx) {;}
+    fz_catch(ctx)
+    {
+        fz_ignore_error(ctx);
+    }
+    return;
+}
+
+
+void JM_make_vector_block(fz_stext_block *block, PyObject *block_dict)
+{
+    // Store bbox, the stroked / isrect / continues flags, color and alpha of a vector block.
+    const auto vflags = block->u.v.flags;
+    const auto argb = block->u.v.argb;
+    DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox));
+    DICT_SETITEMSTR_DROP(block_dict, "stroked", JM_BOOL(vflags & FZ_STEXT_VECTOR_IS_STROKED));
+    DICT_SETITEMSTR_DROP(block_dict, "isrect", JM_BOOL(vflags & FZ_STEXT_VECTOR_IS_RECTANGLE));
+    DICT_SETITEMSTR_DROP(block_dict, "continues", JM_BOOL(vflags & FZ_STEXT_VECTOR_CONTINUES));
+    DICT_SETITEM_DROP(block_dict, dictkey_color, Py_BuildValue("i", (int) argb & 0xffffff));  // low 24 bits
+    DICT_SETITEMSTR_DROP(block_dict, "alpha", Py_BuildValue("i", (int) (argb >> 24)));  // bits above the low 24
+}
+
+void JM_make_grid_block(fz_stext_block *block, PyObject *block_dict)
+{
+    // Store bbox, type, the largest uncertainties and the x / y grid
+    // positions (each with its uncertainty) of a grid block in 'block_dict'.
+    auto *xs = block->u.b.xs;
+    auto *ys = block->u.b.ys;
+    Py_ssize_t k;
+
+    DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox));
+    DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type));
+    DICT_SETITEMSTR_DROP(block_dict, "max_uncertain",
+        Py_BuildValue("ii", xs->max_uncertainty, ys->max_uncertainty));
+
+    // list of (position, uncertainty) pairs along x
+    PyObject *xpos = PyList_New((size_t) xs->len);
+    k = 0;
+    while (k < xs->len)
+    {
+        PyList_SetItem(xpos, k, Py_BuildValue("fi", xs->list[k].pos, xs->list[k].uncertainty));
+        k++;
+    }
+    DICT_SETITEMSTR_DROP(block_dict, "xpos", xpos);
+
+    // list of (position, uncertainty) pairs along y
+    PyObject *ypos = PyList_New((size_t) ys->len);
+    k = 0;
+    while (k < ys->len)
+    {
+        PyList_SetItem(ypos, k, Py_BuildValue("fi", ys->list[k].pos, ys->list[k].uncertainty));
+        k++;
+    }
+    DICT_SETITEMSTR_DROP(block_dict, "ypos", ypos);
+
     return;
 }
 
+
+void make_table_dict(fz_stext_page *tp, PyObject *table_dict, PyObject *bbox)
+{
+    // Look for a table inside 'bbox' and, if a grid block results, store
+    // its details in 'table_dict'. Otherwise 'table_dict' stays untouched.
+    fz_stext_block *grid = NULL;
+    fz_rect search_area = JM_rect_from_py(bbox);
+    try
+    {
+        grid = mupdf::ll_fz_find_table_within_bounds(tp, search_area);
+    }
+    catch (std::exception&)
+    {
+        // A failing table search is deliberately not reported.
+        return;
+    }
+    // Nothing found, or the result is not a grid block.
+    if (!grid || grid->type != FZ_STEXT_BLOCK_GRID)
+    {
+        return;
+    }
+    JM_make_grid_block(grid, table_dict);
+}
+
+
 static void JM_make_text_block(fz_stext_block *block, PyObject *block_dict, int raw, fz_buffer *buff, fz_rect tp_rect)
 {
     fz_stext_line *line;
@@ -3638,38 +3799,111 @@ static void JM_make_text_block(fz_stext_block *block, PyObject *block_dict, int
     return;
 }
 
+
+void JM_make_struct_block(fz_stext_block *block, PyObject *block_dict)
+{
+    // Store "index" and, if a child structure exists, its "raw" and "std" names.
+    DICT_SETITEMSTR_DROP(block_dict, "index", Py_BuildValue("i", block->u.s.index));
+    auto *child = block->u.s.down;
+    if (!child) return;
+    const char *std_name = fz_structure_to_string(child->standard);
+    DICT_SETITEMSTR_DROP(block_dict, "raw", Py_BuildValue("s", child->raw));
+    DICT_SETITEMSTR_DROP(block_dict, "std", Py_BuildValue("s", std_name));
+}
+
+
+int _as_dict(PyObject *block_list, fz_stext_block *block, fz_buffer *text_buffer, int raw, fz_rect tp_rect, int block_n)
+{
+    /*
+    Append a dict per accepted block to 'block_list' for 'block' and its siblings. A structure block with
+    children stores them in a sub-list under dictkey_blocks, filled recursively. Returns the last block number used.
+    */
+    PyObject *block_dict;
+    while (block)
+    {
+        switch (block->type)
+        {
+            case FZ_STEXT_BLOCK_STRUCT:
+                if (block->u.s.down && block->u.s.down->first_block)
+                {
+                    block_n++;
+                    block_dict = PyDict_New();
+                    DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type));
+                    DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n));
+                    DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox));
+                    JM_make_struct_block(block, block_dict);
+                    PyObject *subblocks = PyList_New(0);
+                    block_n = _as_dict(subblocks, block->u.s.down->first_block, text_buffer, raw, tp_rect, block_n);
+                    DICT_SETITEM_DROP(block_dict, dictkey_blocks, subblocks);
+                    LIST_APPEND_DROP(block_list, block_dict);
+                }
+            break;
+
+            case FZ_STEXT_BLOCK_TEXT:
+                if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect))
+                {
+                    block_dict = PyDict_New();
+                    block_n++;
+                    DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type));
+                    DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n));
+                    DICT_SETITEMSTR_DROP(block_dict, "flags", Py_BuildValue("i", block->u.t.flags));
+                    JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect);
+                    LIST_APPEND_DROP(block_list, block_dict);
+                }
+            break;
+
+            case FZ_STEXT_BLOCK_IMAGE:
+                if (fz_contains_rect(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect))
+                {
+                    block_dict = PyDict_New();
+                    block_n++;
+                    DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type));
+                    DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n));
+                    DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox));
+                    JM_make_image_block(block, block_dict);
+                    LIST_APPEND_DROP(block_list, block_dict);
+                }
+            break;
+
+            case FZ_STEXT_BLOCK_VECTOR:
+                if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect))
+                {
+                    block_dict = PyDict_New();
+                    block_n++;
+                    DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type));
+                    DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n));
+                    JM_make_vector_block(block, block_dict);
+                    LIST_APPEND_DROP(block_list, block_dict);
+                }
+            break;
+
+            case FZ_STEXT_BLOCK_GRID:
+                if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect))
+                {
+                    block_dict = PyDict_New();
+                    block_n++;
+                    DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type));
+                    DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n));
+                    JM_make_grid_block(block, block_dict);
+                    LIST_APPEND_DROP(block_list, block_dict);
+                }
+            break;
+        }
+        block = block->next;
+    }
+    return block_n;
+}
+
 void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw)
 {
     fz_context* ctx = mupdf::internal_context_get();
     fz_stext_block *block;
     fz_buffer *text_buffer = fz_new_buffer(ctx, 128);
-    PyObject *block_dict, *block_list = PyList_New(0);
+    PyObject *block_list = PyList_New(0);  // receives the block dicts; stored in page_dict below
     fz_rect tp_rect = tp->mediabox;
+    block = tp->first_block;  // start at the first top-level block of the page
     int block_n = -1;
-    for (block = tp->first_block; block; block = block->next) {
-        block_n++;
-        if (!fz_contains_rect(tp_rect, block->bbox) &&
-            !fz_is_infinite_rect(tp_rect) &&
-            block->type == FZ_STEXT_BLOCK_IMAGE) {
-            continue;
-        }
-        if (!fz_is_infinite_rect(tp_rect) &&
-            fz_is_empty_rect(fz_intersect_rect(tp_rect, block->bbox))) {
-            continue;
-        }
-
-        block_dict = PyDict_New();
-        DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n));
-        DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type));
-        if (block->type == FZ_STEXT_BLOCK_IMAGE) {
-            DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox));
-            JM_make_image_block(block, block_dict);
-        } else {
-            JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect);
-        }
-
-        LIST_APPEND_DROP(block_list, block_dict);
-    }
+    block_n = _as_dict(block_list, block, text_buffer, raw, tp_rect, block_n);  // recursive walk fills block_list
     DICT_SETITEM_DROP(page_dict, dictkey_blocks, block_list);
     fz_drop_buffer(ctx, text_buffer);
 }
@@ -4270,6 +4504,7 @@ fz_stext_page* page_get_textpage(
         PyObject* matrix
         );
 
+void make_table_dict(fz_stext_page *tp, PyObject *table_dict, PyObject *bbox);
 void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw);
 PyObject *pixmap_pixel(fz_pixmap* pm, int x, int y);
 int pixmap_n(mupdf::FzPixmap& pixmap);
diff --git a/src/table.py b/src/table.py
index 3de8b2c86..9b73782d3 100644
--- a/src/table.py
+++ b/src/table.py
@@ -80,39 +80,121 @@
 from dataclasses import dataclass
 from operator import itemgetter
 import weakref
+import pymupdf
+from pymupdf import mupdf
 
 # -------------------------------------------------------------------
 # Start of PyMuPDF interface code
 # -------------------------------------------------------------------
-from . import (
-    Rect,
-    Matrix,
-    TEXTFLAGS_TEXT,
-    TEXT_FONT_BOLD,
-    TEXT_FONT_ITALIC,
-    TEXT_FONT_MONOSPACED,
-    TEXT_FONT_SUPERSCRIPT,
-    TEXT_COLLECT_STYLES,
-    TOOLS,
-    EMPTY_RECT,
-    sRGB_to_pdf,
-    Point,
-    message,
-    mupdf,
-)
 
 EDGES = []  # vector graphics from PyMuPDF
 CHARS = []  # text characters from PyMuPDF
 TEXTPAGE = None
 TEXT_BOLD = mupdf.FZ_STEXT_BOLD
 TEXT_STRIKEOUT = mupdf.FZ_STEXT_STRIKEOUT
-FLAGS = TEXTFLAGS_TEXT | TEXT_COLLECT_STYLES
-
+FLAGS = (
+    0
+    | pymupdf.TEXTFLAGS_TEXT
+    | pymupdf.TEXT_COLLECT_STYLES
+    | pymupdf.TEXT_ACCURATE_BBOXES
+    | pymupdf.TEXT_MEDIABOX_CLIP
+)
+# Extraction flags for the textpage handed to MuPDF's fz_find_table_within_bounds().
+TABLE_DETECTOR_FLAGS = (
+    0
+    | pymupdf.TEXT_ACCURATE_BBOXES
+    | pymupdf.TEXT_SEGMENT
+    | pymupdf.TEXT_COLLECT_VECTORS
+    | pymupdf.TEXT_MEDIABOX_CLIP
+)
 white_spaces = set(string.whitespace)  # for checking white space only cells
 
 
+def _iou(r1, r2):
+    """Return the intersection-over-union ratio of two rect-like sequences."""
+    # overlap extents, clamped at zero when the rectangles are disjoint
+    overlap_w = max(0, min(r1[2], r2[2]) - max(r1[0], r2[0]))
+    overlap_h = max(0, min(r1[3], r2[3]) - max(r1[1], r2[1]))
+    shared = overlap_w * overlap_h
+    if shared:
+        total = (r1[2] - r1[0]) * (r1[3] - r1[1]) + (r2[2] - r2[0]) * (r2[3] - r2[1])
+        return shared / (total - shared)
+    return 0
+
+
+def intersects_words_h(bbox, y, word_rects) -> bool:
+    """Tell whether horizontal line y cuts through any word lying inside bbox."""
+    # only words contained in bbox count; a cut lies strictly between top and bottom
+    inside = (word for word in word_rects if word in bbox)
+    return any(word.y0 < y < word.y1 for word in inside)
+
+
+def get_table_dict_from_rect(textpage, rect):
+    """Return MuPDF's table structure information for rectangle 'rect'."""
+    result = {}  # filled in place by the extension helper
+    pymupdf.extra.make_table_dict(textpage.this.m_internal, result, rect)
+    return result
+
+
+def make_table_from_bbox(textpage, word_rects, rect):
+    """Detect table structure within a given rectangle.
+
+    Args:
+        textpage: TextPage passed to the MuPDF table detector.
+        word_rects: rectangles of the page's words; grid lines cutting
+            through a word are rejected.
+        rect: rect-like area to search for a table.
+
+    Returns:
+        List of cell tuples (x0, y0, x1, y1); empty if no grid was found.
+    """
+    cells = []  # table cells as (x0,y0,x1,y1) tuples
+
+    # calls fz_find_table_within_bounds
+    block = get_table_dict_from_rect(textpage, rect)
+    # No table structure found if not a grid block
+    if block.get("type") != mupdf.FZ_STEXT_BLOCK_GRID:
+        return cells
+    bbox = pymupdf.Rect(block["bbox"])  # resulting table bbox
+
+    # lists of (pos,uncertainty) tuples
+    xpos = sorted(block["xpos"], key=itemgetter(0))
+    ypos = sorted(block["ypos"], key=itemgetter(0))
+
+    # Keep only y positions that are certain and do not cut through words.
+    nypos = []
+    for y, yunc in ypos:
+        if yunc > 0 or intersects_words_h(bbox, y, word_rects):
+            continue
+        if nypos and (y - nypos[-1] < 3):
+            nypos[-1] = y  # snap close positions
+        else:
+            nypos.append(y)
+
+    # Tolerated x uncertainty: 35% of the inner row separators that remain
+    # (more lenient than for y, where no uncertainty is accepted at all).
+    max_x_unc = max(0, round((len(nypos) - 2) * 0.35))
+    nxpos = [p[0] for p in xpos if p[1] <= max_x_unc]
+    # Ensure a right table border. The emptiness check prevents an
+    # IndexError when every x position was too uncertain.
+    if not nxpos or bbox.x1 > nxpos[-1] + 3:
+        nxpos.append(bbox.x1)
+
+    # Compose cells from the remaining x and y positions.
+    for y0, y1 in zip(nypos, nypos[1:]):
+        row_box = pymupdf.Rect(bbox.x0, y0, bbox.x1, y1)
+        row_words = [r for r in word_rects if r in row_box]
+        # Within this row, drop x positions that cut through a word.
+        row_xpos = [x for x in nxpos if not any(r.x0 < x < r.x1 for r in row_words)]
+        for x0, x1 in zip(row_xpos, row_xpos[1:]):
+            cell = pymupdf.Rect(x0, y0, x1, y1)
+            if not cell.is_empty:  # valid cell
+                cells.append(tuple(cell))
+    return cells
+
+
 def extract_cells(textpage, cell, markdown=False):
-    """Extract text from a rect-like 'cell' as plain or MD style text.
+    """Extract text from a rect-like 'cell' as plain or MD styled text.
 
     This function should ultimately be used to extract text from a table cell.
     Markdown output will only work correctly if extraction flag bit
@@ -171,9 +253,12 @@ def extract_cells(textpage, cell, markdown=False):
                 # only include chars with more than 50% bbox overlap
                 span_text = ""
                 for char in span["chars"]:
-                    bbox = Rect(char["bbox"])
+                    this_char = char["c"]
+                    bbox = pymupdf.Rect(char["bbox"])
                     if abs(bbox & cell) > 0.5 * abs(bbox):
-                        span_text += char["c"]
+                        span_text += this_char
+                    elif this_char in white_spaces:
+                        span_text += " "
 
                 if not span_text:
                     continue  # skip empty span
@@ -190,10 +275,10 @@ def extract_cells(textpage, cell, markdown=False):
                 if span["char_flags"] & TEXT_BOLD:
                     prefix += "**"
                     suffix = "**" + suffix
-                if span["flags"] & TEXT_FONT_ITALIC:
+                if span["flags"] & pymupdf.TEXT_FONT_ITALIC:
                     prefix += "_"
                     suffix = "_" + suffix
-                if span["flags"] & TEXT_FONT_MONOSPACED:
+                if span["flags"] & pymupdf.TEXT_FONT_MONOSPACED:
                     prefix += "`"
                     suffix = "`" + suffix
 
@@ -1358,7 +1443,7 @@ def bbox_to_corners(bbox) -> tuple:
     # PyMuPDF modification:
     # Remove tables without text or having only 1 column
     for i in range(len(tables) - 1, -1, -1):
-        r = EMPTY_RECT()
+        r = pymupdf.EMPTY_RECT()
         x1_vals = set()
         x0_vals = set()
         for c in tables[i]:
@@ -1556,7 +1641,7 @@ def to_pandas(self, **kwargs):
         try:
             import pandas as pd
         except ModuleNotFoundError:
-            message("Package 'pandas' is not installed")
+            pymupdf.message("Package 'pandas' is not installed")
             raise
 
         pd_dict = {}
@@ -1618,7 +1703,7 @@ def top_row_bg_color(self):
             above. If different, return True indicating that the original
             table top row is already the header.
             """
-            bbox0 = Rect(self.rows[0].bbox)
+            bbox0 = pymupdf.Rect(self.rows[0].bbox)
             bboxt = bbox0 + (0, -bbox0.height, 0, -bbox0.height)  # area above
             top_color0 = page.get_pixmap(clip=bbox0).color_topusage()[1]
             top_colort = page.get_pixmap(clip=bboxt).color_topusage()[1]
@@ -1636,15 +1721,17 @@ def row_has_bold(bbox):
 
             Returns True if any spans are bold else False.
             """
-            blocks = page.get_text("dict", flags=TEXTFLAGS_TEXT, clip=bbox)["blocks"]
+            blocks = page.get_text("dict", flags=pymupdf.TEXTFLAGS_TEXT, clip=bbox)[
+                "blocks"
+            ]
             spans = [s for b in blocks for l in b["lines"] for s in l["spans"]]
 
-            return any(s["flags"] & TEXT_FONT_BOLD for s in spans)
+            return any(s["flags"] & pymupdf.TEXT_FONT_BOLD for s in spans)
 
         try:
             row = self.rows[0]
             cells = row.cells
-            bbox = Rect(row.bbox)
+            bbox = pymupdf.Rect(row.bbox)
         except IndexError:  # this table has no rows
             return None
 
@@ -1686,7 +1773,9 @@ def row_has_bold(bbox):
         clip.y0 = 0  # start at top of page
         clip.y1 = bbox.y0  # end at top of table
 
-        blocks = page.get_text("dict", clip=clip, flags=TEXTFLAGS_TEXT)["blocks"]
+        blocks = page.get_text("dict", clip=clip, flags=pymupdf.TEXTFLAGS_TEXT)[
+            "blocks"
+        ]
         # non-empty, non-superscript spans above table, sorted descending by y1
         spans = sorted(
             [
@@ -1696,7 +1785,7 @@ def row_has_bold(bbox):
                 for s in l["spans"]
                 if not (
                     white_spaces.issuperset(s["text"])
-                    or s["flags"] & TEXT_FONT_SUPERSCRIPT
+                    or s["flags"] & pymupdf.TEXT_FONT_SUPERSCRIPT
                 )
             ],
             key=lambda s: s["bbox"][3],
@@ -1712,7 +1801,7 @@ def row_has_bold(bbox):
             s = spans[i]
             y1 = s["bbox"][3]  # span bottom
             h = y1 - s["bbox"][1]  # span bbox height
-            bold = s["flags"] & TEXT_FONT_BOLD
+            bold = s["flags"] & pymupdf.TEXT_FONT_BOLD
 
             # use first item to start the lists
             if i == 0:
@@ -1759,7 +1848,7 @@ def row_has_bold(bbox):
             return header_top_row
 
         # re-compute clip above table
-        nclip = EMPTY_RECT()
+        nclip = pymupdf.EMPTY_RECT()
         for s in [s for s in spans if s["bbox"][3] >= select[-1]]:
             nclip |= s["bbox"]
         if not nclip.is_empty:
@@ -1768,7 +1857,7 @@ def row_has_bold(bbox):
         clip.y1 = bbox.y0  # make sure we still include every word above
 
         # Confirm that no word in clip is intersecting a column separator
-        word_rects = [Rect(w[:4]) for w in page.get_text("words", clip=clip)]
+        word_rects = [pymupdf.Rect(w[:4]) for w in page.get_text("words", clip=clip)]
         word_tops = sorted(list(set([r[1] for r in word_rects])), reverse=True)
 
         select = []
@@ -2074,7 +2163,7 @@ def make_chars(page, clip=None):
         for line in block["lines"]:
             ldir = line["dir"]  # = (cosine, sine) of angle
             ldir = (round(ldir[0], 4), round(ldir[1], 4))
-            matrix = Matrix(ldir[0], -ldir[1], ldir[1], ldir[0], 0, 0)
+            matrix = pymupdf.Matrix(ldir[0], -ldir[1], ldir[1], ldir[0], 0, 0)
             if ldir[1] == 0:
                 upright = True
             else:
@@ -2082,11 +2171,11 @@ def make_chars(page, clip=None):
             for span in sorted(line["spans"], key=lambda s: s["bbox"][0]):
                 fontname = span["font"]
                 fontsize = span["size"]
-                color = sRGB_to_pdf(span["color"])
+                color = pymupdf.sRGB_to_pdf(span["color"])
                 for char in sorted(span["chars"], key=lambda c: c["bbox"][0]):
-                    bbox = Rect(char["bbox"])
+                    bbox = pymupdf.Rect(char["bbox"])
                     bbox_ctm = bbox * ctm
-                    origin = Point(char["origin"]) * ctm
+                    origin = pymupdf.Point(char["origin"]) * ctm
                     matrix.e = origin.x
                     matrix.f = origin.y
                     text = char["c"]
@@ -2136,9 +2225,9 @@ def make_edges(page, clip=None, tset=None, paths=None, add_lines=None, add_boxes
     prect = page.rect
     if page.rotation in (90, 270):
         w, h = prect.br
-        prect = Rect(0, 0, h, w)
+        prect = pymupdf.Rect(0, 0, h, w)
     if clip is not None:
-        clip = Rect(clip)
+        clip = pymupdf.Rect(clip)
     else:
         clip = prect
 
@@ -2309,8 +2398,8 @@ def make_line(p, p1, p2, clip):
                     rect.width <= min_length and rect.width < rect.height
                 ):  # simulates a vertical line
                     x = abs(rect.x1 + rect.x0) / 2  # take middle value for x
-                    p1 = Point(x, rect.y0)
-                    p2 = Point(x, rect.y1)
+                    p1 = pymupdf.Point(x, rect.y0)
+                    p2 = pymupdf.Point(x, rect.y1)
                     line_dict = make_line(p, p1, p2, clip)
                     if line_dict:
                         EDGES.append(line_to_edge(line_dict))
@@ -2320,8 +2409,8 @@ def make_line(p, p1, p2, clip):
                     rect.height <= min_length and rect.height < rect.width
                 ):  # simulates a horizontal line
                     y = abs(rect.y1 + rect.y0) / 2  # take middle value for y
-                    p1 = Point(rect.x0, y)
-                    p2 = Point(rect.x1, y)
+                    p1 = pymupdf.Point(rect.x0, y)
+                    p2 = pymupdf.Point(rect.x1, y)
                     line_dict = make_line(p, p1, p2, clip)
                     if line_dict:
                         EDGES.append(line_to_edge(line_dict))
@@ -2386,8 +2475,8 @@ def make_line(p, p1, p2, clip):
     else:
         add_lines = []
     for p1, p2 in add_lines:
-        p1 = Point(p1)
-        p2 = Point(p2)
+        p1 = pymupdf.Point(p1)
+        p2 = pymupdf.Point(p2)
         line_dict = make_line(path, p1, p2, clip)
         if line_dict:
             EDGES.append(line_to_edge(line_dict))
@@ -2397,7 +2486,7 @@ def make_line(p, p1, p2, clip):
     else:
         add_boxes = []
     for box in add_boxes:
-        r = Rect(box)
+        r = pymupdf.Rect(box)
         line_dict = make_line(path, r.tl, r.bl, clip)
         if line_dict:
             EDGES.append(line_to_edge(line_dict))
@@ -2426,17 +2515,17 @@ def page_rotation_set0(page):
 
     if rot == 90:
         # before derotation, shift content horizontally
-        mat0 = Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0)
+        mat0 = pymupdf.Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0)
     elif rot == 270:
         # before derotation, shift content vertically
-        mat0 = Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0)
+        mat0 = pymupdf.Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0)
     else:
-        mat0 = Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0)
+        mat0 = pymupdf.Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0)
 
     # prefix with derotation matrix
     mat = mat0 * page.derotation_matrix
     cmd = b"%g %g %g %g %g %g cm " % tuple(mat)
-    xref = TOOLS._insert_contents(page, cmd, 0)
+    xref = pymupdf.TOOLS._insert_contents(page, cmd, 0)
 
     # swap x- and y-coordinates
     if rot in (90, 270):
@@ -2496,11 +2585,12 @@ def find_tables(
     add_boxes=None,  # user-specified rectangles
     paths=None,  # accept vector graphics as parameter
 ):
+    pymupdf._warn_layout_once()
     global CHARS, EDGES
     CHARS = []
     EDGES = []
-    old_small = bool(TOOLS.set_small_glyph_heights())  # save old value
-    TOOLS.set_small_glyph_heights(True)  # we need minimum bboxes
+    old_small = bool(pymupdf.TOOLS.set_small_glyph_heights())  # save old value
+    pymupdf.TOOLS.set_small_glyph_heights(True)  # we need minimum bboxes
     if page.rotation != 0:
         page, old_xref, old_rot, old_mediabox = page_rotation_set0(page)
     else:
@@ -2543,21 +2633,65 @@ def find_tables(
         "text_x_tolerance": text_x_tolerance,
         "text_y_tolerance": text_y_tolerance,
     }
-    tset = TableSettings.resolve(settings=settings)
-    page.table_settings = tset
-
-    make_chars(page, clip=clip)  # create character list of page
-    make_edges(
-        page,
-        clip=clip,
-        tset=tset,
-        paths=paths,
-        add_lines=add_lines,
-        add_boxes=add_boxes,
-    )  # create lines and curves
-    tables = TableFinder(page, settings=tset)
-
-    TOOLS.set_small_glyph_heights(old_small)
-    if old_xref is not None:
-        page = page_rotation_reset(page, old_xref, old_rot, old_mediabox)
-    return tables
+
+    old_quad_corrections = pymupdf.TOOLS.unset_quad_corrections()
+    try:
+        page.get_layout()
+        if page.layout_information:
+            pymupdf.TOOLS.unset_quad_corrections(True)
+            boxes = [
+                pymupdf.Rect(b[:4]) for b in page.layout_information if b[-1] == "table"
+            ]
+        else:
+            boxes = []
+
+        if boxes:  # layout did find some tables
+            pass
+        elif page.layout_information is not None:
+            # layout was executed but found no tables
+            # make sure we exit quickly with an empty TableFinder
+            tbf = TableFinder(page)
+            return tbf
+
+        tset = TableSettings.resolve(settings=settings)
+        page.table_settings = tset
+
+        make_chars(page, clip=clip)  # create character list of page
+        make_edges(
+            page,
+            clip=clip,
+            tset=tset,
+            paths=paths,
+            add_lines=add_lines,
+            add_boxes=add_boxes,
+        )  # create lines and curves
+
+        tbf = TableFinder(page, settings=tset)
+
+        if boxes:
+            # only keep Finder tables that match a layout box
+            tbf.tables = [
+                tab
+                for tab in tbf.tables
+                if any(_iou(tab.bbox, r) >= 0.6 for r in boxes)
+            ]
+        # build the complementary list of layout table boxes
+        my_boxes = [
+            r for r in boxes if all(_iou(r, tab.bbox) < 0.6 for tab in tbf.tables)
+        ]
+        if my_boxes:
+            word_rects = [pymupdf.Rect(w[:4]) for w in TEXTPAGE.extractWORDS()]
+            tp2 = page.get_textpage(flags=TABLE_DETECTOR_FLAGS)
+        for rect in my_boxes:
+            cells = make_table_from_bbox(tp2, word_rects, rect)  # pylint: disable=E0606
+            tbf.tables.append(Table(page, cells))
+    except Exception as e:
+        pymupdf.message("find_tables: exception occurred: %s" % str(e))
+        return None
+    finally:
+        pymupdf.TOOLS.set_small_glyph_heights(old_small)
+        if old_xref is not None:
+            page = page_rotation_reset(page, old_xref, old_rot, old_mediabox)
+        pymupdf.TOOLS.unset_quad_corrections(old_quad_corrections)
+
+    return tbf