pymupdf · julian-smith-artifex-com · Dec 12, 2022 · Dec 7, 2022 · Dec 9, 2022 · Dec 9, 2022
diff --git a/README.md b/README.md
@@ -1,8 +1,9 @@
-# PyMuPDF 1.21.0
+# PyMuPDF 1.21.1rc1
 
 ![logo](https://artifex.com/images/logos/py-mupdf-github-icon.png)
 
-Release date: November  8, 2022
+
+Release date: December 12, 2022
 
 On **[PyPI](https://pypi.org/project/PyMuPDF)** since August 2016: [![Downloads](https://static.pepy.tech/personalized-badge/pymupdf?period=total&units=international_system&left_color=black&right_color=orange&left_text=Downloads)](https://pepy.tech/project/pymupdf)
 

diff --git a/changes.txt b/changes.txt
@@ -1,8 +1,41 @@
 Change Log
 ==========
 
+**Changes in Version 1.21.1rc1 (2022-12-12)**
 
-**Changes in Version 1.21.0**
+* This release uses ``MuPDF-1.21.1``.
+
+* Bug fixes:
+
+  * **Fixed** `#2110 <https://github.com/pymupdf/PyMuPDF/issues/2110>`_: Fully embedded font is extracted only partially if it occupies more than one object
+  * **Fixed** `#2094 <https://github.com/pymupdf/PyMuPDF/issues/2094>`_: Rectangle Detection Logic
+  * **Fixed** `#2088 <https://github.com/pymupdf/PyMuPDF/issues/2088>`_: Destination point not set for named links in toc
+  * **Fixed** `#2087 <https://github.com/pymupdf/PyMuPDF/issues/2087>`_: Image with Filter "[/FlateDecode/JPXDecode]" not extracted
+  * **Fixed** `#2086 <https://github.com/pymupdf/PyMuPDF/issues/2086>`_: Document.save() owner_pw & user_pw has buffer overflow bug
+  * **Fixed** `#2076 <https://github.com/pymupdf/PyMuPDF/issues/2076>`_: Segfault in fitz.py
+  * **Fixed** `#2051 <https://github.com/pymupdf/PyMuPDF/issues/2051>`_: Missing DPI Parameter
+  * **Fixed** `#2048 <https://github.com/pymupdf/PyMuPDF/issues/2048>`_: Invalid size of TextPage and bbox with newest version 1.21.0
+  * **Fixed** `#2045 <https://github.com/pymupdf/PyMuPDF/issues/2045>`_: SystemError: <built-in function Page_get_texttrace> returned a result with an error set
+  * **Fixed** `#2039 <https://github.com/pymupdf/PyMuPDF/issues/2039>`_: 1.21.0 fails to build against system libmupdf
+  * **Fixed** `#2036 <https://github.com/pymupdf/PyMuPDF/issues/2036>`_: Archive::Archive defined twice
+
+* Other:
+
+  * Swallow "&zoom=nan" in link uri strings.
+  * Add new Page utility methods ``Page.replace_image()`` and ``Page.delete_image()``.
+
+* Documentation:
+
+  * `#2040 <https://github.com/pymupdf/PyMuPDF/issues/2040>`_: Added note about test failure with non-default build of MuPDF, to ``tests/README.md``.
+  * `#2037 <https://github.com/pymupdf/PyMuPDF/issues/2037>`_: In ``docs/installation.rst``, mention incompatibility with chocolatey.org on Windows.
+  * `#2061 <https://github.com/pymupdf/PyMuPDF/issues/2061>`_: Fixed description of ``Annot.file_info``.
+  * `#2065 <https://github.com/pymupdf/PyMuPDF/issues/2065>`_: Show how to insert internal PDF link.
+  * Improved description of building from source without an sdist.
+  * Added information about running tests.
+  * `#2084 <https://github.com/pymupdf/PyMuPDF/issues/2084>`_: Fixed broken link to PyMuPDF-Utilities.
+
+
+**Changes in Version 1.21.0 (2022-11-08)**
 
 * This release uses ``MuPDF-1.21.0``.
 

diff --git a/docs/changes.rst b/docs/changes.rst
@@ -3,8 +3,41 @@
 Change Log
 ==========
 
+**Changes in Version 1.21.1rc1 (2022-12-12)**
 
-**Changes in Version 1.21.0**
+* This release uses ``MuPDF-1.21.1``.
+
+* Bug fixes:
+
+  * **Fixed** `#2110 <https://github.com/pymupdf/PyMuPDF/issues/2110>`_: Fully embedded font is extracted only partially if it occupies more than one object
+  * **Fixed** `#2094 <https://github.com/pymupdf/PyMuPDF/issues/2094>`_: Rectangle Detection Logic
+  * **Fixed** `#2088 <https://github.com/pymupdf/PyMuPDF/issues/2088>`_: Destination point not set for named links in toc
+  * **Fixed** `#2087 <https://github.com/pymupdf/PyMuPDF/issues/2087>`_: Image with Filter "[/FlateDecode/JPXDecode]" not extracted
+  * **Fixed** `#2086 <https://github.com/pymupdf/PyMuPDF/issues/2086>`_: Document.save() owner_pw & user_pw has buffer overflow bug
+  * **Fixed** `#2076 <https://github.com/pymupdf/PyMuPDF/issues/2076>`_: Segfault in fitz.py
+  * **Fixed** `#2051 <https://github.com/pymupdf/PyMuPDF/issues/2051>`_: Missing DPI Parameter
+  * **Fixed** `#2048 <https://github.com/pymupdf/PyMuPDF/issues/2048>`_: Invalid size of TextPage and bbox with newest version 1.21.0
+  * **Fixed** `#2045 <https://github.com/pymupdf/PyMuPDF/issues/2045>`_: SystemError: <built-in function Page_get_texttrace> returned a result with an error set
+  * **Fixed** `#2039 <https://github.com/pymupdf/PyMuPDF/issues/2039>`_: 1.21.0 fails to build against system libmupdf
+  * **Fixed** `#2036 <https://github.com/pymupdf/PyMuPDF/issues/2036>`_: Archive::Archive defined twice
+
+* Other:
+
+  * Swallow "&zoom=nan" in link uri strings.
+  * Add new Page utility methods ``Page.replace_image()`` and ``Page.delete_image()``.
+
+* Documentation:
+
+  * `#2040 <https://github.com/pymupdf/PyMuPDF/issues/2040>`_: Added note about test failure with non-default build of MuPDF, to ``tests/README.md``.
+  * `#2037 <https://github.com/pymupdf/PyMuPDF/issues/2037>`_: In ``docs/installation.rst``, mention incompatibility with chocolatey.org on Windows.
+  * `#2061 <https://github.com/pymupdf/PyMuPDF/issues/2061>`_: Fixed description of ``Annot.file_info``.
+  * `#2065 <https://github.com/pymupdf/PyMuPDF/issues/2065>`_: Show how to insert internal PDF link.
+  * Improved description of building from source without an sdist.
+  * Added information about running tests.
+  * `#2084 <https://github.com/pymupdf/PyMuPDF/issues/2084>`_: Fixed broken link to PyMuPDF-Utilities.
+
+
+**Changes in Version 1.21.0 (2022-11-08)**
 
 * This release uses ``MuPDF-1.21.0``.
 

diff --git a/docs/conf.py b/docs/conf.py
@@ -43,7 +43,7 @@
 # built documents.
 #
 # The full version, including alpha/beta/rc tags.
-release = "1.21.0"
+release = "1.21.1rc1"
 
 # The short X.Y version
 version = release

diff --git a/docs/page.rst b/docs/page.rst
@@ -62,6 +62,7 @@ In a nutshell, this is what you can do with PyMuPDF:
 :meth:`Page.apply_redactions`      PDF only: process the redactions of the page
 :meth:`Page.bound`                 rectangle of the page
 :meth:`Page.delete_annot`          PDF only: delete an annotation
+:meth:`Page.delete_image`          PDF only: delete an image
 :meth:`Page.delete_link`           PDF only: delete a link
 :meth:`Page.delete_widget`         PDF only: delete a widget / field
 :meth:`Page.draw_bezier`           PDF only: draw a cubic Bezier curve
@@ -100,6 +101,7 @@ In a nutshell, this is what you can do with PyMuPDF:
 :meth:`Page.load_widget`           PDF only: load a specific field
 :meth:`Page.load_links`            return the first link on a page
 :meth:`Page.new_shape`             PDF only: create a new :ref:`Shape`
+:meth:`Page.replace_image`         PDF only: replace an image
 :meth:`Page.search_for`            search for a string
 :meth:`Page.set_artbox`            PDF only: modify ``/ArtBox``
 :meth:`Page.set_bleedbox`          PDF only: modify ``/BleedBox``
@@ -949,6 +951,45 @@ In a nutshell, this is what you can do with PyMuPDF:
 
          6. Another efficient way to display the same image on multiple pages is another method: :meth:`show_pdf_page`. Consult :meth:`Document.convert_to_pdf` for how to obtain intermediary PDFs usable for that method. Demo script `fitz-logo.py <https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/demo/fitz-logo.py>`_ implements a fairly complete approach.
 
+
+   .. index::
+      pair: filename; replace_image
+      pair: pixmap; replace_image
+      pair: stream; replace_image
+      pair: xref; replace_image
+
+   .. method:: replace_image(xref, filename=None, pixmap=None, stream=None)
+
+      * New in v1.21.1
+
+      Replace the image at xref with another one.
+
+      :arg int xref: the :data:`xref` of the image.
+      :arg filename: the filename of the new image.
+      :arg pixmap: the :ref:`Pixmap` of the new image.
+      :arg stream: the memory area containing the new image.
+
+      Arguments ``filename``, ``pixmap``, ``stream`` have the same meaning as in :meth:`Page.insert_image`; in particular, exactly one of these must be provided.
+
+      This is a **global replacement:** the new image will also be shown wherever the old one has been displayed throughout the file.
+
+      This method mainly exists for technical purposes. Typical uses include replacing large images with smaller versions (e.g. a lower resolution, or grayscale instead of color) or changing transparency.
+
+
+   .. index::
+      pair: xref; delete_image
+
+   .. method:: delete_image(xref)
+
+      * New in v1.21.1
+
+      Delete the image at xref. This is slightly misleading: the image is actually replaced with a small transparent :ref:`Pixmap` using :meth:`Page.replace_image` (described above). The visible effect, however, is equivalent.
+
+      :arg int xref: the :data:`xref` of the image.
+
+      This is a **global replacement:** the image will disappear wherever the old one has been displayed throughout the file.
+
+
    .. index::
       pair: blocks; Page.get_text
       pair: dict; Page.get_text

diff --git a/docs/version.rst b/docs/version.rst
@@ -1,6 +1,6 @@
 Covered Version
 --------------------
 
-This documentation covers PyMuPDF v1.21.0 features as of **2022-11-08 00:00:01**.
+This documentation covers PyMuPDF v1.21.1rc1 features as of **2022-12-12 00:00:01**.
 
 .. note:: The major and minor versions of **PyMuPDF** and **MuPDF** will always be the same. Only the third qualifier (patch level) may deviate from that of MuPDF.
diff --git a/fitz/__init__.py b/fitz/__init__.py
@@ -7,6 +7,7 @@
 # maintained and developed by Artifex Software, Inc. https://artifex.com.
 # ------------------------------------------------------------------------
 import sys
+
 from fitz.fitz import *
 
 # define the supported colorspaces for convenience
@@ -31,12 +32,16 @@
 # This atexit handler runs, but doesn't cause ~Tools() to be run.
 #
 import atexit
-def cleanup_tools( TOOLS):
-    #print(f'cleanup_tools: TOOLS={TOOLS} id(TOOLS)={id(TOOLS)}')
-    #print(f'TOOLS.thisown={TOOLS.thisown}')
+
+
+def cleanup_tools(TOOLS):
+    # print(f'cleanup_tools: TOOLS={TOOLS} id(TOOLS)={id(TOOLS)}')
+    # print(f'TOOLS.thisown={TOOLS.thisown}')
     del TOOLS
     del fitz.TOOLS
-atexit.register( cleanup_tools, TOOLS)
+
+
+atexit.register(cleanup_tools, TOOLS)
 
 if fitz.VersionFitz != fitz.TOOLS.mupdf_version():
     v1 = fitz.VersionFitz.split(".")
@@ -50,7 +55,6 @@ def cleanup_tools( TOOLS):
 # copy functions in 'utils' to their respective fitz classes
 import fitz.utils
 
-
 # ------------------------------------------------------------------------------
 # General
 # ------------------------------------------------------------------------------
@@ -127,6 +131,8 @@ def cleanup_tools( TOOLS):
 fitz.Page.get_label = fitz.utils.get_label
 fitz.Page.get_image_rects = fitz.utils.get_image_rects
 fitz.Page.get_textpage_ocr = fitz.utils.get_textpage_ocr
+fitz.Page.delete_image = fitz.utils.delete_image
+fitz.Page.replace_image = fitz.utils.replace_image
 
 # ------------------------------------------------------------------------
 # Annot

diff --git a/fitz/__main__.py b/fitz/__main__.py
@@ -512,7 +512,7 @@ def extract_objects(args):
                     if ext == "n/a" or not buffer:
                         continue
                     outname = os.path.join(
-                        out_dir, fontname.replace(" ", "-") + "." + ext
+                        out_dir, f"{fontname.replace(' ', '-')}-{xref}.{ext}"
                     )
                     outfile = open(outname, "wb")
                     outfile.write(buffer)

diff --git a/fitz/fitz.i b/fitz/fitz.i
@@ -2775,14 +2775,16 @@ if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not
 
                 if (pdf_is_jpx_image(gctx, obj)) {
                     img_type = FZ_IMAGE_JPX;
+                    res = pdf_load_stream(gctx, obj);
                     ext = "jpx";
                 }
                 if (JM_is_jbig2_image(gctx, obj)) {
                     img_type = FZ_IMAGE_JBIG2;
+                    res = pdf_load_stream(gctx, obj);
                     ext = "jb2";
                 }
-                res = pdf_load_raw_stream(gctx, obj);
                 if (img_type == FZ_IMAGE_UNKNOWN) {
+                    res = pdf_load_raw_stream(gctx, obj);
                     unsigned char *c = NULL;
                     fz_buffer_storage(gctx, res, &c);
                     img_type = fz_recognize_image_format(gctx, c);
@@ -2795,9 +2797,10 @@ if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not
                     res = fz_new_buffer_from_image_as_png(gctx, img,
                                 fz_default_color_params);
                     ext = "png";
-                } else /*if (smask == 0)*/ {
+                } else {
                     img = fz_new_image_from_buffer(gctx, res);
                 }
+
                 fz_image_resolution(img, &xres, &yres);
                 width = img->w;
                 height = img->h;
@@ -2835,7 +2838,8 @@ if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not
 
             fz_catch(gctx) {
                 Py_CLEAR(rc);
-                Py_RETURN_NONE;
+                fz_warn(gctx, "%s", fz_caught_message(gctx));
+                Py_RETURN_FALSE;
             }
             if (!rc)
                 Py_RETURN_NONE;
@@ -12332,6 +12336,7 @@ struct Archive
             }
             return (struct Archive *) arch;
         }
+
         Archive(PyObject *a0=NULL, const char *path=NULL)
         {
             fz_archive *arch=NULL;
@@ -13566,6 +13571,7 @@ struct Story
             return ret;
         }
 
+
         void draw( struct DeviceWrapper* device, PyObject* matrix=NULL)
         {
             fz_matrix ctm2 = JM_matrix_from_py( matrix);

diff --git a/fitz/helper-devices.i b/fitz/helper-devices.i
@@ -101,6 +101,7 @@ jm_checkrect()
 	dev_linecount = 0; // reset line count
 	long orientation = 0;
 	fz_point ll, lr, ur, ul;
+	fz_rect r;
 	PyObject *rect;
 	PyObject *line0, *line2;
 	PyObject *items = PyDict_GetItem(dev_pathdict, dictkey_items);
@@ -109,50 +110,35 @@ jm_checkrect()
 	line0 = PyList_GET_ITEM(items, len - 3);
 	ll = JM_point_from_py(PyTuple_GET_ITEM(line0, 1));
 	lr = JM_point_from_py(PyTuple_GET_ITEM(line0, 2));
-
+	// no need to extract "line1"!
 	line2 = PyList_GET_ITEM(items, len - 1);
 	ur = JM_point_from_py(PyTuple_GET_ITEM(line2, 1));
 	ul = JM_point_from_py(PyTuple_GET_ITEM(line2, 2));
 
 	/*
 	---------------------------------------------------------------------
-	Three connected lines: at least a quad! Check whether even a rect.
-	For this, the lines must be parallel to the axes.
 	Assumption:
 	For decomposing rects, MuPDF always starts with a horizontal line,
 	followed by a vertical line, followed by a horizontal line.
 	We will also check orientation of the enclosed area and add this info
 	as '+1' for anti-clockwise, '-1' for clockwise orientation.
 	---------------------------------------------------------------------
 	*/
-	if (ll.y != lr.y) {  // not horizontal
-		goto drop_out;
-	}
-	if (lr.x != ur.x) {  // not vertical
-		goto drop_out;
-	}
-	if (ur.y != ul.y) {  // not horizontal
-		goto drop_out;
+	if (ll.y != lr.y ||
+		ll.x != ul.x ||
+		ur.y != ul.y ||
+		ur.x != lr.x) {
+		goto drop_out;  // not a rectangle
 	}
-	// we have a rect, determine orientation
-	if (ll.x < lr.x) {  // move left to right
-		if (lr.y > ur.y) {  // move upwards
-			orientation = 1;
-		} else {
-			orientation = -1;
-		}
-	} else {  // move right to left
-		if (lr.y < ur.y) {  // move downwards
-			orientation = 1;
-		} else {
-			orientation = -1;
-		}
+
+	// we have a rect, replace last 3 "l" items by one "re" item.
+	if (ul.y < lr.y) {
+		r = fz_make_rect(ul.x, ul.y, lr.x, lr.y);
+		orientation = 1;
+	} else {
+		r = fz_make_rect(ll.x, ll.y, ur.x, ur.y);
+		orientation = -1;
 	}
-	// Replace last 3 "l" items by one "re" item.
-	fz_rect r = fz_make_rect(ul.x, ul.y, ul.x, ul.y);
-	r = fz_include_point_in_rect(r, ur);
-	r = fz_include_point_in_rect(r, ll);
-	r = fz_include_point_in_rect(r, lr);
 	rect = PyTuple_New(3);
 	PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("re"));
 	PyTuple_SET_ITEM(rect, 1, JM_py_from_rect(r));