Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/pixmap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,9 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work".
367 ns ± 1.75 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
In [4]: %timeit len(pix.samples)
3.52 ms ± 57.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

After the Pixmap has been destroyed, any attempt to use the memoryview
will fail with ValueError.

:type: memoryview

Expand All @@ -559,6 +562,9 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work".
img = QtGui.QImage(pix.samples_ptr, pix.width, pix.height, format) # (2)

Both of the above lead to the same Qt image, but (2) can be **many hundred times faster**, because it avoids an additional copy of the pixel area.

Warning: after the Pixmap has been destroyed, this pointer is invalid, and
attempting to use it may crash the Python interpreter.

:type: int

Expand Down
41 changes: 27 additions & 14 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4513,7 +4513,7 @@ def get_page_fonts(self, pno: int, full: bool =False) -> list:
exception_info()
raise ValueError("need a Page or page number")
val = self._getPageInfo(pno, 1)
if full is False:
if not full:
return [v[:-1] for v in val]
return val

Expand All @@ -4525,7 +4525,7 @@ def get_page_images(self, pno: int, full: bool =False) -> list:
if not self.is_pdf:
return ()
val = self._getPageInfo(pno, 2)
if full is False:
if not full:
return [v[:-1] for v in val]
return val

Expand Down Expand Up @@ -6720,7 +6720,7 @@ def __bool__(self):
def __eq__(self, mat):
if not hasattr(mat, "__len__"):
return False
return len(mat) == 6 and bool(self - mat) is False
return len(mat) == 6 and not (self - mat)

def __getitem__(self, i):
return (self.a, self.b, self.c, self.d, self.e, self.f)[i]
Expand Down Expand Up @@ -9277,7 +9277,7 @@ def get_image_bbox(self, name, transform=0):
else:
raise ValueError("found multiple images named '%s'." % name)
xref = item[-1]
if xref != 0 or transform is True:
if xref != 0 or transform:
try:
return self.get_image_rects(item, transform=transform)[0]
except Exception:
Expand Down Expand Up @@ -10059,6 +10059,9 @@ def __init__(self, *args):
# data. Doesn't seem to make much difference to Pixmap.set_pixel() so
# not currently used.
self._memory_view = None

# Cache for property `self.samples_mv`.
self._samples_mv = None

def __len__(self):
return self.size
Expand Down Expand Up @@ -10339,7 +10342,13 @@ def samples_mv(self):
'''
Pixmap samples memoryview.
'''
return mupdf.fz_pixmap_samples_memoryview(self.this)
# We remember the returned memoryview so that our `__del__()` can
# release it; otherwise accessing it after we have been destructed will
# fail, possibly crashing Python; this is #4155.
#
if self._samples_mv is None:
self._samples_mv = mupdf.fz_pixmap_samples_memoryview(self.this)
return self._samples_mv

@property
def samples_ptr(self):
Expand Down Expand Up @@ -10625,6 +10634,10 @@ def yres(self):

width = w
height = h

def __del__(self):
    """Release the cached `samples_mv` memoryview when the Pixmap is destroyed.

    Once released, any later use of the memoryview raises `ValueError`
    instead of touching pixel memory that no longer belongs to a live Pixmap.
    """
    # Use getattr(): `__del__()` also runs for instances whose `__init__()`
    # raised before `_samples_mv` was assigned.
    samples_mv = getattr(self, "_samples_mv", None)
    # Compare against None instead of truth-testing: truthiness of a
    # memoryview calls len(), which is 0 (falsy) for an empty view and raises
    # ValueError for a view that the caller has already released.
    if samples_mv is not None:
        # release() is safe to call more than once.
        samples_mv.release()


del Point
Expand All @@ -10646,7 +10659,7 @@ def __bool__(self):
def __eq__(self, p):
if not hasattr(p, "__len__"):
return False
return len(p) == 2 and bool(self - p) is False
return len(p) == 2 and not (self - p)

def __getitem__(self, i):
return (self.x, self.y)[i]
Expand Down Expand Up @@ -10677,7 +10690,7 @@ def __init__(self, *args, x=None, y=None):
self.x = l.x
self.y = l.y
else:
if hasattr(l, "__getitem__") is False:
if not hasattr(l, "__getitem__"):
raise ValueError("Point: bad args")
if len(l) != 2:
raise ValueError("Point: bad seq len")
Expand Down Expand Up @@ -10891,7 +10904,7 @@ def __init__(self, *args, ul=None, ur=None, ll=None, lr=None):
if isinstance(l, mupdf.FzQuad):
self.this = l
self.ul, self.ur, self.ll, self.lr = Point(l.ul), Point(l.ur), Point(l.ll), Point(l.lr)
elif hasattr(l, "__getitem__") is False:
elif not hasattr(l, "__getitem__"):
raise ValueError("Quad: bad args")
elif len(l) != 4:
raise ValueError("Quad: bad seq len")
Expand Down Expand Up @@ -11092,7 +11105,7 @@ def __contains__(self, x):
def __eq__(self, rect):
if not hasattr(rect, "__len__"):
return False
return len(rect) == 4 and bool(self - rect) is False
return len(rect) == 4 and not (self - rect)

def __getitem__(self, i):
return (self.x0, self.y0, self.x1, self.y1)[i]
Expand Down Expand Up @@ -12593,7 +12606,7 @@ def extractDICT(self, cb=None, sort=False) -> dict:
if cb is not None:
val["width"] = cb.width
val["height"] = cb.height
if sort is True:
if sort:
blocks = val["blocks"]
blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
val["blocks"] = blocks
Expand Down Expand Up @@ -12659,7 +12672,7 @@ def default(self, s):
if cb is not None:
val["width"] = cb.width
val["height"] = cb.height
if sort is True:
if sort:
blocks = val["blocks"]
blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
val["blocks"] = blocks
Expand All @@ -12673,7 +12686,7 @@ def extractRAWDICT(self, cb=None, sort=False) -> dict:
if cb is not None:
val["width"] = cb.width
val["height"] = cb.height
if sort is True:
if sort:
blocks = val["blocks"]
blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
val["blocks"] = blocks
Expand All @@ -12693,7 +12706,7 @@ def default(self,s):
if cb is not None:
val["width"] = cb.width
val["height"] = cb.height
if sort is True:
if sort:
blocks = val["blocks"]
blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
val["blocks"] = blocks
Expand All @@ -12708,7 +12721,7 @@ def extractSelection(self, pointa, pointb):

def extractText(self, sort=False) -> str:
"""Return simple, bare text on the page."""
if sort is False:
if not sort:
return self._extractText(0)
blocks = self.extractBLOCKS()[:]
blocks.sort(key=lambda b: (b[3], b[0]))
Expand Down
20 changes: 11 additions & 9 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ def get_text_blocks(
blocks = tp.extractBLOCKS()
if textpage is None:
del tp
if sort is True:
if sort:
blocks.sort(key=lambda b: (b[3], b[0]))
return blocks

Expand Down Expand Up @@ -571,7 +571,7 @@ def sort_words(words):

if textpage is None:
del tp
if words and sort is True:
if words and sort:
# advanced sort if any words found
words = sort_words(words)

Expand Down Expand Up @@ -771,7 +771,7 @@ def full_ocr(page, dpi, language, flags):
return tpage

# if OCR for the full page, OCR its pixmap @ desired dpi
if full is True:
if full:
return full_ocr(page, dpi, language, flags)

# For partial OCR, make a normal textpage, then extend it with text that
Expand Down Expand Up @@ -948,7 +948,7 @@ def get_text(
page, clip=clip, flags=flags, textpage=textpage, sort=sort
)

if option == "text" and sort is True:
if option == "text" and sort:
return get_sorted_text(
page,
clip=clip,
Expand Down Expand Up @@ -1227,7 +1227,7 @@ def recurse(olItem, liste, lvl):
lvl = 1
liste = []
toc = recurse(olItem, liste, lvl)
if doc.is_pdf and simple is False:
if doc.is_pdf and not simple:
doc._extend_toc_items(toc)
return toc

Expand Down Expand Up @@ -4561,7 +4561,7 @@ def remove_hidden(cont_lines):
if doc.is_encrypted or doc.is_closed:
raise ValueError("closed or encrypted doc")

if clean_pages is False:
if not clean_pages:
hidden_text = False
redactions = False

Expand Down Expand Up @@ -4848,9 +4848,11 @@ def output_justify(start, line):
nlines = len(new_lines)
if nlines > max_lines:
msg = "Only fitting %i of %i lines." % (max_lines, nlines)
if warn is True:
if warn is None:
pass
elif warn:
pymupdf.message("Warning: " + msg)
elif warn is False:
else:
raise ValueError(msg)

start = pymupdf.Point()
Expand Down Expand Up @@ -5561,7 +5563,7 @@ def subset_fonts(doc: pymupdf.Document, verbose: bool = False, fallback: bool =
# Once the sets of used unicodes and glyphs are known, we compute a
# smaller version of the buffer user package fontTools.

if fallback is False: # by default use MuPDF function
if not fallback: # by default use MuPDF function
pdf = mupdf.pdf_document_from_fz_document(doc)
mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
return
Expand Down
18 changes: 18 additions & 0 deletions tests/test_pixmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,3 +428,21 @@ def test_3854():
assert rms < 1
else:
assert rms == 0


def test_4155():
    """Regression test for #4155: `Pixmap.samples_mv` after Pixmap destruction.

    Once the Pixmap has been deleted, using the memoryview it handed out must
    raise `ValueError` (released memoryview) rather than succeed.
    """
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3854.pdf')
    with pymupdf.open(path) as document:
        page = document[0]
        pixmap = page.get_pixmap()
        mv = pixmap.samples_mv
        # The view must be usable while the Pixmap is still alive.
        mv.tobytes()
        del page
        del pixmap
        try:
            mv.tobytes()
        except ValueError as e:
            print(f'Received exception: {e}')
            assert 'operation forbidden on released memoryview object' in str(e)
        else:
            assert 0, 'Did not receive expected exception when using defunct memoryview.'
Loading