From 06297717cfcc3c242618828be1a13db048e745fe Mon Sep 17 00:00:00 2001 From: ZLotusRain <1161525789@qq.com> Date: Wed, 4 Dec 2024 00:41:04 +0800 Subject: [PATCH] fix: don't use a new list to store result and don't modify the original item directly --- src/__init__.py | 436 ++++++++++++++++++++++++------------------------ 1 file changed, 217 insertions(+), 219 deletions(-) diff --git a/src/__init__.py b/src/__init__.py index 887c6c3fa..db99b2b8a 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -416,7 +416,7 @@ def _format_g(value, *, fmt='%g'): return mupdf.fz_format_double(fmt, value) else: return fmt % value - + format_g = _format_g # Names required by class method typing annotations. @@ -687,7 +687,7 @@ def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotat if g_exceptions_verbose: exception_info() message( f'cannot update annot: {e}') raise - + if (opacity < 0 or opacity >= 1) and not blend_mode: # no opacity, no blend_mode return True @@ -703,7 +703,7 @@ def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotat resources = mupdf.pdf_dict_get( ap, PDF_NAME('Resources')) if not resources.m_internal: # no Resources yet: make one resources = mupdf.pdf_dict_put_dict( ap, PDF_NAME('Resources'), 2) - + alp0 = mupdf.pdf_new_dict( page.doc(), 3) if opacity >= 0 and opacity < 1: mupdf.pdf_dict_put_real( alp0, PDF_NAME('CA'), opacity) @@ -1165,10 +1165,10 @@ def popup_rect(self): #log( '{rect=}') val = JM_py_from_rect(rect) #log( '{val=}') - + val = Rect(val) * self.get_parent().transformation_matrix val *= self.get_parent().derotation_matrix - + return val @property @@ -1191,7 +1191,7 @@ def rect(self): else: val = mupdf.pdf_bound_annot(self.this) val = Rect(val) - + # Caching self.parent_() reduces 1000x from 0.07 to 0.04. # p = self.get_parent() @@ -1467,7 +1467,7 @@ def set_rect(self, rect): """Set annotation rectangle.""" CheckParent(self) annot = self.this - + pdfpage = _pdf_annot_page(annot) rot = JM_rotate_page_matrix(pdfpage) r = mupdf.fz_transform_rect(JM_rect_from_py(rect), rot) @@ -1482,7 +1482,7 @@ def set_rect(self, rect): def set_rotation(self, rotate=0): """Set annotation rotation.""" CheckParent(self) - + annot = self.this type = mupdf.pdf_annot_type(annot) if type not in ( @@ -1856,7 +1856,7 @@ def update_timing_test(): for i in range( 30*1000): total += i return total - + @property def vertices(self): """annotation vertex points""" @@ -1881,7 +1881,7 @@ def vertices(self): if not o.m_internal: o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('L')) if not o.m_internal: o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('QuadPoints')) if not o.m_internal: o = mupdf.pdf_dict_gets(annot_obj, 'CL') - + if o.m_internal: # handle lists with 1-level depth # weiter @@ -1893,7 +1893,7 @@ def vertices(self): point = mupdf.fz_transform_point(point, page_ctm) res.append( (point.x, point.y)) return res - + o = mupdf.pdf_dict_gets(annot_obj, 'InkList') if o.m_internal: # InkList has 2-level lists @@ -1932,30 +1932,30 @@ def __init__( self, *args): self.this = mupdf.fz_new_multi_archive() if args: self.add( *args) - + def __repr__( self): return f'Archive, sub-archives: {len(self._subarchives)}' def _add_arch( self, subarch, path=None): mupdf.fz_mount_multi_archive( self.this, subarch, path) - + def _add_dir( self, folder, path=None): sub = mupdf.fz_open_directory( folder) mupdf.fz_mount_multi_archive( self.this, sub, path) - + def _add_treeitem( self, memory, name, path=None): buff = JM_BufferFromBytes( memory) sub = mupdf.fz_new_tree_archive( mupdf.FzTree()) mupdf.fz_tree_archive_add_buffer( sub, name, buff) mupdf.fz_mount_multi_archive( self.this, sub, path) - + def _add_ziptarfile( self, filepath, type_, path=None): if type_ == 1: sub = mupdf.fz_open_zip_archive( filepath) else: sub = mupdf.fz_open_tar_archive( filepath) mupdf.fz_mount_multi_archive( self.this, sub, path) - + def _add_ziptarmemory( self, memory, type_, path=None): buff = JM_BufferFromBytes( memory) stream = mupdf.fz_open_buffer( buff) @@ -1964,7 +1964,7 @@ def _add_ziptarmemory( self, memory, type_, path=None): else: sub = mupdf.fz_open_tar_archive_with_stream( stream) mupdf.fz_mount_multi_archive( self.this, sub, path) - + def add( self, content, path=None): ''' Add a sub-archive. @@ -2000,7 +2000,7 @@ def make_subarch(entries, mount, fmt): if isinstance(content, pathlib.Path): content = str(content) - + if isinstance(content, str): if os.path.isdir(content): self._add_dir(content, path) @@ -2044,7 +2044,7 @@ def make_subarch(entries, mount, fmt): elif isinstance(content, Archive): self._add_arch(content, path) return make_subarch([], path, 'multi') - + if isinstance(content, tuple) and len(content) == 2: # covers the tree item plus path data, name = content @@ -2059,13 +2059,13 @@ def make_subarch(entries, mount, fmt): else: assert 0, f'Unexpected {type(data)=}.' return make_subarch([name], path, 'tree') - + elif hasattr(content, '__getitem__'): # Deal with sequence of disparate items. for item in content: self.add(item, path) return - + else: raise TypeError(f'Unrecognised type {type(content)}.') assert 0 @@ -2076,10 +2076,10 @@ def entry_list( self): List of sub archives. ''' return self._subarchives - + def has_entry( self, name): return mupdf.fz_has_archive_entry( self.this, name) - + def read_entry( self, name): buff = mupdf.fz_read_archive_entry( self.this, name) return JM_BinFromBuffer( buff) @@ -2101,7 +2101,7 @@ def __init__( self, rhs): self.this = mupdf.fz_parse_xml_from_html5( buff) else: assert 0, f'Unsupported type for rhs: {type(rhs)}' - + def _get_node_tree( self): def show_node(node, items, shift): while node is not None: @@ -2123,7 +2123,7 @@ def show_node(node, items, shift): items = [] items = show_node(self, items, shift) return items - + def add_bullet_list(self): """Add bulleted list ("ul" tag)""" child = self.create_element("ul") @@ -2302,7 +2302,7 @@ def add_text(self, text): def append_child( self, child): mupdf.fz_dom_append_child( self.this, child.this) - + def append_styled_span(self, style): span = self.create_element("span") span.add_style(style) @@ -2314,11 +2314,11 @@ def append_styled_span(self, style): def bodytag( self): return Xml( mupdf.fz_dom_body( self.this)) - + def clone( self): ret = mupdf.fz_dom_clone( self.this) return Xml( ret) - + @staticmethod def color_text(color): if type(color) is str: @@ -2331,10 +2331,10 @@ def color_text(color): def create_element( self, tag): return Xml( mupdf.fz_dom_create_element( self.this, tag)) - + def create_text_node( self, text): return Xml( mupdf.fz_dom_create_text_node( self.this, text)) - + def debug(self): """Print a list of the node tree below self.""" items = self._get_node_tree() @@ -2345,12 +2345,12 @@ def find( self, tag, att, match): ret = mupdf.fz_dom_find( self.this, tag, att, match) if ret.m_internal: return Xml( ret) - + def find_next( self, tag, att, match): ret = mupdf.fz_dom_find_next( self.this, tag, att, match) if ret.m_internal: return Xml( ret) - + @property def first_child( self): if mupdf.fz_xml_text( self.this): @@ -2359,11 +2359,11 @@ def first_child( self): ret = mupdf.fz_dom_first_child( self) if ret.m_internal: return Xml( ret) - + def get_attribute_value( self, key): assert key return mupdf.fz_dom_attribute( self.this, key) - + def get_attributes( self): if mupdf.fz_xml_text( self.this): # text node, has no attributes. @@ -2377,13 +2377,13 @@ def get_attributes( self): result[ key] = val i += 1 return result - + def insert_after( self, node): mupdf.fz_dom_insert_after( self.this, node.this) - + def insert_before( self, node): mupdf.fz_dom_insert_before( self.this, node.this) - + def insert_text(self, text): lines = text.splitlines() line_count = len(lines) @@ -2415,30 +2415,30 @@ def next( self): ret = mupdf.fz_dom_next( self.this) if ret.m_internal: return Xml( ret) - + @property def parent( self): ret = mupdf.fz_dom_parent( self.this) if ret.m_internal: return Xml( ret) - + @property def previous( self): ret = mupdf.fz_dom_previous( self.this) if ret.m_internal: return Xml( ret) - + def remove( self): mupdf.fz_dom_remove( self.this) - + def remove_attribute( self, key): assert key mupdf.fz_dom_remove_attribute( self.this, key) - + @property def root( self): return Xml( mupdf.fz_xml_root( self.this)) - + def set_align(self, align): """Set text alignment via CSS style""" text = "text-align: %s" @@ -2461,7 +2461,7 @@ def set_align(self, align): def set_attribute( self, key, value): assert key mupdf.fz_dom_add_attribute( self.this, key, value) - + def set_bgcolor(self, color): """Set background color via CSS style""" text = f"background-color: %s" % self.color_text(color) @@ -2688,11 +2688,11 @@ def span_bottom(self): @property def tagname( self): return mupdf.fz_xml_tag( self.this) - + @property def text( self): return mupdf.fz_xml_text( self.this) - + add_var = add_code add_samp = add_code add_kbd = add_code @@ -2859,16 +2859,16 @@ def __exit__(self, *args): @typing.overload def __getitem__(self, i: int = 0) -> Page: ... - + if sys.version_info >= (3, 9): @typing.overload def __getitem__(self, i: slice) -> list[Page]: ... - + @typing.overload def __getitem__(self, i: tuple[int, int]) -> Page: ... - + def __getitem__(self, i=0): if isinstance(i, slice): return [self[j] for j in range(*i.indices(len(self)))] @@ -2913,11 +2913,11 @@ def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0 self.this = pdf_document self.this_is_pdf = True return - + # Classic implementation temporarily sets JM_mupdf_show_errors=0 then # restores the previous value in `fz_always() {...}` before returning. # - + if not filename or type(filename) is str: pass elif hasattr(filename, "absolute"): @@ -2926,7 +2926,7 @@ def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0 filename = filename.name else: raise TypeError(f"bad filename: {type(filename)=} {filename=}.") - + if stream is not None: if type(stream) is bytes: self.stream = stream @@ -2956,7 +2956,7 @@ def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0 elif not os.path.isfile(filename): msg = f"'{filename}' is no file" raise FileDataError(msg) - + if from_file and os.path.getsize(filename) == 0: raise EmptyFileError(f'Cannot open empty file: {filename=}.') if type(self.stream) is bytes and len(self.stream) == 0: @@ -3082,7 +3082,7 @@ def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0 self.page_count2 = extra.page_count_fz finally: JM_mupdf_show_errors = JM_mupdf_show_errors_old - + def __len__(self) -> int: return self.page_count @@ -4746,7 +4746,7 @@ def insert_pdf( raise TypeError( "source or target not a PDF") ENSURE_OPERATION(pdfout) JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, _gmap) - + #log( 'insert_pdf(): calling self._reset_page_refs()') self._reset_page_refs() if links: @@ -5105,16 +5105,16 @@ def move_page(self, pno: int, to: int =-1): @property def name(self): return self._name - + def need_appearances(self, value=None): """Get/set the NeedAppearances value.""" if not self.is_form_pdf: return None - + pdf = _as_pdf_document(self) oldval = -1 appkey = "NeedAppearances" - + form = mupdf.pdf_dict_getp( mupdf.pdf_trailer(pdf), "Root/AcroForm", @@ -5163,7 +5163,7 @@ def next_location(self, page_id): def page_annot_xrefs(self, n): if g_use_extra: return extra.page_annot_xrefs( self.this, n) - + if isinstance(self.this, mupdf.PdfDocument): page_count = mupdf.pdf_count_pages(self.this) pdf_document = self.this @@ -5365,7 +5365,7 @@ def reload_page(self, page: Page) -> Page: pno = page.number # save the page number for k, v in page._annot_refs.items(): # save the annot dictionary old_annots[k] = v - + # When we call `self.load_page()` below, it will end up in # fz_load_chapter_page(), which will return any matching page in the # document's list of non-ref-counted loaded pages, instead of actually @@ -5389,10 +5389,10 @@ def reload_page(self, page: Page) -> Page: # `fz_page*` - the original was not freed, so a new `fz_page` cannot # reuse the same block of memory. # - + refs_old = page.this.m_internal.refs m_internal_old = page.this.m_internal_value() - + page.this = None page._erase() # remove the page page = None @@ -5620,7 +5620,7 @@ def save( raise ValueError("incremental needs original file") if user_pw and len(user_pw) > 40 or owner_pw and len(owner_pw) > 40: raise ValueError("password length must not exceed 40") - + pdf = _as_pdf_document(self) opts = mupdf.PdfWriteOptions() opts.do_incremental = incremental @@ -6205,9 +6205,9 @@ def xref_xml_metadata(self): if xml.m_internal: xref = mupdf.pdf_to_num( xml) return xref - + __slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__') - + outline = property(lambda self: self._outline) tobytes = write is_stream = xref_is_stream @@ -6245,16 +6245,16 @@ def __init__(self, path, options=''): self.this = mupdf.FzDocumentWriter( out, options, mupdf.FzDocumentWriter.OutputType_PDF) assert out.m_internal_value() == 0 assert hasattr( self.this, '_out') - + def begin_page( self, mediabox): mediabox2 = JM_rect_from_py(mediabox) device = mupdf.fz_begin_page( self.this, mediabox2) device_wrapper = DeviceWrapper( device) return device_wrapper - + def close( self): mupdf.fz_close_document_writer( self.this) - + def end_page( self): mupdf.fz_end_page( self.this) @@ -6278,7 +6278,7 @@ def __init__( is_serif=0, embed=1, ): - + if fontbuffer: if hasattr(fontbuffer, "getvalue"): fontbuffer = fontbuffer.getvalue() @@ -6286,7 +6286,7 @@ def __init__( fontbuffer = bytes(fontbuffer) if not isinstance(fontbuffer, bytes): raise ValueError("bad type: 'fontbuffer'") - + if isinstance(fontname, str): fname_lower = fontname.lower() if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower: @@ -6327,7 +6327,7 @@ def ascender(self): @property def bbox(self): return self.this.fz_font_bbox() - + @property def buffer(self): buffer_ = mupdf.FzBuffer( mupdf.ll_fz_keep_buffer( self.this.m_internal.buffer)) @@ -6466,7 +6466,7 @@ def is_writable(self): ft_substitute = cppyy.gbl.mupdf_mfz_font_flags_ft_substitute( flags) else: ft_substitute = flags.ft_substitute - + if ( mupdf.ll_fz_font_t3_procs(font.m_internal) or ft_substitute or not mupdf.pdf_font_writing_supported(font) @@ -6580,7 +6580,7 @@ def _setBorder(self, border, doc, xref): return b = JM_annot_set_border(border, pdf, link_obj) return b - + @property def border(self): return self._border(self.parent.parent.this, self.xref) @@ -6749,7 +6749,7 @@ def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None): Matrix(Matrix) - new copy Matrix(sequence) - from 'sequence' Matrix(mupdf.FzMatrix) - from MuPDF class wrapper for fz_matrix. - + Explicit keyword args a, b, c, d, e, f override any earlier settings if not None. """ @@ -6785,7 +6785,7 @@ def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None): float(args[1]), float(args[0]), 1.0, 0.0, 0.0 else: raise ValueError("Matrix: bad args") - + # Override with explicit args if specified. if a is not None: self.a = a if b is not None: self.b = b @@ -7000,7 +7000,7 @@ def __init__(self, obj, rlink, document=None): self.page = obj.page self.rb = Point(0, 0) self.uri = obj.uri - + def uri_to_dict(uri): items = self.uri[1:].split('&') ret = dict() @@ -7022,7 +7022,7 @@ def unescape(name): newname += chr(int(piece, base=16)) newname += item[2:] return newname - + if rlink and not self.uri.startswith("#"): self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}" if obj.is_external: @@ -7325,7 +7325,7 @@ def next(self): def on_state(self): """Return the "On" value for button widgets. - + This is useful for radio buttons mainly. Checkboxes will always return "Yes". Radio buttons will return the string that is unequal to "Off" as returned by method button_states(). @@ -7400,7 +7400,7 @@ def destination(self, document): kind=LINK_NAMED. ''' return linkDest(self, None, document) - + @property def down(self): ol = self.this @@ -7482,7 +7482,7 @@ def _make_PdfFilterOptions( filter_.recurse = recurse filter_.instance_forms = instance_forms filter_.ascii = ascii - + filter_.no_update = no_update if sanitize: # We want to use a PdfFilterFactory whose `.filter` fn pointer is @@ -8148,7 +8148,7 @@ def _insert_image(self, colorspace = image.colorspace() xres, yres = mupdf.fz_image_resolution(image) mask = mupdf.fz_new_image_from_buffer(maskbuf) - + # mupdf.ll_fz_new_image_from_compressed_buffer() is not usable. zimg = extra.fz_new_image_from_compressed_buffer( w, @@ -8181,9 +8181,9 @@ def _insert_image(self, # Python? # image.m_internal = None - + image = zimg - + if do_have_image: #log( 'do_have_image') ref = mupdf.pdf_add_image(pdf, image) @@ -8675,7 +8675,7 @@ def annot_xrefs(self): List of xref numbers of annotations, fields and links. ''' return JM_get_annot_xref_list2(self) - + def annots(self, types=None): """ Generator over the annotations of a page. @@ -8718,7 +8718,7 @@ def bound(self): page = _as_fz_page(self.this) val = mupdf.fz_bound_page(page) val = Rect(val) - + if val.is_infinite and self.parent.is_pdf: cb = self.cropbox w, h = cb.width, cb.height @@ -8727,7 +8727,7 @@ def bound(self): val = Rect(0, 0, w, h) msg = TOOLS.mupdf_warnings(reset=False).splitlines()[-1] message(msg) - + return val def clean_contents(self, sanitize=1): @@ -8738,7 +8738,7 @@ def clean_contents(self, sanitize=1): return filter_ = _make_PdfFilterOptions(recurse=1, sanitize=sanitize) mupdf.pdf_filter_page_contents( page.doc(), page, filter_) - + @property def cropbox(self): """The CropBox.""" @@ -8992,20 +8992,18 @@ def get_drawings(self, extended: bool=False) -> list: npath["rect"] = Rect(npath["rect"]) else: npath["scissor"] = Rect(npath["scissor"]) - if npath["type"]!="group": + if npath["type"] != "group": items = npath["items"] - newitems = [] - for item in items: - cmd = item[0] - rest = item[1:] - if cmd == "re": - item = ("re", Rect(rest[0]).normalize(), rest[1]) + for idx in range(len(items)): + item = items[idx] + cmd, rest = item[0], item[1:] + if cmd == "re": + newitem = ("re", Rect(rest[0]).normalize(), rest[1]) elif cmd == "qu": - item = ("qu", Quad(rest[0])) + newitem = ("qu", Quad(rest[0])) else: - item = tuple([cmd] + [Point(i) for i in rest]) - newitems.append(item) - npath["items"] = newitems + newitem = tuple([cmd] + [Point(i) for i in rest]) + items[idx] = newitem if npath['type'] in ('f', 's'): for k in allkeys: npath[k] = npath.get(k) @@ -9017,7 +9015,7 @@ class Drawpath(object): """Reflects a path dictionary from get_cdrawings().""" def __init__(self, **args): self.__dict__.update(args) - + class Drawpathlist(object): """List of Path objects representing get_cdrawings() output.""" def __getitem__(self, item): @@ -9141,7 +9139,7 @@ def get_lineart(self) -> object: item = tuple([cmd] + [Point(i) for i in rest]) newitems.append(item) npath.items = newitems - + if npath.type == "f": npath.stroke_opacity = None npath.dashes = None @@ -9892,7 +9890,7 @@ def __init__(self, *args): spix = spix.this if not mupdf.fz_pixmap_colorspace(spix).m_internal: raise ValueError( "source colorspace must not be None") - + if cs.m_internal: self.this = mupdf.fz_convert_pixmap( spix, @@ -9936,7 +9934,7 @@ def __init__(self, *args): else: spix, w, h, clip = args bbox = JM_irect_from_py(clip) - + spix, w, h, clip = args src_pix = spix.this if isinstance(spix, Pixmap) else spix bbox = JM_irect_from_py(clip) @@ -10435,7 +10433,7 @@ def save(self, filename, output=None, jpg_quality=95): "jpg": 7, "jpeg": 7, } - + if type(filename) is str: pass elif hasattr(filename, "absolute"): @@ -10670,7 +10668,7 @@ def tint_with(self, black, white): def w(self): """The width.""" return mupdf.fz_pixmap_width(self.this) - + def warp(self, quad, width, height): """Return pixmap from a warped quad.""" if not quad.is_convex: raise ValueError("quad must be convex") @@ -10954,7 +10952,7 @@ def __init__(self, *args, ul=None, ur=None, ll=None, lr=None): Explicit keyword args ul, ur, ll, lr override earlier settings if not None. - + ''' if not args: self.ul = self.ur = self.ll = self.lr = Point() @@ -11124,7 +11122,7 @@ def transform(self, m): class Rect: - + def __abs__(self): if self.is_empty or self.is_infinite: return 0.0 @@ -11185,7 +11183,7 @@ def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None): Rect(top-left, bottom-right) Rect(Rect or IRect) - new copy Rect(sequence) - from 'sequence' - + Explicit keyword args p0, p1, x0, y0, x1, y1 override earlier settings if not None. """ @@ -11373,7 +11371,7 @@ def top_left(self): def top_right(self): """Top-right corner.""" return Point(self.x1, self.y0) - + def torect(self, r): """Return matrix that converts to target rect.""" @@ -11561,7 +11559,7 @@ def draw_rect(self, rect: rect_like):# -> Point: r = Rect(rect) args = JM_TUPLE(list(r.bl * self.ipctm) + [r.width, r.height]) self.draw_cont += _format_g(args) + " re\n" - + self.updateRect(r) self.last_point = r.tl return self.last_point @@ -12032,7 +12030,7 @@ def __init__( self, html='', user_css=None, em=12, archive=None): self.this = mupdf.FzStoryS( buffer_, user_css, em, arch) else: self.this = mupdf.FzStory( buffer_, user_css, em, arch) - + def add_header_ids(self): ''' Look for `