Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Change Log
**Changes in version 1.24.4 (2024-05-16)**

* **Fixed** `3418 <https://github.com/pymupdf/PyMuPDF/issues/3418>`_: Re-introduced bug, text align add_redact_annot
* **Fixed** `3472 <https://github.com/pymupdf/PyMuPDF/issues/3472>`_: insert_pdf gives SystemError

* Other:

Expand Down
12 changes: 6 additions & 6 deletions scripts/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def main(argv):
build_type = None
build_mupdf = True
gdb = False
test_pymupdf = True
test_fitz = True
implementations = None
test_names = list()
venv = 2
Expand All @@ -157,7 +157,7 @@ def main(argv):
elif arg == '-d':
build_type = 'debug'
elif arg == '-f':
test_pymupdf = int(next(args))
test_fitz = int(next(args))
elif arg in ('-h', '--help'):
show_help()
return
Expand Down Expand Up @@ -226,7 +226,7 @@ def do_test():
pytest_options=pytest_options,
timeout=timeout,
gdb=gdb,
test_pymupdf=test_pymupdf,
test_fitz=test_fitz,
)

for command in commands:
Expand Down Expand Up @@ -359,7 +359,7 @@ def test(
pytest_options=None,
timeout=None,
gdb=False,
test_pymupdf=True,
test_fitz=True,
):
'''
Args:
Expand All @@ -375,7 +375,7 @@ def test(
See top-level option `-p`.
gdb:
See top-level option `--gdb`.
test_pymupdf:
test_fitz:
See top-level option `-f`.
'''
pymupdf_dir_rel = gh_release.relpath(pymupdf_dir)
Expand Down Expand Up @@ -433,7 +433,7 @@ def test(
for p in glob.glob(f'{pymupdf_dir_rel}/tests/test_*_fitz.py'):
print(f'Removing {p=}')
os.remove(p)
if test_pymupdf:
if test_fitz:
# Create copies of each test file, modified to use `pymupdf`
# instead of `fitz`.
for p in glob.glob(f'{pymupdf_dir_rel}/tests/test_*.py'):
Expand Down
11 changes: 8 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,21 +606,26 @@ def add( ret, from_, to_):

if path_so_leaf_b:
# Add rebased implementation files.
add( ret_p, f'{g_root}/src/fitz___init__.py', 'fitz/__init__.py') # For `fitz` module alias.
add( ret_p, f'{g_root}/src/fitz_table.py', 'fitz/table.py') # For `fitz` module alias.
add( ret_p, f'{g_root}/src/fitz_utils.py', 'fitz/utils.py') # For `fitz` module alias.
to_dir = 'pymupdf/'
add( ret_p, f'{g_root}/src/__init__.py', to_dir)
add( ret_p, f'{g_root}/src/__main__.py', to_dir)
add( ret_p, f'{g_root}/src/pymupdf.py', to_dir)
add( ret_p, f'{g_root}/src/table.py', to_dir)
add( ret_p, f'{g_root}/src/utils.py', to_dir)
add( ret_p, f'{g_root}/src/_apply_pages.py', to_dir)
add( ret_p, f'{g_root}/src/build/extra.py', to_dir)
add( ret_p, f'{g_root}/src/build/{path_so_leaf_b}', to_dir)

# Add support for `fitz` backwards compatibility.
add( ret_p, f'{g_root}/src/fitz___init__.py', 'fitz/__init__.py')
add( ret_p, f'{g_root}/src/fitz_table.py', 'fitz/table.py')
add( ret_p, f'{g_root}/src/fitz_utils.py', 'fitz/utils.py')

if mupdf_local:
# Add MuPDF Python API.
add( ret_p, f'{mupdf_build_dir}/mupdf.py', to_dir)

# Add MuPDF shared libraries.
if windows:
wp = pipcl.wdev.WindowsPython()
add( ret_p, f'{mupdf_build_dir}/_mupdf.pyd', to_dir)
Expand Down
209 changes: 206 additions & 3 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import string
import sys
import tarfile
import time
import typing
import warnings
import weakref
Expand Down Expand Up @@ -61,8 +62,19 @@ def _set_stream(name, default):
_g_out_log = _set_stream('PYMUPDF_LOG', sys.stdout)
_g_out_message = _set_stream('PYMUPDF_MESSAGE', sys.stdout)

# Set to list() if we are in test suite.
_g_log_items = None
_g_log_items = list()
_g_log_items_active = False

def _log_items():
    '''
    Returns the module-level list `_g_log_items`.

    The list object itself is returned, not a copy.
    '''
    return _g_log_items

def _log_items_active(active):
    '''
    Sets the module-level flag `_g_log_items_active` to `active`.

    Args:
        active:
            New value for the flag.
    '''
    # `global` is required because we rebind the module-level name.
    global _g_log_items_active
    _g_log_items_active = active

def _log_items_clear():
    '''
    Empties the module-level list `_g_log_items` in place.

    The list object is kept (only its contents are removed), so references
    held elsewhere see the now-empty list.
    '''
    _g_log_items[:] = []


def log( text='', caller=1):
'''
Expand All @@ -73,7 +85,7 @@ def log( text='', caller=1):
line = frame_record.lineno
function = frame_record.function
text = f'{filename}:{line}:{function}: {text}'
if _g_log_items is not None:
if _g_log_items_active:
_g_log_items.append(text)
print(text, file=_g_out_log)
_g_out_log.flush()
Expand Down Expand Up @@ -20942,6 +20954,197 @@ def vdist(dir, a, b):
return mupdf.fz_abs(dx * dir.y + dy * dir.x)


def apply_pages(
        path,
        pagefn,
        *,
        pagefn_args=(),
        pagefn_kwargs=dict(),
        initfn=None,
        initfn_args=(),
        initfn_kwargs=dict(),
        pages=None,
        method='single',
        concurrency=None,
        _stats=False,
        ):
    '''
    Returns list of results from `pagefn()`, optionally using concurrency for
    speed.

    Args:
        path:
            Path of document.
        pagefn:
            Function to call for each page; is passed (page, *pagefn_args,
            **pagefn_kwargs). Return value is added to list that we return. If
            `method` is not 'single', must be a top-level function - nested
            functions don't work with concurrency.
        pagefn_args
        pagefn_kwargs:
            Additional args to pass to `pagefn`. Must be picklable.
        initfn:
            If true, called once in each worker process (or once in the
            current process if `method` is 'single'); is passed
            (*initfn_args, **initfn_kwargs).
        initfn_args
        initfn_kwargs:
            Args to pass to initfn. Must be picklable.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork()`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        _stats:
            Internal, may change or be removed. If true, we output simple
            timing diagnostics.

    Returns:
        List containing the return value of `pagefn()` for each processed
        page.

    Raises:
        AssertionError:
            If `method` is not one of 'single', 'mp' or 'fork'.

    Note: We require a file path rather than a Document, because Document
    instances do not work properly after a fork - internal file descriptor
    offsets are shared between the parent and child processes.
    '''
    if _stats:
        t0 = time.time()

    if method == 'single':
        if initfn:
            initfn(*initfn_args, **initfn_kwargs)
        document = Document(path)
        if pages is None:
            # Iterating the document yields every page in order.
            selected = document
        else:
            # Honour the caller's page selection, in the order given.
            selected = (document[pno] for pno in pages)
        # Page function gets its own kwargs - not those of `initfn`.
        ret = [
                pagefn(page, *pagefn_args, **pagefn_kwargs)
                for page in selected
                ]

    else:
        # Use concurrency.
        #
        from . import _apply_pages

        if pages is None:
            if _stats:
                t = time.time()
            with Document(path) as document:
                num_pages = len(document)
                pages = list(range(num_pages))
            if _stats:
                t = time.time() - t
                log(f'{t:.2f}s: count pages.')

        if _stats:
            t = time.time()

        # Both concurrent implementations take the same positional args.
        worker_args = (
                path,
                pages,
                pagefn,
                pagefn_args,
                pagefn_kwargs,
                initfn,
                initfn_args,
                initfn_kwargs,
                concurrency,
                _stats,
                )

        if method == 'mp':
            ret = _apply_pages._multiprocessing(*worker_args)

        elif method == 'fork':
            ret = _apply_pages._fork(*worker_args)

        else:
            # Raise explicitly rather than `assert`, so that the check is not
            # stripped when Python runs with `-O`.
            raise AssertionError(f'Unrecognised {method=}.')

        if _stats:
            t = time.time() - t
            log(f'{t:.2f}s: work.')

    if _stats:
        t = time.time() - t0
        log(f'{t:.2f}s: total.')
    return ret


def get_text(
        path,
        *,
        pages=None,
        method='single',
        concurrency=None,

        option='text',
        clip=None,
        flags=None,
        textpage=None,
        sort=False,
        delimiters=None,

        _stats=False,
        ):
    '''
    Calls `Page.get_text()` on pages of a document via `apply_pages()` and
    returns the list of results, optionally using concurrency for speed.

    Args:
        path:
            Path of document.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        option
        clip
        flags
        textpage
        sort
        delimiters:
            Passed to internal calls to `Page.get_text()`.
        _stats:
            Passed unchanged to `apply_pages()`.
    '''
    # Keyword arguments forwarded to every `Page.get_text()` call.
    page_text_kwargs = {
            'option': option,
            'clip': clip,
            'flags': flags,
            'textpage': textpage,
            'sort': sort,
            'delimiters': delimiters,
            }

    results = apply_pages(
            path,
            Page.get_text,
            pagefn_kwargs=page_text_kwargs,
            pages=pages,
            method=method,
            concurrency=concurrency,
            _stats=_stats,
            )
    return results


class TOOLS:
'''
We use @staticmethod to avoid the need to create an instance of this class.
Expand Down
Loading