Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 85 additions & 42 deletions app/tools/page_numbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,29 +122,35 @@ def _run(self):
out_path = self._resolve_output_file(self.drop_out, pdf_path)
if not out_path: return

fmt_template = _FORMATS[self.cmb_format.currentIndex()][1]
pos_code = _POSITIONS[self.cmb_position.currentIndex()][1]
font_size = self.spin_size.value()
start_page = self.spin_start_page.value() - 1 # 0-indexed
start_num = self.spin_start_number.value()
margin = max(18, font_size + 8)
txt = self.edit_pages.text().strip()

# ── Phase 1 (main thread): scan for existing numbers and prompt.
# The scan reads only a thin band at the chosen edge of each
# target page, so it stays fast enough not to need a worker.
# The user-visible Yes/No/Cancel decision must run on the main
# thread anyway, and re-entering the worker for a second phase
# would add complexity without a perceived speedup.
try:
import fitz, re
with self._open_fitz(pdf_path) as doc:
total = doc.page_count

txt = self.edit_pages.text().strip()
targets = set(parse_pages(txt, total)) if txt else set(range(total))

fmt_template = _FORMATS[self.cmb_format.currentIndex()][1]
pos_code = _POSITIONS[self.cmb_position.currentIndex()][1]
font_size = self.spin_size.value()
start_page = self.spin_start_page.value() - 1 # 0-indexed
start_num = self.spin_start_number.value()
margin = max(18, font_size + 8)

# ── detect existing page numbers in the chosen margin band ──
band_h = max(50, font_size * 4)
num_re = re.compile(
r"^\s*(?:\d+\s*(?:/\s*\d+)?|"
r"(?:page|página|pagina|seite|stránka)\s+\d+(?:\s+(?:of|de|sur|von|di|van)\s+\d+)?)\s*$",
re.IGNORECASE,
)
existing = [] # list of (page_idx, [fitz.Rect])
# Plain (x0, y0, x1, y1) tuples — no fitz.Rect objects
# leak past the `with` block; the worker reconstructs
# them after re-opening the doc.
existing: list = []
for i in range(total):
if i not in targets or i < start_page:
continue
Expand All @@ -162,45 +168,68 @@ def _run(self):
for span in line.get("spans", []):
stxt = span.get("text", "").strip()
if stxt and num_re.match(stxt):
hits.append(fitz.Rect(span["bbox"]))
hits.append(tuple(span["bbox"]))
if hits:
existing.append((i, hits))

replace = False
if existing:
ans = QMessageBox.question(
self, t("msg.warning"),
t("tool.page_numbers.existing_found", n=len(existing)),
QMessageBox.StandardButton.Yes
| QMessageBox.StandardButton.No
| QMessageBox.StandardButton.Cancel,
)
if ans == QMessageBox.StandardButton.Cancel:
return
replace = (ans == QMessageBox.StandardButton.Yes)

except Exception as e:
QMessageBox.critical(self, t("msg.error"), str(e))
return

# numbered_total = how many pages will actually receive a number
numbered_total = sum(1 for i in range(total)
if i in targets and i >= start_page)
if numbered_total == 0:
QMessageBox.warning(self, t("msg.warning"),
t("tool.page_numbers.no_targets"))
return

replace = False
if existing:
ans = QMessageBox.question(
self, t("msg.warning"),
t("tool.page_numbers.existing_found", n=len(existing)),
QMessageBox.StandardButton.Yes
| QMessageBox.StandardButton.No
| QMessageBox.StandardButton.Cancel,
)
if ans == QMessageBox.StandardButton.Cancel:
return
replace = (ans == QMessageBox.StandardButton.Yes)

pwd = self._pdf_password

# ── Phase 2 (worker thread): apply redactions + insert numbers.
# This is the slow part — apply_redactions rasterises the
# affected regions and insert_text touches every target page.
def do_work(worker):
import fitz
doc = fitz.open(pdf_path)
if doc.needs_pass and pwd:
doc.authenticate(pwd)
try:
if replace:
for pg_idx, rects in existing:
if worker.is_cancelled():
return None
pg = doc[pg_idx]
for r in rects:
pg.add_redact_annot(r, fill=(1, 1, 1))
for bbox in rects:
pg.add_redact_annot(fitz.Rect(*bbox), fill=(1, 1, 1))
pg.apply_redactions()

# numbered_total = how many pages will actually be numbered
numbered_total = sum(1 for i in range(total)
if i in targets and i >= start_page)

counter = 0
for i in range(total):
if i not in targets or i < start_page:
continue
if worker.is_cancelled():
return None
counter += 1
n_display = start_num + counter - 1
label = fmt_template.format(n=n_display, total=numbered_total + start_num - 1)
label = fmt_template.format(
n=n_display, total=numbered_total + start_num - 1)

page = doc[i]
rect = page.rect
# Estimate text width (rough: 0.5 * font_size per char for Helvetica)
# Estimate text width (rough: 0.5 * font_size per char)
tw = len(label) * font_size * 0.5
if pos_code[0] == "t":
y = margin
Expand All @@ -214,14 +243,28 @@ def _run(self):
x = rect.width - margin - tw

page.insert_text(fitz.Point(x, y), label,
fontsize=font_size, fontname="helv", color=(0, 0, 0))

fontsize=font_size, fontname="helv",
color=(0, 0, 0))
worker.progress.emit(counter,
t("progress.page_numbers.page",
current=counter,
total=numbered_total))

if worker.is_cancelled():
return None
doc.save(out_path, garbage=4, deflate=True)
self._status(f"✔ → {os.path.basename(out_path)}")
msg = t("tool.page_numbers.done", path=out_path)
finally:
doc.close()
return out_path

def on_done(saved):
self._status(f"✔ → {os.path.basename(saved)}")
msg = t("tool.page_numbers.done", path=saved)
if self._pipeline_active:
self._pipeline_success(msg, out_path)
self._pipeline_success(msg, saved)
else:
QMessageBox.information(self, t("msg.done"), msg)
except Exception as e:
QMessageBox.critical(self, t("msg.error"), str(e))

self._run_background(do_work, total=numbered_total,
label=t("progress.page_numbers.applying"),
on_done=on_done)
Loading