Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions app/base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""PDFApps – BasePage: standard page layout (header + scroll + action bar)."""

import contextlib
import os
import subprocess
import sys
import tempfile
import shutil
from typing import Iterable

from PySide6.QtCore import Qt, QTimer, Signal
from shiboken6 import isValid
Expand All @@ -15,6 +17,12 @@
from app.i18n import t
from app.utils import ToolHeader, ActionBar, scrolled, _paint_bg

# Iterable is referenced via string-typed annotations in
# _atomic_pdf_write / _check_not_same_path; keep it importable so
# tooling that resolves forward refs (e.g. typing.get_type_hints)
# finds the symbol.
__all__ = ["BasePage", "Iterable"]


def _reveal_file(path: str) -> None:
"""Open the OS file manager and highlight the given file when possible.
Expand Down Expand Up @@ -333,6 +341,104 @@ def _open_fitz(self, path: str):
doc.authenticate(self._pdf_password)
return doc

def _clear_pdf_password(self) -> None:
"""Best-effort wipe of the cached PDF password from memory.

Thin wrapper around :func:`app.utils.wipe_pdf_password` so every
close / reload path (closeEvent, ``_close_pdf``, loading a
different file) can drop the cached password uniformly. See the
helper docstring for the immutability caveat.
"""
from app.utils import wipe_pdf_password
wipe_pdf_password(self)

# ── safe PDF writer ────────────────────────────────────────────────────

@staticmethod
def _check_not_same_path(dst: str,
sources: "Iterable[str] | None" = None) -> None:
"""Raise RuntimeError if ``dst`` resolves to any of ``sources``.

Shared invariant for every tool that takes a PDF in and writes
a result back to disk: if the user picks the same path for
input and output, opening the output for writing truncates the
input before the writer's lazy stream reads complete and we
get silent dataloss + corrupted output.
"""
try:
dst_real = os.path.realpath(dst)
except OSError:
return
for src in (sources or ()):
if not src:
continue
try:
if os.path.realpath(src) == dst_real:
raise RuntimeError(t("tool.err.same_source_output"))
except OSError:
continue

@staticmethod
def _atomic_pdf_write(writer, dst: str, *,
sources: "Iterable[str] | None" = None,
save_opts: "dict | None" = None) -> None:
"""Write a PdfWriter (pypdf) or fitz.Document to ``dst`` atomically.

Two defensive layers fix the silent dataloss bug where opening
``open(dst, "wb")`` truncates the input file BEFORE the writer's
lazy stream reads complete (PdfWriter holds references into
the PdfReader; same applies to fitz.Document.save() with
incremental flags).

1. Reject up-front if ``dst`` resolves to any path in
``sources`` (via ``os.path.realpath``) β€” this catches the
"user picked the same path for input and output" case which
was producing corrupt output + losing the original.

2. Write to a same-directory tempfile and atomically rename to
``dst`` via :func:`os.replace` (works on POSIX and Windows).

``writer`` may be a pypdf ``PdfWriter`` (uses ``writer.write(fh)``)
or a PyMuPDF ``fitz.Document`` (uses ``writer.save(tmp)``).
Anything else with a ``.write(fh)`` method is accepted.

Raises :class:`RuntimeError` with a translated message when the
same-source check fails; the caller's existing ``show_error``
path surfaces it as a friendly dialog.
"""
BasePage._check_not_same_path(dst, sources)

dst_dir = os.path.dirname(dst) or os.getcwd()
# mkstemp returns an OS-level fd; close via os.fdopen so the
# writer can stream into it. Same-volume placement guarantees
# os.replace() stays atomic.
fd, tmp = tempfile.mkstemp(suffix=".pdf", dir=dst_dir)
# Detect fitz.Document via its module to avoid importing fitz
# at base.py load time (every page imports BasePage). Modern
# PyMuPDF reports module="pymupdf"; legacy versions used "fitz".
# Both expose Document.save(path, ...).
writer_mod = type(writer).__module__
is_fitz_doc = (writer_mod.startswith("pymupdf")
or writer_mod.startswith("fitz")) and hasattr(writer, "save")
try:
if is_fitz_doc:
# fitz.Document.save(path, ...) accepts a filesystem
# path and writes through cleanly. We close the fd
# we opened first so save() can take exclusive access.
os.close(fd)
writer.save(tmp, **(save_opts or {}))
else:
# pypdf.PdfWriter (and anything else with .write(fh))
# streams into the open file handle.
with os.fdopen(fd, "wb") as fh:
writer.write(fh)
os.replace(tmp, dst)
except Exception:
with contextlib.suppress(Exception):
if os.path.exists(tmp):
os.unlink(tmp)
raise

# ── background-task helper ────────────────────────────────────────────

def _run_background(self, do_work_fn, total: int, label: str,
Expand Down
9 changes: 9 additions & 0 deletions app/editor/dialogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,15 @@ def _on_accept(self):
from app.i18n import save_signature
save_signature(tmp)

# Restrict the in-flight tmp signature to user-only on POSIX so
# other users on the host cannot read it before the editor
# stamps it into the PDF. NTFS ACLs already cover the Windows
# case via the profile owner; chmod there is a no-op.
try:
os.chmod(tmp, 0o600)
except OSError:
pass

self._result_path = tmp
self.accept()

Expand Down
55 changes: 52 additions & 3 deletions app/editor/tab.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""PDFApps – TabEditar: visual PDF editor tool tab."""

import contextlib
import os
import tempfile

Expand All @@ -25,6 +26,10 @@ class TabEditar(QWidget):
"""Visual editor: click/drag directly on the rendered PDF."""

_MAX_REDO = 100 # cap redo history to avoid unbounded memory growth
# Cap pending-edit history to keep memory bounded in long sessions and to
# release temp signature/image files (which edits hold paths to) once
# they fall out of the rolling window. Trimmed FIFO β€” oldest first.
_MAX_PENDING = 500

_HI_COLORS_KEYS = ["color.yellow", "color.green", "color.pink", "color.light_blue"]
_HI_COLORS_VALS = [(1,1,0), (0,1,0), (1,0.4,0.7), (0.5,0.8,1)]
Expand Down Expand Up @@ -619,6 +624,18 @@ def _close_pdf(self):
self._lbl_info.setText("")
self._page_idx = 0
self._update_nav()
# Drop the cached password so a memory dump after the user
# closes the file no longer surfaces it (R5/D2).
self._clear_pdf_password()

def _clear_pdf_password(self) -> None:
"""Mirror of ``BasePage._clear_pdf_password`` β€” EditorTab does not
inherit from BasePage so we provide the same hook locally and
delegate to the shared :func:`app.utils.wipe_pdf_password`
helper, keeping a single implementation across the codebase.
"""
from app.utils import wipe_pdf_password
wipe_pdf_password(self)

def _pick_image(self):
p, _ = QFileDialog.getOpenFileName(self, t("edit.image"), DESKTOP,
Expand Down Expand Up @@ -806,9 +823,41 @@ def _on_point(self, page_idx, pdf_pt):
def _on_text_edit_committed(self, page_idx, edit):
self._add(edit)

def _add(self, edit: dict):
self._redo_stack.clear()
def _add(self, edit: dict, *, _from_redo: bool = False):
# _from_redo=True is set by _redo() so consecutive redos don't
# wipe the redo stack. Previously _redo() called _add(), and the
# first line below cleared the remaining redo entries β€” meaning
# after a single redo all the others were silently discarded.
if not _from_redo:
self._redo_stack.clear()
self._pending.append(edit)
# Trim oldest edits once we cross the cap. Without this the list
# grew unbounded across long sessions and retained references to
# temp signature/image files until the tab closed.
if len(self._pending) > self._MAX_PENDING:
to_drop = self._pending[:-self._MAX_PENDING]
self._pending = self._pending[-self._MAX_PENDING:]
# Best-effort cleanup of temp paths owned by the dropped
# entries. Only files inside the system tempdir are touched β€”
# the user's source image/signature picks must never be
# deleted from disk.
tmp_root = os.path.normcase(tempfile.gettempdir())
for old in to_drop:
with contextlib.suppress(Exception):
p = old.get("path")
if (p and os.path.isfile(p)
and os.path.normcase(p).startswith(tmp_root)):
os.unlink(p)
# Mirror the trim in the visible list widget so the labels
# stay in sync with self._pending indices. We use
# ``len(to_drop)`` rather than ``count() - len(_pending)``
# because the addItem for the *new* edit happens below: at
# this point _pending already has the trimmed length but
# _pending_list still holds the pre-trim row count, so the
# diff would be off by one and the next _undo would remove
# the wrong label (PR-B revisor finding #1).
for _ in range(len(to_drop)):
self._pending_list.takeItem(0)
# Each entry's base label is fully translated via edit.label.*;
# the page suffix (" β€” p. N") comes from a shared key so all
# locales decide their own dash/spacing/abbreviation.
Expand Down Expand Up @@ -847,7 +896,7 @@ def _redo(self):
if not self._redo_stack:
return
edit = self._redo_stack.pop()
self._add(edit)
self._add(edit, _from_redo=True)

def _on_note_deleted(self, overlay: dict):
"""Remove a deleted note from the pending edits list."""
Expand Down
16 changes: 15 additions & 1 deletion app/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,24 @@ def get_saved_signature() -> str | None:


def save_signature(img_path: str):
"""Copy signature image to persistent location."""
"""Copy signature image to persistent location.

The persistent path is restricted to user-only access (``0o600``) on
POSIX so other users on the same host cannot read the cached
signature. Windows file permissions are governed by NTFS ACLs and
the user profile inherits owner-only access by default β€” chmod is
a no-op there but harmless.
"""
import shutil
os.makedirs(os.path.dirname(_SIGNATURE_PATH), exist_ok=True)
shutil.copy2(img_path, _SIGNATURE_PATH)
try:
os.chmod(_SIGNATURE_PATH, 0o600)
except OSError:
# Some filesystems (FAT/exFAT on USB sticks) ignore chmod and
# raise. Permission hardening is best-effort; the copy itself
# succeeded so we must not surface this as an error.
pass


def clear_saved_signature():
Expand Down
4 changes: 2 additions & 2 deletions app/tools/encrypt.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def _run(self):
w = PdfWriter(); w.append(reader)
w.encrypt(user_password=user_pwd,
owner_password=owner, algorithm="AES-256")
with open(out_path, "wb") as f: w.write(f)
self._atomic_pdf_write(w, out_path, sources=[pdf_path])
self._status(t("tool.encrypt.status.done",
name=os.path.basename(out_path)))
msg = t("tool.encrypt.done_enc", path=out_path)
Expand All @@ -145,7 +145,7 @@ def _run(self):
QMessageBox.warning(self, t("msg.warning"), t("tool.encrypt.wrong_pass"))
return
w = PdfWriter(); w.append(reader)
with open(out_path, "wb") as f: w.write(f)
self._atomic_pdf_write(w, out_path, sources=[pdf_path])
self._status(t("tool.encrypt.status.done",
name=os.path.basename(out_path)))
msg = t("tool.encrypt.done_dec", path=out_path)
Expand Down
2 changes: 1 addition & 1 deletion app/tools/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def _run(self):
pages = parse_pages(txt, len(reader.pages))
w = PdfWriter()
for p in pages: w.add_page(reader.pages[p])
with open(out_path, "wb") as f: w.write(f)
self._atomic_pdf_write(w, out_path, sources=[pdf_path])
self._status(t("tool.extract.status.done",
n=len(pages), name=os.path.basename(out_path)))
msg = t("tool.extract.done", n=len(pages), path=out_path)
Expand Down
2 changes: 1 addition & 1 deletion app/tools/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _run(self):
reader.decrypt(pwd)
for page in reader.pages:
w.add_page(page)
with open(out, "wb") as f: w.write(f)
self._atomic_pdf_write(w, out, sources=paths)
self._status(t("tool.merge.status.done", name=os.path.basename(out)))
QMessageBox.information(self, t("msg.done"), t("tool.merge.done", path=out))
except Exception as e: show_error(self, e)
21 changes: 17 additions & 4 deletions app/tools/ocr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""PDFApps – TabOCR: OCR text recognition tool."""

import os
import tempfile

from PySide6.QtCore import Qt
from PySide6.QtWidgets import (
Expand Down Expand Up @@ -300,8 +301,20 @@ def do_work(_self):
"RGB", (pix.width, pix.height), pix.samples)
texts.append(
pytesseract.image_to_string(img, lang=lang))
with open(out_path, "w", encoding="utf-8") as fh:
fh.write("\f".join(texts))
# Reject same input==output and write atomically
# so a mistaken overlap doesn't truncate the PDF.
self._check_not_same_path(out_path, [pdf_path])
out_dir = os.path.dirname(out_path) or os.getcwd()
_fd, _tmp = tempfile.mkstemp(suffix=".txt", dir=out_dir)
try:
with os.fdopen(_fd, "w", encoding="utf-8") as fh:
fh.write("\f".join(texts))
os.replace(_tmp, out_path)
except Exception:
if os.path.exists(_tmp):
try: os.unlink(_tmp)
except OSError: pass
raise
else:
pdf_pages = []
for i, page in enumerate(doc):
Expand All @@ -327,8 +340,8 @@ def do_work(_self):
writer = PdfWriter()
for page_bytes in pdf_pages:
writer.append(PdfReader(_io.BytesIO(page_bytes)))
with open(out_path, "wb") as fh:
writer.write(fh)
self._atomic_pdf_write(
writer, out_path, sources=[pdf_path])
finally:
doc.close()
return out_path
Expand Down
6 changes: 5 additions & 1 deletion app/tools/page_numbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,11 @@ def do_work(worker):

if worker.is_cancelled():
return None
doc.save(out_path, garbage=4, deflate=True)
self._atomic_pdf_write(
doc, out_path,
sources=[pdf_path],
save_opts={"garbage": 4, "deflate": True},
)
finally:
doc.close()
return out_path
Expand Down
2 changes: 1 addition & 1 deletion app/tools/reorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _run(self):
reader = self._open_reader(self.drop_in.path())
w = PdfWriter()
for idx in indices: w.add_page(reader.pages[idx])
with open(out, "wb") as f: w.write(f)
self._atomic_pdf_write(w, out, sources=[self.drop_in.path()])
self._status(t("tool.reorder.status.done", name=os.path.basename(out)))
msg = t("tool.reorder.done", path=out)
if self._pipeline_active:
Expand Down
2 changes: 1 addition & 1 deletion app/tools/rotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def _run(self):
for i, page in enumerate(reader.pages):
if i in pages: page.rotate(angle)
w.add_page(page)
with open(out_path, "wb") as f: w.write(f)
self._atomic_pdf_write(w, out_path, sources=[pdf_path])
self._status(t("tool.rotate.status.done", name=os.path.basename(out_path)))
if self._pipeline_active:
self._pipeline_success(t("tool.rotate.done", path=out_path), out_path)
Expand Down
3 changes: 1 addition & 2 deletions app/tools/watermark.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,7 @@ def do_work(worker):
current=i + 1, total=n))
if worker.is_cancelled():
return None
with open(out_path, "wb") as f:
w.write(f)
self._atomic_pdf_write(w, out_path, sources=[pdf_path, wm_path])
return out_path

def on_done(saved):
Expand Down
Loading