Skip to content

Commit

Permalink
BUG: Remove erroneous assertion check (#1564)
Browse files Browse the repository at this point in the history
Closes #1559

The erroneous assertion was due to a misinterpretation of the text at the bottom of page 108 of the PDF 1.7 reference.
  • Loading branch information
pubpub-zz committed Jan 21, 2023
1 parent 53645ef commit a6aad31
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
6 changes: 3 additions & 3 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1901,11 +1901,11 @@ def _read_xref_subsections(
get_entry: Callable[[int], Union[int, Tuple[int, ...]]],
used_before: Callable[[int, Union[int, Tuple[int, ...]]], bool],
) -> None:
last_end = 0
# last_end = 0
for start, size in self._pairs(idx_pairs):
# The subsections must increase
assert start >= last_end
last_end = start + size
# assert start >= last_end
# last_end = start + size
for num in range(start, start + size):
# The first entry is the type
xref_type = get_entry(0)
Expand Down
8 changes: 8 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1279,3 +1279,11 @@ def test_build_outline_item(caplog):
def test_page_labels(src, page_labels):
max_indices = 6
assert PdfReader(src).page_labels[:max_indices] == page_labels[:max_indices]


def test_iss1559():
    """Smoke test for issue #1559.

    Downloads the sample PDF and extracts the text of every page. There are
    no explicit assertions: the test passes as long as reading the file and
    extracting text raise no exception.
    """
    pdf_bytes = get_pdf_from_url(
        "https://github.com/py-pdf/pypdf/files/10441992/default.pdf",
        name="iss1559.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    for page in reader.pages:
        page.extract_text()

0 comments on commit a6aad31

Please sign in to comment.