Skip to content

Commit

Permalink
TST: Error cases (#773)
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma committed Apr 17, 2022
1 parent 9111336 commit 9cd16d0
Show file tree
Hide file tree
Showing 4 changed files with 212 additions and 3 deletions.
154 changes: 154 additions & 0 deletions Tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ def test_read_metadata(pdf_path, expected):
docinfo = reader.getDocumentInfo()
metadict = dict(docinfo)
assert metadict == expected
docinfo.title
docinfo.title_raw
docinfo.author
docinfo.author_raw
docinfo.creator
docinfo.creator_raw
docinfo.producer
docinfo.producer_raw
docinfo.subject
docinfo.subject_raw
if "/Title" in metadict:
assert metadict["/Title"] == docinfo.title

Expand Down Expand Up @@ -293,9 +303,153 @@ def test_get_page_layout(src, expected):
"src,expected",
[
("form.pdf", "/UseNone"),
("crazyones.pdf", None),
],
)
def test_get_page_mode(src, expected):
    """Check that the reader reports the expected page mode for a resource PDF.

    ``src`` is a file name relative to ``RESOURCE_ROOT``; ``expected`` is the
    value ``getPageMode()`` must return for that file.
    """
    pdf_path = os.path.join(RESOURCE_ROOT, src)
    assert PdfFileReader(pdf_path).getPageMode() == expected


def test_read_empty():
    """Opening a zero-byte stream must raise ``PdfReadError`` with a clear message."""
    empty_stream = io.BytesIO()
    with pytest.raises(PdfReadError) as exc_info:
        PdfFileReader(empty_stream)
    # Checked after the block: exc_info is only populated once it exits.
    raised = exc_info.value
    assert raised.args[0] == "Cannot read an empty file"


def test_read_malformed():
    """Opening a stream that holds non-PDF bytes must raise ``PdfReadError``."""
    garbage_stream = io.BytesIO(b"foo")
    with pytest.raises(PdfReadError) as exc_info:
        PdfFileReader(garbage_stream)
    # Checked after the block: exc_info is only populated once it exits.
    raised = exc_info.value
    assert raised.args[0] == "Could not read malformed PDF file"


def test_read_prev_0_trailer():
    """Strict mode must reject a trailer that carries ``/Prev 0``.

    A minimal single-page PDF is assembled in memory whose trailer contains
    ``/Prev 0``. Opening it with ``strict=True`` is expected to raise
    ``PdfReadError`` with a message that points the user at ``strict=False``.
    """
    # Header and objects are written as literal bytes (no %-formatting) so the
    # offsets measured below are the offsets in the final file. Measuring them
    # on a format template is off by one: the escaped "%%" of the header
    # collapses to a single "%" when the template is rendered.
    body = (
        b"%PDF-1.7\n"
        b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
    )
    object_offsets = [body.find(b"%d 0 obj" % num) for num in range(1, 6)]
    # The cross-reference section starts right after the last object.
    xref_offset = len(body)

    xref_table = b"xref 1 5\n" + b"".join(
        b"%010d 00000 n\n" % offset for offset in object_offsets
    )
    # "/Prev 0" is the malformation under test.
    trailer = (
        b"trailer << /Prev 0 /Root 5 0 R /Size 6 >>\n"
        b"startxref %d\n"
        b"%%%%EOF"
    ) % xref_offset

    pdf_stream = io.BytesIO(body + xref_table + trailer)
    with pytest.raises(PdfReadError) as exc:
        PdfFileReader(pdf_stream, strict=True)
    assert exc.value.args[0] == "/Prev=0 in the trailer (try opening with strict=False)"


def test_read_missing_startxref():
    """A PDF without a ``startxref`` line must raise ``PdfReadError`` in strict mode."""
    template = (
        b"%%PDF-1.7\n"
        b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
        b"xref 1 5\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"trailer << /Root 5 0 R /Size 6 >>\n"
        # The "startxref <offset>" line is deliberately left out here.
        b"%%%%EOF"
    )
    # NOTE(review): offsets are looked up in the unformatted template, where
    # the header is still "%%PDF"; after formatting they sit one byte past the
    # real object starts. Confirm whether that matters for this test.
    object_offsets = tuple(
        template.find(b"%d 0 obj" % obj_num) for obj_num in range(1, 6)
    )
    rendered = template % object_offsets

    with pytest.raises(PdfReadError) as exc_info:
        PdfFileReader(io.BytesIO(rendered), strict=True)
    assert exc_info.value.args[0] == "startxref not found"


def test_read_unknown_zero_pages():
    """Counting pages of a PDF with an unresolvable reference must fail.

    With ``strict=True`` the failure is a ``PdfReadError``; with
    ``strict=False`` the same stream leads to an ``AttributeError`` when the
    page count is requested.
    """
    template = (
        b"%%PDF-1.7\n"
        b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        # The catalog points /Pages at "0 0 R", which is not a defined object.
        b"5 0 obj << /Pages 0 0 R /Type /Catalog >> endobj\n"
        b"xref 1 5\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        # NOTE(review): /Root is referenced with generation 1 while object 5
        # is declared with generation 0 above; confirm this is intentional.
        b"trailer << /Root 5 1 R /Size 6 >>\n"
        b"startxref %d\n"
        b"%%%%EOF"
    )
    # NOTE(review): offsets are looked up in the unformatted template, where
    # the header is still "%%PDF"; after formatting they sit one byte past the
    # real positions. Confirm whether the test relies on that.
    object_offsets = tuple(
        template.find(b"%d 0 obj" % obj_num) for obj_num in range(1, 6)
    )
    xref_offset = template.find(b"xref")
    pdf_stream = io.BytesIO(template % (object_offsets + (xref_offset,)))

    # Strict mode: constructing the reader and asking for the page count.
    with pytest.raises(PdfReadError) as exc:
        reader = PdfFileReader(pdf_stream, strict=True)
        reader.getNumPages()
    assert exc.value.args[0] == "Could not find object."

    # Lenient mode: the same stream is re-opened with strict=False.
    reader = PdfFileReader(pdf_stream, strict=False)
    with pytest.raises(AttributeError) as exc:
        reader.getNumPages()
    assert exc.value.args[0] == "'NoneType' object has no attribute 'getObject'"


def test_read_encrypted_without_decryption():
    """Asking an undecrypted, password-protected PDF for its page count must fail."""
    encrypted_path = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")
    reader = PdfFileReader(encrypted_path)
    # Only the page-count call sits inside the raises block; the reader is
    # constructed beforehand.
    with pytest.raises(PdfReadError) as exc_info:
        reader.getNumPages()
    raised = exc_info.value
    assert raised.args[0] == "File has not been decrypted"


def test_get_destination_age_number():
    """Smoke test: resolve the page number of every top-level outline entry.

    No return value is checked; the test only verifies that the calls
    complete without raising.
    """
    # NOTE(review): the name presumably meant "page_number"; kept unchanged so
    # existing test selections keep working.
    pdf_path = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")
    reader = PdfFileReader(pdf_path)
    for entry in reader.getOutlines():
        # List entries in the outline are skipped, not descended into.
        if isinstance(entry, list):
            continue
        reader.getDestinationPageNumber(entry)
38 changes: 35 additions & 3 deletions Tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")


def test_writer_clone():
    """Cloning a reader's document into a new writer yields a four-page writer."""
    source_path = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")

    writer = PdfFileWriter()
    writer.cloneDocumentFromReader(PdfFileReader(source_path))

    page_count = writer.getNumPages()
    assert page_count == 4


def test_writer_operations():
"""
This test just checks if the operation throws an exception.
Expand All @@ -33,7 +43,6 @@ def test_writer_operations():
writer.insertPage(reader_outline.pages[0], 0)
writer.addBookmarkDestination(page)
writer.removeLinks()
# assert output.getNamedDestRoot() == ['A named destination', IndirectObject(9, 0, output)]
writer.addBlankPage()
writer.addURI(2, "https://example.com", RectangleObject([0, 0, 100, 100]))
writer.addLink(2, 1, RectangleObject([0, 0, 100, 100]))
Expand Down Expand Up @@ -213,7 +222,11 @@ def test_add_named_destination():

from PyPDF2.pdf import NameObject

writer.addNamedDestination(NameObject("A bookmark"), 2)
writer.addNamedDestination(NameObject("A named dest"), 2)

from PyPDF2.pdf import IndirectObject

assert writer.getNamedDestRoot() == ["A named dest", IndirectObject(7, 0, writer)]

# write "output" to PyPDF2-output.pdf
tmp_filename = "dont_commit_named_destination.pdf"
Expand Down Expand Up @@ -307,4 +320,23 @@ def test_add_link():
writer.write(output_stream)

# Cleanup
# os.remove(tmp_filename)
os.remove(tmp_filename)


def test_io_streams():
    """Mirror the "Streaming data" docs example using in-memory streams only."""
    from io import BytesIO

    # Load the sample PDF into memory so the reader never sees a file path.
    pdf_path = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")
    with open(pdf_path, "rb") as source_file:
        input_stream = BytesIO(source_file.read())

    # Reading from a bytes stream.
    page_count = PdfFileReader(input_stream).getNumPages()
    assert page_count == 4

    # Writing to a bytes stream; the writer is intentionally left empty.
    with BytesIO() as output_stream:
        PdfFileWriter().write(output_stream)
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ You can contribute to `PyPDF2 on Github <https://github.com/py-pdf/PyPDF2>`_.
user/reading-pdf-annotations
user/adding-pdf-annotations
user/forms
user/streaming-data


.. toctree::
Expand Down
22 changes: 22 additions & 0 deletions docs/user/streaming-data.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Streaming Data with PyPDF2

In some cases you might want to avoid saving things explicitly as a file
to disk, e.g. when you want to store the PDF in a database or AWS S3.

PyPDF2 can read from and write to file-like objects instead of files on disk. Here is how:

```python
from io import BytesIO

from PyPDF2 import PdfFileReader, PdfFileWriter

# Prepare example
with open("example.pdf", 'rb') as fh:
    bytes_stream = BytesIO(fh.read())

# Read from bytes_stream
reader = PdfFileReader(bytes_stream)

# Write to bytes_stream
writer = PdfFileWriter()
with BytesIO() as bytes_stream:
    writer.write(bytes_stream)
```

0 comments on commit 9cd16d0

Please sign in to comment.