Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix metadata encoding #689

Merged
merged 1 commit into from
Feb 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
- new translation of the tutorial in [简体中文](https://pyfpdf.github.io/fpdf2/Tutorial-zh.html) - thanks to @Bubbu0129
### Fixed
- hyperlinks were not working on encrypted files - thanks to @andersonhc
- Unicode (not limited to ASCII) text can now be provided as metadata [#685](https://github.com/PyFPDF/fpdf2/issues/685)
- all `TitleStyle` constructor parameters are now effectively optional
### Changed
- vector images parsing is now more robust: `fpdf2` can now embed SVG files that have no `viewPort` or no `height` / `width`
Expand Down
27 changes: 16 additions & 11 deletions fpdf/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@
from .errors import FPDFException
from .outline import build_outline_objs
from .sign import Signature, sign_content
from .syntax import build_obj_dict, Name, PDFArray, PDFContentStream, PDFObject
from .syntax import (
build_obj_dict,
Name,
PDFArray,
PDFContentStream,
PDFObject,
PDFString,
)
from .syntax import create_dictionary_string as pdf_dict
from .syntax import create_list_string as pdf_list
from .syntax import iobj_ref as pdf_ref
Expand Down Expand Up @@ -110,12 +117,12 @@ def __init__(
creation_date,
):
super().__init__()
self.title = enclose_in_parens(title) if title else None
self.subject = enclose_in_parens(subject) if subject else None
self.author = enclose_in_parens(author) if author else None
self.keywords = enclose_in_parens(keywords) if keywords else None
self.creator = enclose_in_parens(creator) if creator else None
self.producer = enclose_in_parens(producer) if producer else None
self.title = PDFString(title) if title else None
self.subject = PDFString(subject) if subject else None
self.author = PDFString(author) if author else None
self.keywords = PDFString(keywords) if keywords else None
self.creator = PDFString(creator) if creator else None
self.producer = PDFString(producer) if producer else None
self.creation_date = creation_date


Expand Down Expand Up @@ -170,7 +177,7 @@ def __init__(self, contents):

class PDFXmpMetadata(PDFContentStream):
def __init__(self, contents):
super().__init__(contents=contents)
super().__init__(contents=contents.encode("utf-8"))
self.type = Name("Metadata")
self.subtype = Name("XML")

Expand Down Expand Up @@ -836,9 +843,7 @@ def _add_document_outline(self):
def _add_xmp_metadata(self):
if not self.fpdf.xmp_metadata:
return None
xpacket = f'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>\n{self.fpdf.xmp_metadata}\n<?xpacket end="w"?>\n'.encode(
"latin-1"
)
xpacket = f'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>\n{self.fpdf.xmp_metadata}\n<?xpacket end="w"?>\n'
pdf_obj = PDFXmpMetadata(xpacket)
self._add_pdf_obj(pdf_obj)
return pdf_obj
Expand Down
Binary file modified test/encryption/encrypt_metadata.pdf
Binary file not shown.
Binary file modified test/encryption/encryption_aes128.pdf
Binary file not shown.
Binary file modified test/encryption/encryption_rc4.pdf
Binary file not shown.
Binary file modified test/encryption/encryption_rc4_permissions.pdf
Binary file not shown.
Binary file modified test/encryption/encryption_rc4_user_password.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion test/image/test_load_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_load_invalid_base64_data():


# ensure memory usage does not get too high - this value depends on Python version:
@memunit.assert_lt_mb(145)
@memunit.assert_lt_mb(147)
def test_share_images_cache(tmp_path):
images_cache = {}

Expand Down
2 changes: 1 addition & 1 deletion test/image/test_oversized.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

HERE = Path(__file__).resolve().parent
IMAGE_PATH = HERE / "png_images/6c853ed9dacd5716bc54eb59cec30889.png"
MAX_MEMORY_MB = 145 # memory usage depends on Python version
MAX_MEMORY_MB = 147 # memory usage depends on Python version


def test_oversized_images_warn(caplog):
Expand Down
Binary file added test/metadata/metadata_unicode.pdf
Binary file not shown.
Binary file modified test/metadata/put_info_all.pdf
Binary file not shown.
Binary file modified test/metadata/put_info_some.pdf
Binary file not shown.
49 changes: 40 additions & 9 deletions test/metadata/test_info.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path

import fpdf
from fpdf import FPDF
from test.conftest import assert_pdf_equal

HERE = Path(__file__).resolve().parent
Expand All @@ -10,25 +10,25 @@
<rdf:Description rdf:about="">
<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/">
<rdf:Alt>
<rdf:li xml:lang="x-default">My document title</rdf:li>
<rdf:li xml:lang="x-default">{title}</rdf:li>
</rdf:Alt>
</dc:title>
</rdf:Description>
<rdf:Description rdf:about="">
<dc:description xmlns:dc="http://purl.org/dc/elements/1.1/">
<rdf:Alt>
<rdf:li xml:lang="x-default">This is a test document for fpdf2 with XMP metadata</rdf:li>
<rdf:li xml:lang="x-default">{desc}</rdf:li>
</rdf:Alt>
</dc:description>
</rdf:Description>
<rdf:Description rdf:about="">
<dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">
<rdf:Seq>
<rdf:li>Lucas Cimon</rdf:li>
<rdf:li>{creator}</rdf:li>
</rdf:Seq>
</dc:creator>
</rdf:Description>
<rdf:Description xmlns:pdf="http://ns.adobe.com/pdf/1.3/" rdf:about="" pdf:Keywords="test data pdf fpdf2"/>
<rdf:Description xmlns:pdf="http://ns.adobe.com/pdf/1.3/" rdf:about="" pdf:Keywords="{keywords}"/>
<rdf:Description xmlns:pdf="http://ns.adobe.com/pdf/1.3/" rdf:about="" pdf:Producer="PyFPDF/fpdf2.X.Y"/>
<rdf:Description xmlns:xmp="http://ns.adobe.com/xap/1.0/" rdf:about="" xmp:CreatorTool="fpdf2"/>
</rdf:RDF>
Expand All @@ -43,7 +43,7 @@ def document_operations(doc):

def test_put_info_all(tmp_path):
"""This test tests all possible inputs to FPDF#_put_info."""
doc = fpdf.FPDF()
doc = FPDF()
document_operations(doc)
doc.set_title("sample title")
doc.set_lang("en-US")
Expand All @@ -57,7 +57,7 @@ def test_put_info_all(tmp_path):

def test_put_info_some(tmp_path):
"""This test tests some possible inputs to FPDF#_put_info."""
doc = fpdf.FPDF()
doc = FPDF()
document_operations(doc)
doc.set_title("sample title")
doc.set_keywords("sample keywords")
Expand All @@ -66,7 +66,38 @@ def test_put_info_some(tmp_path):


def test_xmp_metadata(tmp_path):
doc = fpdf.FPDF()
doc = FPDF()
document_operations(doc)
doc.set_xmp_metadata(XMP_METADATA)
doc.set_xmp_metadata(
XMP_METADATA.format(
title="My document title",
desc="This is a test document for fpdf2 with XMP metadata",
creator="Lucas Cimon",
keywords="test data pdf fpdf2",
)
)
assert_pdf_equal(doc, HERE / "xmp_metadata.pdf", tmp_path)


def test_metadata_unicode(tmp_path): # issue 685
doc = FPDF()
document_operations(doc)
doc.set_creator("༄༅། །སྒྲུབ།")
doc.set_title("༄༅། །སྒྲུབ།")
doc.set_keywords("༄༅། །སྒྲུབ།")
doc.set_creator("༄༅། །སྒྲུབ།")
assert_pdf_equal(doc, HERE / "metadata_unicode.pdf", tmp_path)


def test_xmp_metadata_unicode(tmp_path): # issue 685
doc = FPDF()
document_operations(doc)
doc.set_xmp_metadata(
XMP_METADATA.format(
title="༄༅། །སྒྲུབ།",
desc="༄༅། །སྒྲུབ།",
creator="༄༅། །སྒྲུབ།",
keywords="༄༅། །སྒྲུབ།",
)
)
assert_pdf_equal(doc, HERE / "xmp_metadata_unicode.pdf", tmp_path)
Binary file modified test/metadata/xmp_metadata.pdf
Binary file not shown.
Binary file added test/metadata/xmp_metadata_unicode.pdf
Binary file not shown.
Binary file modified test/template/template_code39.pdf
Binary file not shown.
Binary file modified test/template/template_code39_defaultheight.pdf
Binary file not shown.
Binary file modified test/template/template_multipage.pdf
Binary file not shown.
Binary file modified test/template/template_nominal_csv.pdf
Binary file not shown.
Binary file modified test/template/template_nominal_hardcoded.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion test/test_perfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

@pytest.mark.timeout(40)
# ensure memory usage does not get too high - this value depends on Python version:
@memunit.assert_lt_mb(180)
@memunit.assert_lt_mb(181)
def test_intense_image_rendering():
png_file_paths = []
for png_file_path in (HERE / "image/png_images/").glob("*.png"):
Expand Down