Skip to content

Commit

Permalink
Fix metadata encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
Lucas-C committed Feb 14, 2023
1 parent ac60b07 commit ff5763d
Show file tree
Hide file tree
Showing 21 changed files with 60 additions and 23 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
- new translation of the tutorial in [简体中文](https://pyfpdf.github.io/fpdf2/Tutorial-zh.html) - thanks to @Bubbu0129
### Fixed
- hyperlinks were not working on encrypted files - thanks to @andersonhc
- unicode (not limited to ASCII) text can now be provided as metadata [#685](https://github.com/PyFPDF/fpdf2/issues/685)
- all `TitleStyle` constructor parameters are now effectively optional
### Changed
- vector images parsing is now more robust: `fpdf2` can now embed SVG files without `viewPort` or without `height` / `width`
Expand Down
27 changes: 16 additions & 11 deletions fpdf/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@
from .errors import FPDFException
from .outline import build_outline_objs
from .sign import Signature, sign_content
from .syntax import build_obj_dict, Name, PDFArray, PDFContentStream, PDFObject
from .syntax import (
build_obj_dict,
Name,
PDFArray,
PDFContentStream,
PDFObject,
PDFString,
)
from .syntax import create_dictionary_string as pdf_dict
from .syntax import create_list_string as pdf_list
from .syntax import iobj_ref as pdf_ref
Expand Down Expand Up @@ -110,12 +117,12 @@ def __init__(
creation_date,
):
super().__init__()
self.title = enclose_in_parens(title) if title else None
self.subject = enclose_in_parens(subject) if subject else None
self.author = enclose_in_parens(author) if author else None
self.keywords = enclose_in_parens(keywords) if keywords else None
self.creator = enclose_in_parens(creator) if creator else None
self.producer = enclose_in_parens(producer) if producer else None
self.title = PDFString(title) if title else None
self.subject = PDFString(subject) if subject else None
self.author = PDFString(author) if author else None
self.keywords = PDFString(keywords) if keywords else None
self.creator = PDFString(creator) if creator else None
self.producer = PDFString(producer) if producer else None
self.creation_date = creation_date


Expand Down Expand Up @@ -170,7 +177,7 @@ def __init__(self, contents):

class PDFXmpMetadata(PDFContentStream):
def __init__(self, contents):
super().__init__(contents=contents)
super().__init__(contents=contents.encode("utf-8"))
self.type = Name("Metadata")
self.subtype = Name("XML")

Expand Down Expand Up @@ -836,9 +843,7 @@ def _add_document_outline(self):
def _add_xmp_metadata(self):
if not self.fpdf.xmp_metadata:
return None
xpacket = f'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>\n{self.fpdf.xmp_metadata}\n<?xpacket end="w"?>\n'.encode(
"latin-1"
)
xpacket = f'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>\n{self.fpdf.xmp_metadata}\n<?xpacket end="w"?>\n'
pdf_obj = PDFXmpMetadata(xpacket)
self._add_pdf_obj(pdf_obj)
return pdf_obj
Expand Down
Binary file modified test/encryption/encrypt_metadata.pdf
Binary file not shown.
Binary file modified test/encryption/encryption_aes128.pdf
Binary file not shown.
Binary file modified test/encryption/encryption_rc4.pdf
Binary file not shown.
Binary file modified test/encryption/encryption_rc4_permissions.pdf
Binary file not shown.
Binary file modified test/encryption/encryption_rc4_user_password.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion test/image/test_load_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_load_invalid_base64_data():


# ensure memory usage does not get too high - this value depends on Python version:
@memunit.assert_lt_mb(145)
@memunit.assert_lt_mb(147)
def test_share_images_cache(tmp_path):
images_cache = {}

Expand Down
2 changes: 1 addition & 1 deletion test/image/test_oversized.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

HERE = Path(__file__).resolve().parent
IMAGE_PATH = HERE / "png_images/6c853ed9dacd5716bc54eb59cec30889.png"
MAX_MEMORY_MB = 145 # memory usage depends on Python version
MAX_MEMORY_MB = 147 # memory usage depends on Python version


def test_oversized_images_warn(caplog):
Expand Down
Binary file added test/metadata/metadata_unicode.pdf
Binary file not shown.
Binary file modified test/metadata/put_info_all.pdf
Binary file not shown.
Binary file modified test/metadata/put_info_some.pdf
Binary file not shown.
49 changes: 40 additions & 9 deletions test/metadata/test_info.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path

import fpdf
from fpdf import FPDF
from test.conftest import assert_pdf_equal

HERE = Path(__file__).resolve().parent
Expand All @@ -10,25 +10,25 @@
<rdf:Description rdf:about="">
<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/">
<rdf:Alt>
<rdf:li xml:lang="x-default">My document title</rdf:li>
<rdf:li xml:lang="x-default">{title}</rdf:li>
</rdf:Alt>
</dc:title>
</rdf:Description>
<rdf:Description rdf:about="">
<dc:description xmlns:dc="http://purl.org/dc/elements/1.1/">
<rdf:Alt>
<rdf:li xml:lang="x-default">This is a test document for fpdf2 with XMP metadata</rdf:li>
<rdf:li xml:lang="x-default">{desc}</rdf:li>
</rdf:Alt>
</dc:description>
</rdf:Description>
<rdf:Description rdf:about="">
<dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">
<rdf:Seq>
<rdf:li>Lucas Cimon</rdf:li>
<rdf:li>{creator}</rdf:li>
</rdf:Seq>
</dc:creator>
</rdf:Description>
<rdf:Description xmlns:pdf="http://ns.adobe.com/pdf/1.3/" rdf:about="" pdf:Keywords="test data pdf fpdf2"/>
<rdf:Description xmlns:pdf="http://ns.adobe.com/pdf/1.3/" rdf:about="" pdf:Keywords="{keywords}"/>
<rdf:Description xmlns:pdf="http://ns.adobe.com/pdf/1.3/" rdf:about="" pdf:Producer="PyFPDF/fpdf2.X.Y"/>
<rdf:Description xmlns:xmp="http://ns.adobe.com/xap/1.0/" rdf:about="" xmp:CreatorTool="fpdf2"/>
</rdf:RDF>
Expand All @@ -43,7 +43,7 @@ def document_operations(doc):

def test_put_info_all(tmp_path):
"""This test tests all possible inputs to FPDF#_put_info."""
doc = fpdf.FPDF()
doc = FPDF()
document_operations(doc)
doc.set_title("sample title")
doc.set_lang("en-US")
Expand All @@ -57,7 +57,7 @@ def test_put_info_all(tmp_path):

def test_put_info_some(tmp_path):
"""This test tests some possible inputs to FPDF#_put_info."""
doc = fpdf.FPDF()
doc = FPDF()
document_operations(doc)
doc.set_title("sample title")
doc.set_keywords("sample keywords")
Expand All @@ -66,7 +66,38 @@ def test_put_info_some(tmp_path):


def test_xmp_metadata(tmp_path):
doc = fpdf.FPDF()
doc = FPDF()
document_operations(doc)
doc.set_xmp_metadata(XMP_METADATA)
doc.set_xmp_metadata(
XMP_METADATA.format(
title="My document title",
desc="This is a test document for fpdf2 with XMP metadata",
creator="Lucas Cimon",
keywords="test data pdf fpdf2",
)
)
assert_pdf_equal(doc, HERE / "xmp_metadata.pdf", tmp_path)


def test_metadata_unicode(tmp_path):  # issue 685
    """Check that non-ASCII text is accepted in the document information fields.

    The creator, title and keywords are set to the same non-ASCII string and
    the resulting document is compared with the stored reference file
    ``metadata_unicode.pdf``.
    """
    doc = FPDF()
    document_operations(doc)
    # The creator was previously set twice with the same value; one call is kept.
    # NOTE(review): the duplicate may have been intended for another field
    # (e.g. author) - adding one would require regenerating the reference PDF.
    doc.set_creator("༄༅། །སྒྲུབ།")
    doc.set_title("༄༅། །སྒྲུབ།")
    doc.set_keywords("༄༅། །སྒྲུབ།")
    assert_pdf_equal(doc, HERE / "metadata_unicode.pdf", tmp_path)


def test_xmp_metadata_unicode(tmp_path):  # issue 685
    """Check that an XMP packet containing non-ASCII text can be embedded.

    Every placeholder of ``XMP_METADATA`` is filled with the same non-ASCII
    string, and the resulting document is compared with the stored reference
    file ``xmp_metadata_unicode.pdf``.
    """
    doc = FPDF()
    document_operations(doc)
    # Same value for every templated field of the XMP packet.
    field_values = {
        "title": "༄༅། །སྒྲུབ།",
        "desc": "༄༅། །སྒྲུབ།",
        "creator": "༄༅། །སྒྲུབ།",
        "keywords": "༄༅། །སྒྲུབ།",
    }
    xmp_packet = XMP_METADATA.format(**field_values)
    doc.set_xmp_metadata(xmp_packet)
    assert_pdf_equal(doc, HERE / "xmp_metadata_unicode.pdf", tmp_path)
Binary file modified test/metadata/xmp_metadata.pdf
Binary file not shown.
Binary file added test/metadata/xmp_metadata_unicode.pdf
Binary file not shown.
Binary file modified test/template/template_code39.pdf
Binary file not shown.
Binary file modified test/template/template_code39_defaultheight.pdf
Binary file not shown.
Binary file modified test/template/template_multipage.pdf
Binary file not shown.
Binary file modified test/template/template_nominal_csv.pdf
Binary file not shown.
Binary file modified test/template/template_nominal_hardcoded.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion test/test_perfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

@pytest.mark.timeout(40)
# ensure memory usage does not get too high - this value depends on Python version:
@memunit.assert_lt_mb(180)
@memunit.assert_lt_mb(181)
def test_intense_image_rendering():
png_file_paths = []
for png_file_path in (HERE / "image/png_images/").glob("*.png"):
Expand Down

0 comments on commit ff5763d

Please sign in to comment.