Skip to content

Commit 3b5c85f

Browse files
authored
MAINT: Refactor _writer code into _appearance_stream (#3466)
1 parent 56cf557 commit 3b5c85f

File tree

3 files changed

+360
-212
lines changed

3 files changed

+360
-212
lines changed

pypdf/_writer.py

Lines changed: 46 additions & 211 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,6 @@
4848
cast,
4949
)
5050

51-
from ._cmap import _default_fonts_space_width, build_char_map_from_dict
5251
from ._doc_common import DocumentInformation, PdfDocCommon
5352
from ._encryption import EncryptAlgorithm, Encryption
5453
from ._page import PageObject, Transformation
@@ -85,7 +84,6 @@
8584
BooleanObject,
8685
ByteStringObject,
8786
ContentStream,
88-
DecodedStreamObject,
8987
Destination,
9088
DictionaryObject,
9189
EmbeddedFile,
@@ -107,6 +105,7 @@
107105
hex_to_rgb,
108106
is_null_or_none,
109107
)
108+
from .generic._appearance_stream import TextStreamAppearance
110109
from .pagerange import PageRange, PageRangeSpec
111110
from .types import (
112111
AnnotationSubtype,
@@ -119,7 +118,6 @@
119118
from .xmp import XmpInformation
120119

121120
ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions.all()
122-
DEFAULT_FONT_HEIGHT_IN_MULTILINE = 12
123121

124122

125123
class ObjectDeletionFlag(enum.IntFlag):
@@ -874,7 +872,6 @@ def _add_apstream_object(
874872
object_name: str,
875873
x_offset: float,
876874
y_offset: float,
877-
font_res: Optional[DictionaryObject] = None
878875
) -> None:
879876
"""
880877
Adds an appearance stream to the page content in the form of
@@ -886,17 +883,25 @@ def _add_apstream_object(
886883
object_name: The name of the appearance stream.
887884
x_offset: The horizontal offset for the appearance stream.
888885
y_offset: The vertical offset for the appearance stream.
889-
font_res: The appearance stream's font resource (if given).
890886
"""
891-
# Prepare XObject resource dictionary on the page
887+
# Prepare XObject resource dictionary on the page. This currently
888+
# only deals with font resources, but can easily be adapted to also
889+
# include other resources.
892890
pg_res = cast(DictionaryObject, page[PG.RESOURCES])
893-
if font_res is not None:
894-
font_name = font_res["/BaseFont"] # [/"Name"] often also exists, but is deprecated
891+
if "/Resources" in appearance_stream_obj:
892+
ap_stream_res = cast(DictionaryObject, appearance_stream_obj["/Resources"])
893+
# No need to check whether "/Font" is in ap_stream_res, because the only reason this
894+
# code runs would be if we are flattening form fields, and the associated code
895+
# either adds a Font resource or no resource at all. This probably needs to
896+
# change if we want to use this method to flatten markup annotations.
897+
ap_stream_font_dict = cast(DictionaryObject, ap_stream_res["/Font"])
895898
if "/Font" not in pg_res:
896899
pg_res[NameObject("/Font")] = DictionaryObject()
897-
pg_ft_res = cast(DictionaryObject, pg_res[NameObject("/Font")])
898-
if font_name not in pg_ft_res:
899-
pg_ft_res[NameObject(font_name)] = font_res
900+
pg_font_res = cast(DictionaryObject, pg_res["/Font"])
901+
# Merge fonts from the appearance stream into the page's font resources
902+
for font_name, font_ref in ap_stream_font_dict.items():
903+
if font_name not in pg_font_res:
904+
pg_font_res[font_name] = font_ref
900905
# Always add the resolved stream object to the writer to get a new IndirectObject.
901906
# This ensures we have a valid IndirectObject managed by *this* writer.
902907
xobject_ref = self._add_object(appearance_stream_obj)
@@ -915,160 +920,6 @@ def _add_apstream_object(
915920
xobject_drawing_commands = f"q\n{xobject_cm._to_cm()}\n{xobject_name} Do\nQ".encode()
916921
self._merge_content_stream_to_page(page, xobject_drawing_commands)
917922

918-
def _update_field_annotation(
919-
self,
920-
page: PageObject,
921-
field: DictionaryObject,
922-
annotation: DictionaryObject,
923-
font_name: str = "",
924-
font_size: float = -1,
925-
flatten: bool = False,
926-
) -> None:
927-
# Calculate rectangle dimensions
928-
_rct = cast(RectangleObject, annotation[AA.Rect])
929-
rct = RectangleObject((0, 0, abs(_rct[2] - _rct[0]), abs(_rct[3] - _rct[1])))
930-
931-
# Extract font information
932-
da = annotation.get_inherited(
933-
AA.DA,
934-
cast(DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]).get(
935-
AA.DA, None
936-
),
937-
)
938-
if da is None:
939-
da = TextStringObject("/Helv 0 Tf 0 g")
940-
else:
941-
da = da.get_object()
942-
font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
943-
font_properties = [x for x in font_properties if x != ""]
944-
if font_name:
945-
font_properties[font_properties.index("Tf") - 2] = font_name
946-
else:
947-
font_name = font_properties[font_properties.index("Tf") - 2]
948-
font_height = (
949-
font_size
950-
if font_size >= 0
951-
else float(font_properties[font_properties.index("Tf") - 1])
952-
)
953-
if font_height == 0:
954-
if field.get(FA.Ff, 0) & FA.FfBits.Multiline:
955-
font_height = DEFAULT_FONT_HEIGHT_IN_MULTILINE
956-
else:
957-
font_height = rct.height - 2
958-
font_properties[font_properties.index("Tf") - 1] = str(font_height)
959-
da = " ".join(font_properties)
960-
y_offset = rct.height - 1 - font_height
961-
962-
# Retrieve font information from local DR ...
963-
dr: Any = cast(
964-
DictionaryObject,
965-
cast(
966-
DictionaryObject,
967-
annotation.get_inherited(
968-
"/DR",
969-
cast(
970-
DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
971-
).get("/DR", DictionaryObject()),
972-
),
973-
).get_object(),
974-
)
975-
dr = dr.get("/Font", DictionaryObject()).get_object()
976-
# _default_fonts_space_width keys is the list of Standard fonts
977-
if font_name not in dr and font_name not in _default_fonts_space_width:
978-
# ...or AcroForm dictionary
979-
dr = cast(
980-
dict[Any, Any],
981-
cast(
982-
DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
983-
).get("/DR", {}),
984-
)
985-
dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
986-
font_res = dr.get(font_name, None)
987-
if not is_null_or_none(font_res):
988-
font_res = cast(DictionaryObject, font_res.get_object())
989-
_font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
990-
200, font_res
991-
)
992-
try: # remove width stored in -1 key
993-
del font_map[-1]
994-
except KeyError:
995-
pass
996-
font_full_rev: dict[str, bytes]
997-
if isinstance(font_encoding, str):
998-
font_full_rev = {
999-
v: k.encode(font_encoding) for k, v in font_map.items()
1000-
}
1001-
else:
1002-
font_full_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
1003-
font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
1004-
for key, value in font_map.items():
1005-
font_full_rev[value] = font_encoding_rev.get(key, key)
1006-
else:
1007-
logger_warning(f"Font dictionary for {font_name} not found.", __name__)
1008-
font_full_rev = {}
1009-
1010-
# Retrieve field text and selected values
1011-
field_flags = field.get(FA.Ff, 0)
1012-
if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
1013-
txt = "\n".join(annotation.get_inherited(FA.Opt, []))
1014-
sel = field.get("/V", [])
1015-
if not isinstance(sel, list):
1016-
sel = [sel]
1017-
else: # /Tx
1018-
txt = field.get("/V", "")
1019-
sel = []
1020-
# Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
1021-
txt = txt.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
1022-
# Generate appearance stream
1023-
ap_stream = generate_appearance_stream(
1024-
txt, sel, da, font_full_rev, rct, font_height, y_offset
1025-
)
1026-
1027-
# Create appearance dictionary
1028-
dct = DecodedStreamObject.initialize_from_dictionary(
1029-
{
1030-
NameObject("/Type"): NameObject("/XObject"),
1031-
NameObject("/Subtype"): NameObject("/Form"),
1032-
NameObject("/BBox"): rct,
1033-
"__streamdata__": ByteStringObject(ap_stream),
1034-
"/Length": 0,
1035-
}
1036-
)
1037-
if AA.AP in annotation:
1038-
for k, v in cast(DictionaryObject, annotation[AA.AP]).get("/N", {}).items():
1039-
if k not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
1040-
dct[k] = v
1041-
1042-
# Update Resources with font information if necessary
1043-
if font_res is not None:
1044-
dct[NameObject("/Resources")] = DictionaryObject(
1045-
{
1046-
NameObject("/Font"): DictionaryObject(
1047-
{
1048-
NameObject(font_name): getattr(
1049-
font_res, "indirect_reference", font_res
1050-
)
1051-
}
1052-
)
1053-
}
1054-
)
1055-
if AA.AP not in annotation:
1056-
annotation[NameObject(AA.AP)] = DictionaryObject(
1057-
{NameObject("/N"): self._add_object(dct)}
1058-
)
1059-
elif "/N" not in cast(DictionaryObject, annotation[AA.AP]):
1060-
cast(DictionaryObject, annotation[NameObject(AA.AP)])[
1061-
NameObject("/N")
1062-
] = self._add_object(dct)
1063-
else: # [/AP][/N] exists
1064-
n = annotation[AA.AP]["/N"].indirect_reference.idnum # type: ignore
1065-
self._objects[n - 1] = dct
1066-
dct.indirect_reference = IndirectObject(n, 0, self)
1067-
1068-
if flatten:
1069-
field_name = self._get_qualified_field_name(annotation)
1070-
self._add_apstream_object(page, dct, field_name, _rct[0], _rct[1], font_res)
1071-
1072923
FFBITS_NUL = FA.FfBits(0)
1073924

1074925
def update_page_form_field_values(
@@ -1111,8 +962,8 @@ def update_page_form_field_values(
1111962
"""
1112963
if CatalogDictionary.ACRO_FORM not in self._root_object:
1113964
raise PyPdfError("No /AcroForm dictionary in PDF of PdfWriter Object")
1114-
af = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
1115-
if InteractiveFormDictEntries.Fields not in af:
965+
acro_form = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
966+
if InteractiveFormDictEntries.Fields not in acro_form:
1116967
raise PyPdfError("No /Fields dictionary in PDF of PdfWriter Object")
1117968
if isinstance(auto_regenerate, bool):
1118969
self.set_need_appearances_writer(auto_regenerate)
@@ -1139,6 +990,7 @@ def update_page_form_field_values(
1139990
).get_object()
1140991

1141992
for field, value in fields.items():
993+
rectangle = cast(RectangleObject, annotation[AA.Rect])
1142994
if not (
1143995
self._get_qualified_field_name(parent_annotation) == field
1144996
or parent_annotation.get("/T", None) == field
@@ -1151,6 +1003,7 @@ def update_page_form_field_values(
11511003
del parent_annotation["/I"]
11521004
if flags:
11531005
annotation[NameObject(FA.Ff)] = NumberObject(flags)
1006+
# Set the field value
11541007
if not (value is None and flatten): # Only change values if given by user and not flattening.
11551008
if isinstance(value, list):
11561009
lst = ArrayObject(TextStringObject(v) for v in value)
@@ -1161,37 +1014,52 @@ def update_page_form_field_values(
11611014
)
11621015
else:
11631016
parent_annotation[NameObject(FA.V)] = TextStringObject(value)
1017+
# Get or create the field's appearance stream object
11641018
if parent_annotation.get(FA.FT) == "/Btn":
1165-
# Checkbox button (no /FT found in Radio widgets)
1019+
# Checkbox button (no /FT found in Radio widgets);
1020+
# We can find the associated appearance stream object
1021+
# within the annotation.
11661022
v = NameObject(value)
11671023
ap = cast(DictionaryObject, annotation[NameObject(AA.AP)])
11681024
normal_ap = cast(DictionaryObject, ap["/N"])
11691025
if v not in normal_ap:
11701026
v = NameObject("/Off")
11711027
appearance_stream_obj = normal_ap.get(v)
1172-
# other cases will be updated through the for loop
1028+
# Other cases will be updated through the for loop
11731029
annotation[NameObject(AA.AS)] = v
11741030
annotation[NameObject(FA.V)] = v
1175-
if flatten and appearance_stream_obj is not None:
1176-
# We basically copy the entire appearance stream, which should be an XObject that
1177-
# is already registered. No need to add font resources.
1178-
rct = cast(RectangleObject, annotation[AA.Rect])
1179-
self._add_apstream_object(page, appearance_stream_obj, field, rct[0], rct[1])
11801031
elif (
11811032
parent_annotation.get(FA.FT) == "/Tx"
11821033
or parent_annotation.get(FA.FT) == "/Ch"
11831034
):
1184-
# textbox
1035+
# Textbox; we need to generate the appearance stream object
11851036
if isinstance(value, tuple):
1186-
self._update_field_annotation(
1187-
page, parent_annotation, annotation, value[1], value[2], flatten=flatten
1037+
appearance_stream_obj = TextStreamAppearance.from_text_annotation(
1038+
acro_form, parent_annotation, annotation, value[1], value[2]
11881039
)
11891040
else:
1190-
self._update_field_annotation(page, parent_annotation, annotation, flatten=flatten)
1041+
appearance_stream_obj = TextStreamAppearance.from_text_annotation(
1042+
acro_form, parent_annotation, annotation
1043+
)
1044+
# Add the appearance stream object
1045+
if AA.AP not in annotation:
1046+
annotation[NameObject(AA.AP)] = DictionaryObject(
1047+
{NameObject("/N"): self._add_object(appearance_stream_obj)}
1048+
)
1049+
elif "/N" not in (ap:= cast(DictionaryObject, annotation[AA.AP])):
1050+
cast(DictionaryObject, annotation[NameObject(AA.AP)])[
1051+
NameObject("/N")
1052+
] = self._add_object(appearance_stream_obj)
1053+
else: # [/AP][/N] exists
1054+
n = annotation[AA.AP]["/N"].indirect_reference.idnum # type: ignore
1055+
self._objects[n - 1] = appearance_stream_obj
1056+
appearance_stream_obj.indirect_reference = IndirectObject(n, 0, self)
11911057
elif (
11921058
annotation.get(FA.FT) == "/Sig"
11931059
): # deprecated # not implemented yet
11941060
logger_warning("Signature forms not implemented yet", __name__)
1061+
if flatten and appearance_stream_obj is not None:
1062+
self._add_apstream_object(page, appearance_stream_obj, field, rectangle[0], rectangle[1])
11951063

11961064
def reattach_fields(
11971065
self, page: Optional[PageObject] = None
@@ -3435,36 +3303,3 @@ def _create_outline_item(
34353303
format_flag += OutlineFontFlag.bold
34363304
outline_item.update({NameObject("/F"): NumberObject(format_flag)})
34373305
return outline_item
3438-
3439-
3440-
def generate_appearance_stream(
3441-
txt: str,
3442-
sel: list[str],
3443-
da: str,
3444-
font_full_rev: dict[str, bytes],
3445-
rct: RectangleObject,
3446-
font_height: float,
3447-
y_offset: float,
3448-
) -> bytes:
3449-
ap_stream = f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
3450-
for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
3451-
if line in sel:
3452-
# may be improved but cannot find how to get fill working => replaced with lined box
3453-
ap_stream += (
3454-
f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
3455-
f"0.5 0.5 0.5 rg s\n{da}\n"
3456-
).encode()
3457-
if line_number == 0:
3458-
ap_stream += f"2 {y_offset} Td\n".encode()
3459-
else:
3460-
# Td is a relative translation
3461-
ap_stream += f"0 {- font_height * 1.4} Td\n".encode()
3462-
enc_line: list[bytes] = [
3463-
font_full_rev.get(c, c.encode("utf-16-be")) for c in line
3464-
]
3465-
if any(len(c) >= 2 for c in enc_line):
3466-
ap_stream += b"<" + (b"".join(enc_line)).hex().encode() + b"> Tj\n"
3467-
else:
3468-
ap_stream += b"(" + b"".join(enc_line) + b") Tj\n"
3469-
ap_stream += b"ET\nQ\nEMC\nQ\n"
3470-
return ap_stream

0 commit comments

Comments
 (0)