Skip to content

Commit

Permalink
Using .local_context() in .write_html() to better isolate HTML render…
Browse files Browse the repository at this point in the history
…ing & styling
  • Loading branch information
Lucas-C committed Jun 17, 2024
1 parent 5239fea commit 5fc0226
Show file tree
Hide file tree
Showing 67 changed files with 59 additions and 48 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
### Fixed
* [`FPDF.local_context()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.local_context) used to leak styling during page breaks, when rendering `footer()` & `header()`
* [`fpdf.drawing.DeviceCMYK`](https://py-pdf.github.io/fpdf2/fpdf/drawing.html#fpdf.drawing.DeviceCMYK) objects can now be passed to [`FPDF.set_draw_color()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.set_draw_color), [`FPDF.set_fill_color()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.set_fill_color) and [`FPDF.set_text_color()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.set_text_color) without raising a `ValueError`: [documentation](https://py-pdf.github.io/fpdf2/Text.html#text-formatting).
* [`FPDF.write_html()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html): fixed rendering of `<hr>` tags, which no longer trigger a page break
* individual `/Resources` dictionaries are now properly created for each document page. This change ensures better compliance with the PDF specification but results in a slight increase in the size of PDF documents. You can still use the old behavior by setting `FPDF().single_resources_object = True`
* line size calculation for fragments when text shaping is used
* fixed incoherent indentation of long list entries - _cf._ [issue #1073](https://github.com/py-pdf/fpdf2/issues/1073)
Expand Down
28 changes: 20 additions & 8 deletions fpdf/fpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,8 @@ def write_html(self, text, *args, **kwargs):
tag_styles (dict): mapping of HTML tag names to colors
"""
html2pdf = self.HTML2FPDF_CLASS(self, *args, **kwargs)
html2pdf.feed(text)
with self.local_context():
html2pdf.feed(text)

def _set_min_pdf_version(self, version):
self.pdf_version = max(self.pdf_version, version)
Expand Down Expand Up @@ -2797,12 +2798,12 @@ def local_context(self, **kwargs):
"cannot create a local context inside an unbreakable() code block"
)
self._push_local_stack()
self._init_local_context(**kwargs) # write "q" in the output stream
self._start_local_context(**kwargs)
yield
self._out("Q")
self._end_local_context()
self._pop_local_stack()

def _init_local_context(
def _start_local_context(
self,
font_family=None,
font_style=None,
Expand All @@ -2815,7 +2816,7 @@ def _init_local_context(
**kwargs,
):
"""
This method starts a "q" context in the output stream,
This method starts a "q/Q" context in the page content stream,
and inserts operators in it to initialize all the PDF settings specified.
"""
if "font_size_pt" in kwargs:
Expand Down Expand Up @@ -2884,6 +2885,12 @@ def _init_local_context(
if dash_pattern is not None:
self.set_dash_pattern(**dash_pattern)

def _end_local_context(self):
"""
This method ends a "q/Q" context in the page content stream.
"""
self._out("Q")

@property
def accept_page_break(self):
"""
Expand Down Expand Up @@ -3575,14 +3582,19 @@ def _perform_page_break_if_need_be(self, h):

def _perform_page_break(self):
x = self.x
# If we are in a .local_context(), we need to temporarily leave it,
# by popping out every GraphicsState:
gs_stack = []
while self._is_current_graphics_state_nested():
self._out("Q")
# This code assumes that every Graphics State in the stack
# has been pushed in it while adding a "q" in the PDF stream
# (which is what FPDF.local_context() does):
self._end_local_context()
gs_stack.append(self._pop_local_stack())
self.add_page(same=True)
for prev_gs in reversed(gs_stack):
self._push_local_stack(prev_gs)
self._init_local_context(**prev_gs)
self._start_local_context(**prev_gs)
self.x = x # restore x but not y after drawing header

def _has_next_page(self):
Expand Down Expand Up @@ -4994,7 +5006,7 @@ def insert_toc_placeholder(self, render_toc_function, pages=1):
render_toc_function, self.page, self.y, pages
)
for _ in range(pages):
self.add_page()
self._perform_page_break()

def set_section_title_styles(
self,
Expand Down
41 changes: 17 additions & 24 deletions fpdf/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,13 +325,10 @@ def __init__(
self.emphasis = dict(b=False, i=False, u=False)
self.font_size = pdf.font_size_pt
self.set_font(pdf.font_family or "times", size=self.font_size, set_default=True)
self._prev_font = (pdf.font_family, self.font_size, self.emphasis)
self.pdf._push_local_stack() # xpylint: disable=protected-access

self._pre_formatted = False # preserve whitespace while True.
self._pre_started = (
False # nothing written yet to <pre>, remove one initial nl.
)
# nothing written yet to <pre>, remove one initial nl:
self._pre_started = False
self.follows_trailing_space = False # The last write has ended with a space.
self.follows_heading = False # We don't want extra space below a heading.
self.href = ""
Expand Down Expand Up @@ -464,11 +461,7 @@ def _end_paragraph(self):
self.align = ""
if self._paragraph:
self._column.end_paragraph()
our_context = (
self.pdf._pop_local_stack() # pylint: disable=protected-access
)
self._column.render()
self.pdf._push_local_stack(our_context) # pylint: disable=protected-access
self._paragraph = None
self.follows_trailing_space = True

Expand Down Expand Up @@ -530,15 +523,12 @@ def handle_data(self, data):
elif self._pre_formatted: # pre blocks
# If we want to mimic the exact HTML semantics about newlines at the
# beginning and end of the block, then this needs some more thought.
s_nl = data.startswith("\n") and self._pre_started
if data.startswith("\n") and self._pre_started:
if data.endswith("\n"):
data = data[1:-1]
else:
data = data[1:]
self._pre_started = False
e_nl = data.endswith("\n")
if s_nl and e_nl:
data = data[1:-1]
elif s_nl:
data = data[1:]
# elif e_nl:
# data = data[:-1]
self._write_data(data)
else:
data = _WS_SUB_PAT.sub(" ", data)
Expand Down Expand Up @@ -650,7 +640,13 @@ def handle_starttag(self, tag, attrs):
size=tag_style.size_pt or self.font_size,
)
if tag == "hr":
self.pdf.add_page(same=True)
self.pdf.line(
x1=self.pdf.l_margin,
y1=self.pdf.y,
x2=self.pdf.l_margin + self.pdf.epw,
y2=self.pdf.y,
)
self._write_paragraph("\n")
if tag == "code":
self.style_stack.append(
FontFace(
Expand All @@ -667,6 +663,7 @@ def handle_starttag(self, tag, attrs):
size=tag_style.size_pt or self.font_size,
)
if tag == "pre":
self._end_paragraph()
self.style_stack.append(
FontFace(
family=self.font_family,
Expand All @@ -682,8 +679,8 @@ def handle_starttag(self, tag, attrs):
size=tag_style.size_pt or self.font_size,
)
self._pre_formatted = True
self._new_paragraph()
self._pre_started = True
self._new_paragraph()
if tag == "blockquote":
tag_style = self.tag_styles[tag]
if tag_style.color:
Expand Down Expand Up @@ -928,12 +925,12 @@ def handle_endtag(self, tag):
self.set_font(font_face.family, font_face.size_pt)
self.set_text_color(*font_face.color.colors255)
if tag == "pre":
self._end_paragraph()
font_face = self.style_stack.pop()
self.set_font(font_face.family, font_face.size_pt)
self.set_text_color(*font_face.color.colors255)
self._pre_formatted = False
self._pre_started = False
self._end_paragraph()
if tag == "blockquote":
self._end_paragraph()
self.set_text_color(*self.font_color)
Expand Down Expand Up @@ -991,10 +988,6 @@ def feed(self, data):
while self._tags_stack and self._tags_stack[-1] in self.HTML_UNCLOSED_TAGS:
self._tags_stack.pop()
self._end_paragraph() # render the final chunk of text and clean up our local context.
self.pdf._pop_local_stack() # pylint: disable=protected-access
if self._prev_font[0]: # restore previously defined font settings
self.emphasis = self._prev_font[2]
self.set_font(self._prev_font[0], size=self._prev_font[1], set_default=True)
if self._tags_stack and self.warn_on_tags_not_matching:
LOGGER.warning("Missing HTML end tag for <%s>", self._tags_stack[-1])

Expand Down
2 changes: 1 addition & 1 deletion fpdf/text_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ def render(self):
_first_page_top = max(self.pdf.t_margin, self.pdf.y)
self._render_page_lines(text_lines, _first_page_top, page_bottom)
while text_lines:
self.pdf.add_page(same=True)
self.pdf._perform_page_break()
self._cur_column = 0
self._render_page_lines(text_lines, self.pdf.y, page_bottom)

Expand Down
11 changes: 7 additions & 4 deletions scripts/compare-changed-pdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
# Generate a HTML page that makes it easy to visually compare all PDF files
# that are modified in the current branch, compared to the master branch.

# USAGE: ./compare-changed-pdfs.py
# USAGE: ./compare-changed-pdfs.py [test_subdir_path]

import webbrowser
import sys, webbrowser
from functools import partial
from http.server import HTTPServer, SimpleHTTPRequestHandler
from os import makedirs, scandir
Expand All @@ -30,15 +30,18 @@ def scantree_dirs(path):
yield from scantree_dirs(entry.path)


target_dir = sys.argv[1] if len(sys.argv) > 1 else "test"
print(f"Processing all PDF reference files in {target_dir}")

stdout = check_output("git diff --name-status master", shell=True)
changed_pdf_files = [
line[1:].strip()
for line in stdout.decode("utf-8").splitlines()
if line.startswith("M\ttest/")
if line.startswith(f"M\t{target_dir}")
]

TMP_DIR.mkdir(exist_ok=True)
for dir in scantree_dirs(REPO_DIR / "test"):
for dir in scantree_dirs(REPO_DIR / target_dir):
(TMP_DIR / dir).mkdir(exist_ok=True)
for changed_pdf_file in changed_pdf_files:
command = f"git show master:{changed_pdf_file} > {TMP_DIR}/{changed_pdf_file}"
Expand Down
Binary file modified test/html/html_align_paragraph.pdf
Binary file not shown.
Binary file modified test/html/html_blockquote_color.pdf
Binary file not shown.
Binary file modified test/html/html_blockquote_indent.pdf
Binary file not shown.
Binary file modified test/html/html_bold_italic_underline.pdf
Binary file not shown.
Binary file modified test/html/html_custom_heading_sizes.pdf
Binary file not shown.
Binary file modified test/html/html_custom_line_height.pdf
Binary file not shown.
Binary file modified test/html/html_custom_pre_code_font.pdf
Binary file not shown.
Binary file modified test/html/html_customize_ul.pdf
Binary file not shown.
Binary file modified test/html/html_description.pdf
Binary file not shown.
Binary file modified test/html/html_features.pdf
Binary file not shown.
Binary file modified test/html/html_font_color_name.pdf
Binary file not shown.
Binary file modified test/html/html_format_within_p.pdf
Binary file not shown.
Binary file modified test/html/html_heading_color_attribute.pdf
Binary file not shown.
Binary file modified test/html/html_heading_hebrew.pdf
Binary file not shown.
Binary file modified test/html/html_headings_color.pdf
Binary file not shown.
Binary file modified test/html/html_headings_line_height.pdf
Binary file not shown.
Binary file modified test/html/html_images.pdf
Binary file not shown.
Binary file modified test/html/html_img_not_overlapping.pdf
Binary file not shown.
Binary file modified test/html/html_li_prefix_color.pdf
Binary file not shown.
Binary file modified test/html/html_li_tag_indent.pdf
Binary file not shown.
Binary file modified test/html/html_link_color.pdf
Binary file not shown.
Binary file modified test/html/html_list_vertical_margin.pdf
Binary file not shown.
Binary file modified test/html/html_ln_outside_p.pdf
Binary file not shown.
Binary file modified test/html/html_long_list_entries.pdf
Binary file not shown.
Binary file modified test/html/html_long_ol_bullets.pdf
Binary file not shown.
Binary file modified test/html/html_measurement_units.pdf
Binary file not shown.
Binary file modified test/html/html_ol_start_and_type.pdf
Binary file not shown.
Binary file modified test/html/html_ol_ul_line_height.pdf
Binary file not shown.
Binary file modified test/html/html_preserve_initial_text_color.pdf
Binary file not shown.
Binary file modified test/html/html_superscript.pdf
Binary file not shown.
Binary file modified test/html/html_table_honoring_align.pdf
Binary file not shown.
Binary file modified test/html/html_table_line_separators.pdf
Binary file not shown.
Binary file modified test/html/html_table_simple.pdf
Binary file not shown.
Binary file modified test/html/html_table_th_inside_tr_issue_137.pdf
Binary file not shown.
Binary file modified test/html/html_table_with_bgcolor.pdf
Binary file not shown.
Binary file modified test/html/html_table_with_border.pdf
Binary file not shown.
Binary file modified test/html/html_table_with_data_that_contains_entity_names.pdf
Binary file not shown.
Binary file modified test/html/html_table_with_empty_cell_contents.pdf
Binary file not shown.
Binary file modified test/html/html_table_with_font_tags_used_to_set_text_color.pdf
Binary file not shown.
Binary file modified test/html/html_table_with_img.pdf
Binary file not shown.
Binary file modified test/html/html_table_with_img_without_explicit_dimensions.pdf
Binary file not shown.
Binary file modified test/html/html_table_with_imgs_captions_and_colspan.pdf
Binary file not shown.
Binary file modified test/html/html_table_with_multi_lines_text.pdf
Binary file not shown.
Binary file not shown.
Binary file modified test/html/html_table_with_only_tds.pdf
Binary file not shown.
Binary file modified test/html/html_table_with_width_and_align.pdf
Binary file not shown.
Binary file modified test/html/html_ul_type.pdf
Binary file not shown.
Binary file modified test/html/html_unorthodox_headings_hierarchy.pdf
Binary file not shown.
Binary file modified test/html/html_whitespace_handling.pdf
Binary file not shown.
Binary file modified test/html/issue_156.pdf
Binary file not shown.
20 changes: 11 additions & 9 deletions test/html/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,11 @@ def test_html_features(tmp_path):
pdf.write_html("<h4>h4</h4>")
pdf.write_html("<h5>h5</h5>")
pdf.write_html("<h6>h6</h6>")
pdf.write_html("<br />")
pdf.write_html("<hr />")
pdf.write_html("<br />")
pdf.write_html("<br />")
pdf.write_html("<p>Rendering &lt;hr&gt;:</p>")
pdf.write_html("<hr>")
# Now inserting <br> tags until a page jump is triggered:
for _ in range(25):
pdf.write_html("<br>")
pdf.write_html("<pre>i am preformatted text.</pre>")
pdf.write_html("<blockquote>hello blockquote</blockquote>")
pdf.write_html("<ul><li>li1</li><li>another</li><li>l item</li></ul>")
Expand Down Expand Up @@ -427,10 +428,11 @@ def test_html_whitespace_handling(tmp_path): # Issue 547
<p>
<b>Testing </b> paragraph blocks
that <i>span</i> <b>multiple lines</b>.
Testing tabs and spaces<br>
and break tags.<br>
Testing tabs and spaces
<br>and break tags.
</p>
<code>Testing code blocks with tabs and spaces.</code><br>
<code>Testing code blocks with tabs and spaces.</code>
<br>
<pre>
Testing pre blocks
that span multiple lines
Expand All @@ -444,8 +446,8 @@ def test_html_whitespace_handling(tmp_path): # Issue 547
</code></pre>
<p>Testing unicode nbsp \u00a0\u00a0\u00a0\u00a0,
and html nbsp &nbsp;&nbsp;&nbsp;&nbsp;.<br>
\u00a0&nbsp;&nbsp;Testing leading nbsp
and html nbsp &nbsp;&nbsp;&nbsp;&nbsp;.
<br>\u00a0&nbsp;&nbsp;Testing leading nbsp
</p>
</body>
"""
Expand Down
Binary file modified test/hyperlinks.pdf
Binary file not shown.
Binary file modified test/internal_links.pdf
Binary file not shown.
Binary file modified test/link_to_other_document.pdf
Binary file not shown.
Binary file modified test/outline/html_toc.pdf
Binary file not shown.
Binary file modified test/outline/html_toc_2_pages.pdf
Binary file not shown.
Binary file not shown.
Binary file modified test/outline/html_toc_with_h1_as_2nd_heading.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions test/outline/test_outline_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def render_toc(self, pdf, outline):
)


def test_custom_HTML2FPDF(tmp_path): # issue 240 & 670
def test_html_toc_with_custom_rendering(tmp_path): # issue 240 & 670
class PDF(FPDF):
HTML2FPDF_CLASS = CustomHTML2FPDF

Expand All @@ -271,4 +271,4 @@ class PDF(FPDF):
<h6>Level 6</h6>
<p>paragraph</p>"""
)
assert_pdf_equal(pdf, HERE / "custom_HTML2FPDF.pdf", tmp_path)
assert_pdf_equal(pdf, HERE / "html_toc_with_custom_rendering.pdf", tmp_path)
Binary file modified test/table/table_vertical_alignment.pdf
Binary file not shown.
Binary file modified test/table/table_with_rowspan.pdf
Binary file not shown.
Binary file modified test/table/table_with_rowspan_and_colspan.pdf
Binary file not shown.

0 comments on commit 5fc0226

Please sign in to comment.