Skip to content

Commit 4104dd5

Browse files
committed
Fix PDF bloat for off-axis scatter with per-point colors
Skip emitting markers whose offsets fall outside the canvas bounds in draw_path_collection, reducing PDF file size when scatter points are off-axis. Fixes #2488.
1 parent 1ab3332 commit 4104dd5

File tree

2 files changed

+115
-0
lines changed

2 files changed

+115
-0
lines changed

lib/matplotlib/backends/backend_pdf.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2118,6 +2118,12 @@ def draw_path_collection(self, gc, master_transform, paths, all_transforms,
21182118
facecolors, edgecolors, linewidths, linestyles,
21192119
antialiaseds, urls, offset_position, hatchcolors=hatchcolors):
21202120

2121+
# Skip markers outside visible canvas bounds to reduce PDF size
2122+
# (same optimization as in draw_markers).
2123+
if not (0 <= xo <= self.file.width * 72
2124+
and 0 <= yo <= self.file.height * 72):
2125+
continue
2126+
21212127
self.check_gc(gc0, rgbFace)
21222128
dx, dy = xo - lastx, yo - lasty
21232129
output(1, 0, 0, 1, dx, dy, Op.concat_matrix, path_id,

lib/matplotlib/tests/test_backend_pdf.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,3 +478,112 @@ def test_font_bitstream_charter():
478478
ax.text(0.1, 0.3, r"fi ffl 1234", usetex=True, fontsize=50)
479479
ax.set_xticks([])
480480
ax.set_yticks([])
481+
482+
483+
def test_scatter_offaxis_colored_pdf_size():
    """
    Check that a per-point-colored scatter lying fully off-axis stays small.

    Regression test for issue #2488.  A scatter whose points all fall outside
    the view limits is saved to PDF, and its byte size is compared against
    the size of a figure with the same view limits but no scatter at all.
    """

    def _saved_pdf_size(fig):
        # Write *fig* to an in-memory PDF, report how many bytes were
        # written, then close the figure.
        buf = io.BytesIO()
        fig.savefig(buf, format='pdf')
        n_bytes = buf.tell()
        plt.close(fig)
        return n_bytes

    # Fixed seed (John Hunter's birthday) keeps the data reproducible.
    rng = np.random.default_rng(19680801)
    n_points = 1000
    # Draw order (x, y, c) matters: it determines the generated values.
    x = rng.random(n_points) * 10
    y = rng.random(n_points) * 10
    c = rng.random(n_points)

    # Data spans 0-10 on both axes, so this window contains none of it.
    view = (20, 30)

    # Figure under test: per-point colors, every point outside the view.
    fig_colored, ax_colored = plt.subplots()
    ax_colored.scatter(x, y, c=c)
    ax_colored.set_xlim(*view)
    ax_colored.set_ylim(*view)
    size_offaxis_colored = _saved_pdf_size(fig_colored)

    # Baseline: same view limits, nothing plotted.
    fig_blank, ax_blank = plt.subplots()
    ax_blank.set_xlim(*view)
    ax_blank.set_ylim(*view)
    size_empty = _saved_pdf_size(fig_blank)

    # Allow up to 50KB on top of the baseline for axes/metadata differences.
    overhead = size_offaxis_colored - size_empty
    assert overhead < 50_000, (
        f"Off-axis colored scatter PDF ({size_offaxis_colored} bytes) is too large. "
        f"Expected close to empty figure size ({size_empty} bytes). "
        "Markers may not be properly skipped."
    )
528+
529+
530+
@check_figures_equal(extensions=["pdf"])
def test_scatter_offaxis_colored_visual(fig_test, fig_ref):
    """
    Check that an on-axis scatter with per-point colors renders consistently.

    Both figures are populated by exactly the same calls, so this guards
    against the PDF output for in-view markers failing or differing between
    two otherwise identical renders.
    """
    rng = np.random.default_rng(19680801)

    n_points = 100
    # Draw order (x, y, c) matters: it determines the generated values.
    x, y = rng.random(n_points) * 5, rng.random(n_points) * 5
    c = rng.random(n_points)

    # Test figure first, then reference; each gets the same scatter and
    # the same 0-10 view limits, which contain all of the 0-5 data.
    for fig in (fig_test, fig_ref):
        ax = fig.subplots()
        ax.scatter(x, y, c=c, s=50)
        ax.set_xlim(0, 10)
        ax.set_ylim(0, 10)
556+
557+
558+
@check_figures_equal(extensions=["pdf"])
def test_scatter_mixed_onoff_axis(fig_test, fig_ref):
    """
    Test scatter with some points on-axis and some off-axis.

    The test figure plots every point (half inside the view limits, half
    outside); the reference figure plots only the points inside the view.
    Both must render identically, i.e. dropping the out-of-view markers must
    not change what is visible.

    The color limits are pinned to the range of the full color array in both
    figures.  Without this, each scatter would autoscale its color
    normalization to its own data (100 values vs. the first 50), so the same
    point could be drawn in a different color in the two figures and the
    comparison would fail for a reason unrelated to marker skipping.
    """
    rng = np.random.default_rng(19680801)

    # Create points: half on-axis (0-5), half off-axis (15-20)
    n_points = 50
    x_on = rng.random(n_points) * 5
    y_on = rng.random(n_points) * 5
    x_off = rng.random(n_points) * 5 + 15
    y_off = rng.random(n_points) * 5 + 15

    x = np.concatenate([x_on, x_off])
    y = np.concatenate([y_on, y_off])
    c = rng.random(2 * n_points)

    # Shared color limits so a given value of c maps to the same color in
    # both figures, regardless of which subset of c is plotted.
    color_limits = dict(vmin=c.min(), vmax=c.max())

    # Test figure: scatter with mixed points
    ax_test = fig_test.subplots()
    ax_test.scatter(x, y, c=c, s=50, **color_limits)
    ax_test.set_xlim(0, 10)
    ax_test.set_ylim(0, 10)

    # Reference figure: only the on-axis points should be visible
    ax_ref = fig_ref.subplots()
    ax_ref.scatter(x_on, y_on, c=c[:n_points], s=50, **color_limits)
    ax_ref.set_xlim(0, 10)
    ax_ref.set_ylim(0, 10)

0 commit comments

Comments
 (0)