In [1]:
from IPython.display import display, SVG
from typing import Optional, Sequence
import dataclasses
import math

from typesetting import Font, Glyph, ParagraphItem, ParagraphItemType
from typesetting.cairo import context_show_text_glyphs

AttributeError: /lib/x86_64-linux-gnu/libraqm.so.0: undefined symbol: raqm_set_word_spacing_range

Source (Knuth & Plass, "Breaking Paragraphs into Lines"):

http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf

In [None]:
from faker import Faker
import textwrap
import hyphen
import uniseg.wordbreak

SOFT_HYPHEN = "\N{SOFT HYPHEN}"
def hyphenate(text: str):
    """Return text with a soft hyphen inserted between the syllables of each word.

    The text is split into word-break segments. Each segment shorter than 100
    characters is passed to the hyphenator; longer segments are kept whole.
    A segment is only rewritten when its syllables concatenate back to the
    segment exactly; otherwise the segment is emitted unchanged.
    """
    hyphenator = hyphen.Hyphenator()
    pieces = []
    for token in uniseg.wordbreak.words(text):
        if len(token) < 100:
            parts = hyphenator.syllables(token)
        else:
            parts = [token]
        # Only trust the split when it reassembles into the original token.
        round_trips = "".join(parts) == token
        pieces.append(SOFT_HYPHEN.join(parts) if round_trips else token)
    return "".join(pieces)

# Seed Faker so that the generated paragraph is the same on every run.
Faker.seed(0xdeadbeef)
# A fixed opening sentence followed by generated filler text.
lorem_opening = "Difficult Quick Foxes with 1034 boxes. "
lorem_filler = Faker(["la"]).paragraph(50)
lorem = hyphenate(lorem_opening + lorem_filler)

lorem_wrapped = textwrap.fill(lorem, 72)
print(f"Generated lorem ipsum:\n\n{lorem_wrapped}")
lorem_preview = textwrap.shorten(lorem, 72)
print(f"\nRepr of first part:\n{lorem_preview!r}")

In [None]:
# Roman (body text) font used for all shaping and rendering below.
# The face path is relative; presumably it is resolved against the working directory.
rm_face_path = "EBGaramond-VariableFont_wght.ttf"
# Nominal size; the same value is passed for both components of the size pair.
rm_size = 12
# "onum" is the OpenType feature tag for old-style figures.
rm_font = Font(rm_face_path, (rm_size, rm_size), features=["onum"])
# Select the "Regular" named instance on the underlying FreeType face
# (the file name suggests a variable-weight font).
rm_font.freetype_face.set_var_named_instance("Regular")

In [None]:
import uniseg.linebreak

# Offsets into `lorem` reported as line break boundaries.
lorem_feasible_breakpoints = set(uniseg.linebreak.line_break_boundaries(lorem))
# Drop the end-of-text offset; the paragraph end is handled by an explicit
# forced break item instead. Note: set.remove raises KeyError if it is absent.
lorem_feasible_breakpoints.remove(len(lorem))

# Shape a plain hyphen once; its glyphs and total advance stand in for soft hyphens.
rm_hyphen_glyphs = rm_font.shape("-")
rm_hyphen_width = sum(g.x_advance for g in rm_hyphen_glyphs)
# Shape the whole paragraph in a single call.
lorem_glyphs = rm_font.shape(lorem)

In [None]:
# Peek at the first ten shaped glyphs.
lorem_glyphs[:10]

In [None]:
# Peek at the last ten shaped glyphs.
lorem_glyphs[-10:]

In [None]:
import math

SOFT_HYPHEN_PENALTY = 50
INFINITY = math.inf

# Parallel lists: para_item_glyphs[i] holds the glyphs drawn for para_items[i].
para_items: list[ParagraphItem] = []
para_item_glyphs: list[list[Glyph]] = []
for glyph in lorem_glyphs:
    is_soft_hyphen = glyph.cluster == SOFT_HYPHEN
    cluster_end = glyph.cluster_code_point_index + len(glyph.cluster)

    if cluster_end not in lorem_feasible_breakpoints:
        # No break is allowed after this cluster: it is a rigid box.
        item = ParagraphItem(
            item_type=ParagraphItemType.BOX,
            width=glyph.x_advance
        )
    elif is_soft_hyphen:
        # Breaking here costs a penalty and adds the width of a visible hyphen.
        item = ParagraphItem(
            item_type=ParagraphItemType.PENALTY,
            width=rm_hyphen_width,
            penalty=SOFT_HYPHEN_PENALTY,
            flagged=True,
        )
    else:
        # Every other feasible breakpoint is expected to follow whitespace.
        assert glyph.cluster.isspace()
        item = ParagraphItem(
            item_type=ParagraphItemType.GLUE,
            width=glyph.x_advance,
            shrinkability=0.5*glyph.x_advance,
            stretchability=2.0*glyph.x_advance
        )

    para_items.append(item)
    # A soft hyphen is drawn with the glyphs of a real hyphen.
    para_item_glyphs.append(rm_hyphen_glyphs if is_soft_hyphen else [glyph])

# Close the paragraph: infinitely stretchable glue followed by a forced break.
# Neither item has glyphs of its own.
finishing_glue = ParagraphItem(
    item_type=ParagraphItemType.GLUE,
    stretchability=INFINITY,
)
para_items.append(finishing_glue)
para_item_glyphs.append([])
forced_break = ParagraphItem(
    item_type=ParagraphItemType.PENALTY,
    penalty=-INFINITY,
    flagged=True,
)
para_items.append(forced_break)
para_item_glyphs.append([])

In [None]:
# Show the repr of the leading and trailing items, each indented by two spaces.
for heading, shown_items in (
    ("First few items:", para_items[:10]),
    ("Final few items:", para_items[-10:]),
):
    print(heading)
    print("\n".join(textwrap.indent(repr(entry), "  ") for entry in shown_items))

In [None]:
from collections import namedtuple

RunningSum = namedtuple("RunningSum", ["width", "shrinkability", "stretchability"])
def para_item_running_sums(para_items: Sequence[ParagraphItem]) -> Sequence[RunningSum]:
    """Return cumulative width, shrinkability and stretchability totals.

    The result has len(para_items) + 1 entries. Entry i is the total over
    para_items[:i], so entry 0 is all zeros. Penalty items contribute nothing
    to any of the totals.
    """
    total_width, total_shrink, total_stretch = 0, 0, 0
    sums = [RunningSum(total_width, total_shrink, total_stretch)]
    for item in para_items:
        counts_towards_totals = item.item_type != ParagraphItemType.PENALTY
        if counts_towards_totals:
            total_width = total_width + item.width
            total_shrink = total_shrink + item.shrinkability
            total_stretch = total_stretch + item.stretchability
        sums.append(RunningSum(total_width, total_shrink, total_stretch))
    return sums

In [None]:
def greedy_breaks(para_items: Sequence[ParagraphItem], width: float) -> Sequence[int]:
    """Return the indices in para_items at which lines end.

    Items are scanned in order. Boxes are never break points. An item whose
    penalty is -INFINITY or lower always ends the line. Any other glue or
    penalty item ends the line as soon as the line's natural width plus its
    total stretchability reaches `width`. The next line starts at the item
    after the break.
    """
    sums = para_item_running_sums(para_items)
    breaks = []
    line_start = 0
    for idx, item in enumerate(para_items):
        if item.item_type == ParagraphItemType.BOX:
            continue

        forced = item.penalty <= -INFINITY
        if not forced:
            at_start, at_item = sums[line_start], sums[idx]
            # Natural width of everything on the line before this item.
            natural = at_item.width - at_start.width
            if item.item_type == ParagraphItemType.PENALTY:
                # Breaking at a penalty adds the penalty's own width to the line.
                natural += item.width
            stretch = at_item.stretchability - at_start.stretchability
            if natural + stretch < width:
                # Even fully stretched the line is too short; keep filling it.
                continue

        breaks.append(idx)
        line_start = idx + 1
    return breaks

In [None]:
import cairo
import io

# Page size as passed to the SVG surface (pycairo documents these as points).
paper_size = (6 * 72, 5 * 72)
# Leave a one-em margin on the left and on the right.
line_width = paper_size[0] - rm_font.em_size[0] * 2
line_break_idxs = greedy_breaks(para_items, line_width)

# Baseline-to-baseline distance.
line_gap = rm_font.em_size[1] * 1.1
with io.BytesIO() as svg_file:
    with cairo.SVGSurface(svg_file, paper_size[0], paper_size[1]) as surface:
        ctx = cairo.Context(surface)

        # Fill background
        ctx.rectangle(0, 0, paper_size[0], paper_size[1])
        ctx.set_source_rgb(1, 1, 1)
        ctx.fill()

        # Render lines
        ctx.set_source_rgb(0, 0, 0)
        line_start_idx = 0
        for line_idx, line_end_idx in enumerate(line_break_idxs):
            ctx.move_to(rm_font.em_size[0], rm_font.em_size[1] + (line_idx + 1) * line_gap)

            # Collect what is actually set on this line: glue at the break is
            # discarded, and penalties only appear when the break is at them.
            items_and_glyphs = []
            for item_idx in range(line_start_idx, line_end_idx+1):
                item = para_items[item_idx]
                if item_idx == line_end_idx and item.item_type == ParagraphItemType.GLUE:
                    continue
                if item_idx != line_end_idx and item.item_type == ParagraphItemType.PENALTY:
                    continue
                items_and_glyphs.append((item, para_item_glyphs[item_idx]))

            # What delta needs to be taken up by glue
            natural_width = sum(item.width for item, _ in items_and_glyphs)
            delta = line_width - natural_width

            # Total slack for delta: stretchability when the line is too short,
            # shrinkability otherwise.
            stretching = delta > 0.0
            slacks = [
                item.stretchability if stretching else item.shrinkability
                for item, _ in items_and_glyphs
            ]
            total_slack = sum(slacks)
            n_infinities = sum(1 for slack in slacks if slack >= INFINITY)

            if n_infinities > 0:
                # Infinite glue absorbs everything; finite glue stays at its natural width.
                total_slack = 0
                adjustment_ratio = 0
            elif total_slack > 0:
                adjustment_ratio = delta / total_slack
            elif delta == 0:
                adjustment_ratio = 0
            else:
                # Nothing on the line can absorb delta (e.g. a single word):
                # report an infinite ratio instead of dividing by zero.
                adjustment_ratio = INFINITY if stretching else -INFINITY
            print(adjustment_ratio)

            for item, glyphs in items_and_glyphs:
                context_show_text_glyphs(ctx, rm_font, glyphs)
                if item.item_type != ParagraphItemType.GLUE:
                    continue
                slack = item.stretchability if stretching else item.shrinkability
                if slack >= INFINITY:
                    # delta carries the sign: positive widens, negative narrows.
                    ctx.rel_move_to(delta / n_infinities, 0)
                elif n_infinities == 0 and total_slack > 0:
                    # Share delta between glue items in proportion to their slack.
                    ctx.rel_move_to(delta * slack / total_slack, 0)
            line_start_idx = line_end_idx+1

    # The surface is finished once its `with` block exits, so the buffer is complete here.
    svg_data = svg_file.getvalue()

display(SVG(svg_data))