Skip to content

Commit

Permalink
Merge 8dd519c into 21c0a33
Browse files Browse the repository at this point in the history
  • Loading branch information
Omikhleia committed Dec 16, 2023
2 parents 21c0a33 + 8dd519c commit 9959959
Show file tree
Hide file tree
Showing 6 changed files with 275 additions and 39 deletions.
4 changes: 4 additions & 0 deletions documentation/c07-settings.sil
Expand Up @@ -185,6 +185,10 @@ true value) and also \em{unset} the setting \autodoc:setting{document.spaceskip}
unset it, just call \autodoc:command{\set} with no \autodoc:parameter{value} parameter:
\autodoc:command{\set[parameter=document.spaceskip]}.

Note that, following the guidelines of Unicode Standard Annex 14 (UAX 14), non-breaking spaces (U+00A0) are by default stretchable and shrinkable like regular inter-word spaces, so they contribute to text justification and alignment.

If you want to disable this behavior, the \autodoc:setting{languages.fixedNbsp} setting may be set to \code{true} to enforce fixed-width non-breaking spaces.

\subsection{Letter spacing settings}

You can also put spaces in between \em{letters} with the \autodoc:setting{document.letterspaceglue} setting.
Expand Down
2 changes: 1 addition & 1 deletion languages/fr.lua
Expand Up @@ -172,7 +172,7 @@ function SILE.nodeMakers.fr:mustRemove (i, items)
-- Clear "manual" spaces we do not want, so that later we only have to
-- insert the relevant kerns.
local curr = items[i].text
if self:isSpace(curr) then
if self:isSpace(curr) or self:isNonBreakingSpace(curr) then
if i < #items then
local next = items[i+1].text
if self:isSpace(next)
Expand Down
35 changes: 33 additions & 2 deletions languages/unicode.lua
Expand Up @@ -2,6 +2,13 @@ local icu = require("justenoughicu")

local chardata = require("char-def")

-- Register the "languages.fixedNbsp" setting: a boolean, off by default,
-- selecting whether U+00A0 (NO-BREAK SPACE) is treated as a fixed-width space.
SILE.settings:declare({
  parameter = "languages.fixedNbsp",
  help = "Whether to treat U+00A0 (NO-BREAK SPACE) as a fixed-width space",
  type = "boolean",
  default = false,
})

SILE.nodeMakers.base = pl.class({

_init = function (self, options)
Expand Down Expand Up @@ -43,6 +50,14 @@ SILE.nodeMakers.base = pl.class({
self.lastnode = "penalty"
end,

makeNonBreakingSpace = function (self)
  -- Per the Unicode Line Breaking Algorithm (UAX 14), U+00A0 (NO-BREAK SPACE)
  -- is expanded or compressed like a normal space, so the node is sized from
  -- the shaper's space measurement for the current options.
  -- NOTE(review): presumably that measurement carries stretch/shrink and a
  -- kern is used so that no break opportunity is created here -- confirm.
  local spaceWidth = SILE.shaper:measureSpace(self.options)
  local nbspNode = SILE.nodefactory.kern(spaceWidth)
  coroutine.yield(nbspNode)
  -- Record the emitted node as a glue-like space for subsequent processing.
  self.lastnode = "glue"
  self.lasttype = "sp"
end,

iterator = function (_, _)
  -- Abstract placeholder: calling it on the base node maker is reported as
  -- an error. NOTE(review): presumably concrete node makers override this.
  local message = "Abstract function nodemaker:iterator called"
  SU.error(message, true)
end,
Expand All @@ -61,6 +76,15 @@ SILE.nodeMakers.base = pl.class({
return self.isSpaceType[self:charData(char).linebreak]
end,

isNonBreakingSpace = function (self, char)
  -- Tell whether the character data for `char` carries the context name
  -- "nobreakspace".
  local contextname = self:charData(char).contextname
  if not contextname then
    -- No context name recorded: hand back the falsy value unchanged.
    return contextname
  end
  return contextname == "nobreakspace"
end,

isActiveNonBreakingSpace = function (self, char)
  -- A non-breaking space is "active" unless the "languages.fixedNbsp"
  -- setting is enabled.
  local isNbsp = self:isNonBreakingSpace(char)
  if not isNbsp then
    -- Not a non-breaking space: propagate the falsy result as-is.
    return isNbsp
  end
  return not SILE.settings:get("languages.fixedNbsp")
end,

isBreaking = function (self, char)
  -- Look up the character's line-break class in the isBreakingType table.
  local linebreakClass = self:charData(char).linebreak
  return self.isBreakingType[linebreakClass]
end,
Expand All @@ -85,6 +109,9 @@ function SILE.nodeMakers.unicode:dealWith (item)
if self:isSpace(item.text) then
self:makeToken()
self:makeGlue(item)
elseif self:isActiveNonBreakingSpace(item.text) then
self:makeToken()
self:makeNonBreakingSpace()
elseif self:isBreaking(item.text) then
self:addToken(char, item)
self:makeToken()
Expand Down Expand Up @@ -152,7 +179,11 @@ function SILE.nodeMakers.unicode:handleWordBreak (item)
if self:isSpace(item.text) then
-- Spacing word break
self:makeGlue(item)
else -- a word break which isn't a space
elseif self:isActiveNonBreakingSpace(item.text) then
-- Non-breaking space word break
self:makeNonBreakingSpace()
else
-- a word break which isn't a space
self:addToken(item.text, item)
end
end
Expand All @@ -161,7 +192,7 @@ function SILE.nodeMakers.unicode:handleLineBreak (item, subtype)
-- Because we are in charge of paragraphing, we
-- will override space-type line breaks, and treat
-- them just as ordinary word spaces.
if self:isSpace(item.text) then
if self:isSpace(item.text) or self:isActiveNonBreakingSpace(item.text) then
self:handleWordBreak(item)
return
end
Expand Down
176 changes: 176 additions & 0 deletions tests/feat-unicode-nbsp.expected
@@ -0,0 +1,176 @@
Set paper size 297.6377985 419.5275636
Begin page
Mx 14.8819
My 28.5447
Set font Gentium Plus;10;400;;normal;;;LTR
T 47 82 85 72 80 w=26.4014 (Lorem)
Mx 45.5228
T 76 83 86 88 80 w=25.2002 (ipsum)
Mx 74.9626
T 71 82 79 82 85 w=21.9287 (dolor)
Mx 101.1308
T 86 76 87 w=10.0146 (sit)
Mx 115.3850
T 68 80 72 87 w=20.6836 (amet)
Mx 136.0686
T 15 w=2.2900 (,)
Mx 142.5982
T 86 76 87 w=10.0146 (sit)
Mx 156.8524
T 68 80 72 87 w=20.6836 (amet)
Mx 181.7755
T 68 71 76 83 76 86 70 76 81 74 w=41.8457 (adipiscing)
Mx 227.8608
T 72 79 76 87 w=13.4814 (elit)
Mx 241.3422
T 17 w=2.2900 (.)
Mx 247.8718
T 54 72 71 w=14.5801 (Sed)
Mx 266.6915
T 81 82 81 w=16.0645 (non)
Mx 14.8819
My 40.5447
T 85 76 86 88 86 w=19.6924 (risus)
Mx 34.5743
T 17 w=2.2900 (.)
Mx 39.5220
T 54 88 86 83 72 81 71 76 86 86 72 w=49.6094 (Suspendisse)
Mx 91.7890
T 79 72 70 87 88 86 w=24.3018 (lectus)
Mx 118.7484
T 87 82 85 87 82 85 w=24.8633 (tortor)
Mx 143.6117
T 17 w=2.2900 (.)
Mx 14.8819
My 58.5447
T 47 82 85 72 80 w=26.4014 (Lorem)
Mx 49.1701
T 76 83 86 88 80 w=25.2002 (ipsum)
Mx 82.2572
T 71 82 79 82 85 w=21.9287 (dolor)
Mx 112.0727
T 86 76 87 w=10.0146 (sit)
Mx 129.9742
T 68 80 72 87 w=20.6836 (amet)
Mx 150.6578
T 15 w=2.2900 (,)
Mx 160.8347
T 70 82 81 86 72 70 87 72 87 88 85 w=48.5303 (consectetur)
Mx 217.2519
T 68 71 76 83 76 86 70 76 81 74 w=41.8457 (adipiscing)
Mx 266.9844
T 72 79 76 87 w=13.4814 (elit)
Mx 280.4659
T 17 w=2.2900 (.)
Mx 14.8819
My 70.5447
T 54 72 71 w=14.5801 (Sed)
Mx 32.1157
T 81 82 81 w=16.0645 (non)
Mx 50.8338
T 85 76 86 88 86 w=19.6924 (risus)
Mx 70.5262
T 17 w=2.2900 (.)
Mx 75.4699
T 54 88 86 83 72 81 71 76 86 86 72 w=49.6094 (Suspendisse)
Mx 127.7330
T 79 72 70 87 88 86 w=24.3018 (lectus)
Mx 154.6884
T 87 82 85 87 82 85 w=24.8633 (tortor)
Mx 179.5517
T 17 w=2.2900 (.)
Mx 14.8819
My 82.5447
T 47 82 85 72 80 w=26.4014 (Lorem)
Mx 43.1918
T 76 83 86 88 80 w=25.2002 (ipsum)
Mx 70.3005
T 71 82 79 82 85 w=21.9287 (dolor)
Mx 94.1377
T 86 76 87 w=10.0146 (sit)
Mx 106.0609
T 68 80 72 87 w=20.6836 (amet)
Mx 126.7445
T 15 w=2.2900 (,)
Mx 130.9431
T 86 76 87 w=10.0146 (sit)
Mx 142.8663
T 68 80 72 87 w=20.6836 (amet)
Mx 165.4584
T 68 71 76 83 76 86 70 76 81 74 w=41.8457 (adipiscing)
Mx 209.2126
T 72 79 76 87 w=13.4814 (elit)
Mx 222.6941
T 17 w=2.2900 (.)
Mx 226.8926
T 54 72 71 w=14.5801 (Sed)
Mx 243.3812
T 81 82 81 w=16.0645 (non)
Mx 261.3542
T 85 76 86 88 86 w=19.6924 (risus)
Mx 281.0466
T 17 w=2.2900 (.)
Mx 14.8819
My 94.5447
T 54 88 86 83 72 81 71 76 86 86 72 w=49.6094 (Suspendisse)
Mx 67.1516
T 79 72 70 87 88 86 w=24.3018 (lectus)
Mx 94.1137
T 87 82 85 87 82 85 w=24.8633 (tortor)
Mx 118.9770
T 17 w=2.2900 (.)
Mx 14.8819
My 106.5447
T 47 82 85 72 80 w=26.4014 (Lorem)
Mx 53.6435
T 76 83 86 88 80 w=25.2002 (ipsum)
Mx 91.2039
T 71 82 79 82 85 w=21.9287 (dolor)
Mx 125.4928
T 86 76 87 w=10.0146 (sit)
Mx 147.8677
T 68 80 72 87 w=20.6836 (amet)
Mx 168.5513
T 15 w=2.2900 (,)
Mx 183.2015
T 86 76 87 w=10.0146 (sit)
Mx 205.5764
T 68 80 72 87 w=20.6836 (amet)
Mx 238.6202
T 68 71 76 83 76 86 70 76 81 74 w=41.8457 (adipiscing)
Mx 280.4659
T 17 w=2.2900 (.)
Mx 14.8819
My 118.5447
T 54 88 86 83 72 81 71 76 86 86 72 w=49.6094 (Suspendisse)
Mx 67.1516
T 79 72 70 87 88 86 w=24.3018 (lectus)
Mx 94.1137
T 87 82 85 87 82 85 w=24.8633 (tortor)
Mx 118.9770
T 17 w=2.2900 (.)
Mx 14.8819
My 136.5447
T 41 85 68 81 111 68 76 86 w=34.3799 (Français)
Mx 50.5831
T 34 w=4.3311 (?)
Mx 57.5711
T 41 85 68 81 111 68 76 w=30.5176 (Françai)
Mx 88.0887
T 86 w=3.8623 (s)
Mx 94.6080
T 29 w=2.2900 (:)
Mx 99.5550
T 169 w=4.8535 («)
Mx 106.5269
T 41 85 68 81 111 w=23.2178 (Franç)
Mx 129.7447
T 68 w=4.5898 (a)
Mx 134.3345
T 76 86 w=6.5723 (is)
Mx 142.2281
T 4 w=2.7197 (!)
Mx 147.0662
T 170 w=4.8535 (»)
End page
Finish
33 changes: 33 additions & 0 deletions tests/feat-unicode-nbsp.sil
@@ -0,0 +1,33 @@
\begin[papersize=a6]{document}
\neverindent
\nofolios
\language[main=und]
% Reference text
Lorem ipsum dolor sit amet, sit amet adipiscing elit.
Sed non risus. Suspendisse lectus tortor.
\medskip
% The first four spaces are U+00A0.
% Expect them to stretch like regular spaces.
Lorem ipsum dolor sit amet, consectetur adipiscing elit.\break
Sed non risus. Suspendisse lectus tortor.

% All spaces are U+00A0 but two.
% Expectation:
% The non-breaking spaces do not break, and shrink like regular spaces
% when necessary.
Lorem ipsum dolor sit amet, sit amet adipiscing elit.
Sed non risus. Suspendisse lectus tortor.

\set[parameter=linebreak.emergencyStretch, value=20em]
Lorem ipsum dolor sit amet, sit amet adipiscing.
Suspendisse lectus tortor.

% Punctuation spaces are U+00A0 in the following case.
% Expectation:
% French punctuation spaces are managed automatically,
% also replacing U+00A0.
\medskip
\language[main=fr]
Français ? Français : « Français ! »

\end{document}

0 comments on commit 9959959

Please sign in to comment.