diff --git a/documentation/c07-settings.sil b/documentation/c07-settings.sil index f38579b11..88f26e445 100644 --- a/documentation/c07-settings.sil +++ b/documentation/c07-settings.sil @@ -185,6 +185,10 @@ true value) and also \em{unset} the setting \autodoc:setting{document.spaceskip} unset it, just call \autodoc:command{\set} with no \autodoc:parameter{value} parameter: \autodoc:command{\set[parameter=document.spaceskip]}. +Note that non-breaking spaces (U+00A0), following the guidelines of Unicode Annex 14 (UAX 14), are treated by default as stretchable or shrinkable akin to regular inter-word spaces, contributing to text justification and alignment for consistent layout. + +If you want to disable this behavior, the \autodoc:setting{languages.fixedNbsp} setting may be set to \code{true} to enforce fixed-width non-breaking spaces. + \subsection{Letter spacing settings} You can also put spaces in between \em{letters} with the \autodoc:setting{document.letterspaceglue} setting. diff --git a/languages/fr.lua b/languages/fr.lua index 5e3c25bc2..d086c815f 100644 --- a/languages/fr.lua +++ b/languages/fr.lua @@ -172,7 +172,7 @@ function SILE.nodeMakers.fr:mustRemove (i, items) -- Clear "manual" spaces we do not want, so that later we only have to -- insert the relevant kerns. local curr = items[i].text - if self:isSpace(curr) then + if self:isSpace(curr) or self:isNonBreakingSpace(curr) then if i < #items then local next = items[i+1].text if self:isSpace(next) diff --git a/languages/unicode.lua b/languages/unicode.lua index bf3ee26d3..8df26f646 100644 --- a/languages/unicode.lua +++ b/languages/unicode.lua @@ -2,6 +2,13 @@ local icu = require("justenoughicu") local chardata = require("char-def") +SILE.settings:declare({ + parameter = "languages.fixedNbsp", + type = "boolean", + default = false, + help = "Whether to treat U+00A0 (NO-BREAK SPACE) as a fixed-width space" +}) + SILE.nodeMakers.base = pl.class({ _init = function (self, options) @@ -43,6 +50,14 @@ SILE.nodeMakers.base = pl.class({ self.lastnode = "penalty" end, + makeNonBreakingSpace = function (self) + -- Unicode Line Breaking Algorithm (UAX 14) specifies that U+00A0 + -- (NO-BREAK SPACE) is expanded or compressed like a normal space. + coroutine.yield(SILE.nodefactory.kern(SILE.shaper:measureSpace(self.options))) + self.lastnode = "glue" + self.lasttype = "sp" + end, + iterator = function (_, _) SU.error("Abstract function nodemaker:iterator called", true) end, @@ -61,6 +76,15 @@ SILE.nodeMakers.base = pl.class({ return self.isSpaceType[self:charData(char).linebreak] end, + isNonBreakingSpace = function (self, char) + local c = self:charData(char) + return c.contextname and c.contextname == "nobreakspace" + end, + + isActiveNonBreakingSpace = function (self, char) + return self:isNonBreakingSpace(char) and not SILE.settings:get("languages.fixedNbsp") + end, + isBreaking = function (self, char) return self.isBreakingType[self:charData(char).linebreak] end, @@ -85,6 +109,9 @@ function SILE.nodeMakers.unicode:dealWith (item) if self:isSpace(item.text) then self:makeToken() self:makeGlue(item) + elseif self:isActiveNonBreakingSpace(item.text) then + self:makeToken() + self:makeNonBreakingSpace() elseif self:isBreaking(item.text) then self:addToken(char, item) self:makeToken() @@ -152,7 +179,11 @@ function SILE.nodeMakers.unicode:handleWordBreak (item) if self:isSpace(item.text) then -- Spacing word break self:makeGlue(item) - else -- a word break which isn't a space + elseif self:isActiveNonBreakingSpace(item.text) then + -- Non-breaking space word break + self:makeNonBreakingSpace() + else + -- a word break which isn't a space self:addToken(item.text, item) end end @@ -161,7 +192,7 @@ function SILE.nodeMakers.unicode:handleLineBreak (item, subtype) -- Because we are in charge of paragraphing, we -- will override space-type line breaks, and treat -- them just as ordinary word spaces. - if self:isSpace(item.text) then + if self:isSpace(item.text) or self:isActiveNonBreakingSpace(item.text) then self:handleWordBreak(item) return end diff --git a/tests/feat-unicode-nbsp.expected b/tests/feat-unicode-nbsp.expected new file mode 100644 index 000000000..fb6b2179d --- /dev/null +++ b/tests/feat-unicode-nbsp.expected @@ -0,0 +1,176 @@ +Set paper size 297.6377985 419.5275636 +Begin page +Mx 14.8819 +My 28.5447 +Set font Gentium Plus;10;400;;normal;;;LTR +T 47 82 85 72 80 w=26.4014 (Lorem) +Mx 45.5228 +T 76 83 86 88 80 w=25.2002 (ipsum) +Mx 74.9626 +T 71 82 79 82 85 w=21.9287 (dolor) +Mx 101.1308 +T 86 76 87 w=10.0146 (sit) +Mx 115.3850 +T 68 80 72 87 w=20.6836 (amet) +Mx 136.0686 +T 15 w=2.2900 (,) +Mx 142.5982 +T 86 76 87 w=10.0146 (sit) +Mx 156.8524 +T 68 80 72 87 w=20.6836 (amet) +Mx 181.7755 +T 68 71 76 83 76 86 70 76 81 74 w=41.8457 (adipiscing) +Mx 227.8608 +T 72 79 76 87 w=13.4814 (elit) +Mx 241.3422 +T 17 w=2.2900 (.) +Mx 247.8718 +T 54 72 71 w=14.5801 (Sed) +Mx 266.6915 +T 81 82 81 w=16.0645 (non) +Mx 14.8819 +My 40.5447 +T 85 76 86 88 86 w=19.6924 (risus) +Mx 34.5743 +T 17 w=2.2900 (.) +Mx 39.5220 +T 54 88 86 83 72 81 71 76 86 86 72 w=49.6094 (Suspendisse) +Mx 91.7890 +T 79 72 70 87 88 86 w=24.3018 (lectus) +Mx 118.7484 +T 87 82 85 87 82 85 w=24.8633 (tortor) +Mx 143.6117 +T 17 w=2.2900 (.) +Mx 14.8819 +My 58.5447 +T 47 82 85 72 80 w=26.4014 (Lorem) +Mx 49.1701 +T 76 83 86 88 80 w=25.2002 (ipsum) +Mx 82.2572 +T 71 82 79 82 85 w=21.9287 (dolor) +Mx 112.0727 +T 86 76 87 w=10.0146 (sit) +Mx 129.9742 +T 68 80 72 87 w=20.6836 (amet) +Mx 150.6578 +T 15 w=2.2900 (,) +Mx 160.8347 +T 70 82 81 86 72 70 87 72 87 88 85 w=48.5303 (consectetur) +Mx 217.2519 +T 68 71 76 83 76 86 70 76 81 74 w=41.8457 (adipiscing) +Mx 266.9844 +T 72 79 76 87 w=13.4814 (elit) +Mx 280.4659 +T 17 w=2.2900 (.) +Mx 14.8819 +My 70.5447 +T 54 72 71 w=14.5801 (Sed) +Mx 32.1157 +T 81 82 81 w=16.0645 (non) +Mx 50.8338 +T 85 76 86 88 86 w=19.6924 (risus) +Mx 70.5262 +T 17 w=2.2900 (.) +Mx 75.4699 +T 54 88 86 83 72 81 71 76 86 86 72 w=49.6094 (Suspendisse) +Mx 127.7330 +T 79 72 70 87 88 86 w=24.3018 (lectus) +Mx 154.6884 +T 87 82 85 87 82 85 w=24.8633 (tortor) +Mx 179.5517 +T 17 w=2.2900 (.) +Mx 14.8819 +My 82.5447 +T 47 82 85 72 80 w=26.4014 (Lorem) +Mx 43.1918 +T 76 83 86 88 80 w=25.2002 (ipsum) +Mx 70.3005 +T 71 82 79 82 85 w=21.9287 (dolor) +Mx 94.1377 +T 86 76 87 w=10.0146 (sit) +Mx 106.0609 +T 68 80 72 87 w=20.6836 (amet) +Mx 126.7445 +T 15 w=2.2900 (,) +Mx 130.9431 +T 86 76 87 w=10.0146 (sit) +Mx 142.8663 +T 68 80 72 87 w=20.6836 (amet) +Mx 165.4584 +T 68 71 76 83 76 86 70 76 81 74 w=41.8457 (adipiscing) +Mx 209.2126 +T 72 79 76 87 w=13.4814 (elit) +Mx 222.6941 +T 17 w=2.2900 (.) +Mx 226.8926 +T 54 72 71 w=14.5801 (Sed) +Mx 243.3812 +T 81 82 81 w=16.0645 (non) +Mx 261.3542 +T 85 76 86 88 86 w=19.6924 (risus) +Mx 281.0466 +T 17 w=2.2900 (.) +Mx 14.8819 +My 94.5447 +T 54 88 86 83 72 81 71 76 86 86 72 w=49.6094 (Suspendisse) +Mx 67.1516 +T 79 72 70 87 88 86 w=24.3018 (lectus) +Mx 94.1137 +T 87 82 85 87 82 85 w=24.8633 (tortor) +Mx 118.9770 +T 17 w=2.2900 (.) +Mx 14.8819 +My 106.5447 +T 47 82 85 72 80 w=26.4014 (Lorem) +Mx 53.6435 +T 76 83 86 88 80 w=25.2002 (ipsum) +Mx 91.2039 +T 71 82 79 82 85 w=21.9287 (dolor) +Mx 125.4928 +T 86 76 87 w=10.0146 (sit) +Mx 147.8677 +T 68 80 72 87 w=20.6836 (amet) +Mx 168.5513 +T 15 w=2.2900 (,) +Mx 183.2015 +T 86 76 87 w=10.0146 (sit) +Mx 205.5764 +T 68 80 72 87 w=20.6836 (amet) +Mx 238.6202 +T 68 71 76 83 76 86 70 76 81 74 w=41.8457 (adipiscing) +Mx 280.4659 +T 17 w=2.2900 (.) +Mx 14.8819 +My 118.5447 +T 54 88 86 83 72 81 71 76 86 86 72 w=49.6094 (Suspendisse) +Mx 67.1516 +T 79 72 70 87 88 86 w=24.3018 (lectus) +Mx 94.1137 +T 87 82 85 87 82 85 w=24.8633 (tortor) +Mx 118.9770 +T 17 w=2.2900 (.) +Mx 14.8819 +My 136.5447 +T 41 85 68 81 111 68 76 86 w=34.3799 (Français) +Mx 50.5831 +T 34 w=4.3311 (?) +Mx 57.5711 +T 41 85 68 81 111 68 76 w=30.5176 (Françai) +Mx 88.0887 +T 86 w=3.8623 (s) +Mx 94.6080 +T 29 w=2.2900 (:) +Mx 99.5550 +T 169 w=4.8535 («) +Mx 106.5269 +T 41 85 68 81 111 w=23.2178 (Franç) +Mx 129.7447 +T 68 w=4.5898 (a) +Mx 134.3345 +T 76 86 w=6.5723 (is) +Mx 142.2281 +T 4 w=2.7197 (!) +Mx 147.0662 +T 170 w=4.8535 (») +End page +Finish diff --git a/tests/feat-unicode-nbsp.sil b/tests/feat-unicode-nbsp.sil new file mode 100644 index 000000000..f4ce0f5c2 --- /dev/null +++ b/tests/feat-unicode-nbsp.sil @@ -0,0 +1,33 @@ +\begin[papersize=a6]{document} +\neverindent +\nofolios +\language[main=und] +% Reference text +Lorem ipsum dolor sit amet, sit amet adipiscing elit. +Sed non risus. Suspendisse lectus tortor. +\medskip +% Four first spaces are U+00A0. +% Expect proper stretchability as regular spaces. +Lorem ipsum dolor sit amet, consectetur adipiscing elit.\break +Sed non risus. Suspendisse lectus tortor. + +% All spaces are U+00A0 but two: +% Expectation: +% Expect non-breakable spaces, shrinking as regular spaces +% when necessary. +Lorem ipsum dolor sit amet, sit amet adipiscing elit. +Sed non risus. Suspendisse lectus tortor. + +\set[parameter=linebreak.emergencyStretch, value=20em] +Lorem ipsum dolor sit amet, sit amet adipiscing. +Suspendisse lectus tortor. + +% Punctiation spaces are U+00A0 in the following case. +% Expectation: +% French punctuation spaces are managed automatically, +% also replacing U+00A0. +\medskip +\language[main=fr] +Français ? Français : « Français ! » + +\end{document} diff --git a/tests/sura-2.expected b/tests/sura-2.expected index 6a755a023..06bb3cd0e 100644 --- a/tests/sura-2.expected +++ b/tests/sura-2.expected @@ -7,23 +7,20 @@ Mx 9.6600 Mx 4.7600 Mx 4.3400 T 121 x=3.9000 y=-0.1800 1294 a=9.6600 121 x=1.9600 y=0.5000 1121 a=4.7600 43 a=4.3400 (الۤمۤ) -Mx 238.1559 -T 22 w=5.8400 ( ) +Mx 235.3368 Set font Amiri Quran;20;400;;normal;;;LTR T 1263 a=5.8400 x=-15.7200 (١) -Mx 212.5759 +Mx 209.7568 T 114 w=25.5800 (۝) -Mx 193.4531 +Mx 192.0976 Set font Amiri Quran;20;400;;normal;;;RTL Mx 9.0000 T 77 x=-1.2800 y=-0.2600 52 a=9.0000 (ذَ) -Mx 189.2931 -T 1257 w=4.1600 ( ٰ) -Mx 169.2331 +Mx 163.3785 Mx 16.9000 Mx 3.1600 T 77 x=1.6600 y=-0.2600 1181 a=16.9000 79 x=-3.2200 1178 a=3.1600 (لِكَ) -Mx 107.4303 +Mx 103.0393 Mx 17.6600 Mx 5.8400 Mx 4.8800 @@ -32,11 +29,11 @@ Mx 3.7000 Mx 3.5000 Mx 4.3400 T 78 x=3.7200 y=-0.3800 472 a=17.6600 1258 a=5.8400 77 x=-3.7200 y=-0.2600 491 a=4.8800 79 x=0.7200 523 a=11.7600 118 x=0.8400 y=-0.2800 1295 a=3.7000 525 a=3.5000 102 a=4.3400 (ٱلۡكِتَـٰبُ) -Mx 84.4675 +Mx 81.5402 Mx 6.8000 Mx 6.0400 T 679 a=6.8000 77 x=-1.4800 y=-0.2600 677 a=6.0400 (لَا) -Mx 43.3647 +Mx 41.9010 Mx 15.4400 Mx 5.9000 Mx 9.6400 @@ -52,7 +49,7 @@ Mx 15.2800 Mx 9.4200 Mx 9.0400 T 72 a=15.2800 141 x=-0.3400 y=-0.4000 497 a=9.4200 78 x=-2.5800 y=-0.3800 519 a=9.0400 (هُدࣰى) -Mx 197.3824 +Mx 198.4611 Mx 10.8400 Mx 4.3600 Mx 7.5600 @@ -61,13 +58,12 @@ Mx 4.7400 Mx 4.8600 Mx 3.1600 T 77 x=-0.2400 y=-0.2600 878 a=10.8400 877 a=4.3600 79 x=-2.0000 505 a=7.5600 369 x=1.5400 y=0.2200 80 x=-2.5000 y=-0.4600 491 a=4.8800 78 x=-3.4400 y=-0.3800 802 a=4.7200 118 x=-1.5600 y=-0.2800 1188 a=5.7000 x=-0.8400 79 x=-3.2200 80 x=-1.9600 y=-0.4600 1178 a=3.1600 (لِّلۡمُتَّقِینَ) -Mx 191.5424 -T 22 w=5.8400 ( ) +Mx 188.3063 Set font Amiri Quran;20;400;;normal;;;LTR T 1264 a=5.8400 x=-15.7200 (٢) -Mx 165.9624 +Mx 162.7263 T 114 w=25.5800 (۝) -Mx 123.6689 +Mx 121.5115 Set font Amiri Quran;20;400;;normal;;;RTL Mx 11.2400 Mx 2.9000 @@ -75,7 +71,7 @@ Mx 9.7600 Mx 2.8200 Mx 4.3400 T 77 x=1.0600 y=-0.2600 726 a=11.2400 725 a=2.9000 79 x=-0.9000 1071 a=9.7600 369 x=-0.8600 y=0.2200 80 x=-4.9000 y=-0.4600 1067 a=2.8200 102 a=4.3400 (ٱلَّذِینَ) -Mx 70.7354 +Mx 69.6567 Mx 11.4400 Mx 6.9000 Mx 4.8800 @@ -101,7 +97,7 @@ Mx 7.5600 Mx 3.8000 Mx 10.4400 T 77 y=-0.2600 69 a=11.4400 555 a=6.9000 78 x=-1.6000 y=-0.3800 529 a=9.2600 495 a=4.8800 79 x=-2.0000 505 a=7.5600 78 x=-4.3200 y=-0.3800 484 a=3.8000 77 x=1.2600 y=-0.2600 71 a=8.0000 x=2.4400 (وَیُقِیمُونَ) -Mx 180.8874 +Mx 181.0131 Mx 7.3800 Mx 6.4000 Mx 5.0600 @@ -109,13 +105,13 @@ Mx 14.4400 Mx 3.5000 Mx 4.3400 T 77 x=-1.7800 y=-0.2600 45 a=7.3800 101 x=0.0400 y=-9.6800 577 a=6.4000 77 x=-4.1200 y=-0.2600 575 a=5.0600 369 x=7.1000 y=0.2200 80 x=3.0600 y=-0.4600 541 a=14.4400 525 a=3.5000 102 a=4.3400 (ٱلصَّلَوٰةَ) -Mx 152.7789 +Mx 153.0303 Mx 4.5800 Mx 3.4600 Mx 5.6000 Mx 8.0000 T 459 a=4.5800 369 x=1.8400 y=0.2200 80 x=-2.2000 y=-0.4600 1091 a=3.4600 79 x=-1.5600 1090 a=5.6000 77 x=-1.1800 y=-0.2600 71 a=8.0000 (وَمِمَّا) -Mx 95.4904 +Mx 95.8675 Mx 10.4400 Mx 7.8000 Mx 5.8400 @@ -124,7 +120,7 @@ Mx 5.9000 Mx 7.9800 Mx 7.9800 T 118 x=0.9800 y=-0.2800 527 a=10.4400 78 x=-1.6000 y=-0.3800 520 a=7.8000 1258 a=5.8400 77 x=-3.7200 y=-0.2600 490 a=4.8800 118 x=-2.0800 y=-0.2800 501 a=5.9000 77 x=-1.4800 y=-0.2600 54 a=7.9800 77 x=-1.4800 y=-0.2600 53 a=7.9800 (رَزَقۡنَـٰهُمۡ) -Mx 46.3019 +Mx 46.8047 Mx 11.4400 Mx 6.9000 Mx 7.5600 @@ -133,7 +129,6 @@ Mx 3.3600 Mx 5.9000 T 77 y=-0.2600 69 a=11.4400 555 a=6.9000 78 x=-1.1000 y=-0.3800 505 a=7.5600 79 x=-2.0000 506 a=7.5600 688 a=3.3600 78 x=-3.0600 y=-0.3800 825 a=5.9000 (یُنفِقُونَ) Mx 40.4619 -T 22 w=5.8400 ( ) Set font Amiri Quran;20;400;;normal;;;LTR T 1265 a=5.8400 x=-15.7200 (٣) Mx 14.8819 @@ -195,7 +190,7 @@ Mx 3.9800 Mx 4.8800 Mx 5.9000 T 77 x=1.9400 y=-0.2600 1186 a=16.2600 79 x=-5.2200 1183 a=3.9800 118 x=-1.9800 y=-0.2800 488 a=4.8800 77 x=-3.8200 y=-0.2600 501 a=5.9000 (قَبۡلِكَ) -Mx 182.0124 +Mx 183.0491 Mx 7.3800 Mx 5.9800 Mx 11.5800 @@ -205,11 +200,11 @@ Mx 5.5600 Mx 5.7000 Mx 9.6600 T 79 x=-1.2200 45 a=7.3800 77 x=-1.4400 y=-0.2600 884 a=5.9800 79 x=0.5400 880 a=11.5800 679 a=6.8000 77 x=-5.3600 y=-0.2600 1260 118 x=0.2600 y=-0.2800 677 a=6.0400 462 a=5.5600 79 x=-2.2400 1012 a=5.7000 77 x=0.4800 y=-0.2600 71 a=8.0000 x=1.6600 (وَبِٱلۡءَاخِرَةِ) -Mx 151.5089 +Mx 153.5823 Mx 10.4400 Mx 9.0400 T 118 x=0.9800 y=-0.2800 527 a=10.4400 78 x=-2.5800 y=-0.3800 519 a=9.0400 (هُمۡ) -Mx 100.6654 +Mx 103.7755 Mx 11.4400 Mx 6.9000 Mx 4.8800 @@ -217,11 +212,10 @@ Mx 5.9000 Mx 6.9000 Mx 3.8000 T 77 y=-0.2600 69 a=11.4400 555 a=6.9000 78 x=-3.8400 y=-0.3800 490 a=4.8800 79 x=-2.3000 501 a=5.9000 555 a=6.9000 78 x=-4.3200 y=-0.3800 484 a=3.8000 (یُوقِنُونَ) -Mx 94.8254 -T 22 w=5.8400 ( ) +Mx 93.7887 Set font Amiri Quran;20;400;;normal;;;LTR T 1266 a=5.8400 x=-15.7200 (٤) -Mx 69.2454 +Mx 68.2087 T 114 w=25.5800 (۝) Mx 14.8819 Set font Amiri Quran;20;400;;normal;;;RTL @@ -277,11 +271,10 @@ Mx 9.2600 Mx 3.5000 Mx 4.3400 T 77 y=-0.2600 69 a=11.4400 555 a=6.9000 78 x=0.8400 y=-0.3800 515 a=12.2000 79 x=-3.3600 526 a=3.9400 118 x=0.7600 y=-0.2800 506 a=7.5600 78 x=-1.6000 y=-0.3800 529 a=9.2600 118 x=-2.8600 y=-0.2800 525 a=3.5000 102 a=4.3400 (ٱلۡمُفۡلِحُونَ) -Mx 10.9879 -T 22 w=5.8400 ( ) +Mx 11.7666 Set font Amiri Quran;20;400;;normal;;;LTR T 1267 a=5.8400 x=-15.7200 (٥) -Mx -14.5921 +Mx -13.8134 T 114 w=25.5800 (۝) Mx 266.9759 My 131.0364 @@ -335,7 +328,7 @@ My 206.0364 Mx 9.6600 Mx 4.7600 T 118 x=0.2600 y=-0.2800 1294 a=9.6600 77 x=-3.4200 y=0.4200 1121 a=4.7600 (لَمۡ) -Mx 214.6788 +Mx 214.6791 Mx 10.4400 Mx 9.0400 Mx 7.9800 @@ -343,11 +336,11 @@ Mx 9.4200 Mx 3.3600 Mx 6.3800 T 118 x=0.9800 y=-0.2800 527 a=10.4400 78 x=-2.5800 y=-0.3800 519 a=9.0400 118 x=0.2600 y=-0.2800 53 a=7.9800 79 x=-0.3400 496 a=9.4200 688 a=3.3600 78 x=-3.0600 y=-0.3800 821 a=6.3800 (تُنذِرۡهُمۡ) -Mx 194.8016 +Mx 194.8023 Mx 6.8000 Mx 6.0400 T 679 a=6.8000 77 x=-1.4800 y=-0.2600 677 a=6.0400 (لَا) -Mx 146.0645 +Mx 146.0656 Mx 11.4400 Mx 6.9000 Mx 4.8800 @@ -355,11 +348,10 @@ Mx 7.7800 Mx 6.9000 Mx 3.8000 T 77 y=-0.2600 69 a=11.4400 555 a=6.9000 78 x=-3.8400 y=-0.3800 490 a=4.8800 79 x=-1.3200 528 a=7.7800 118 x=-0.7200 y=-0.2800 554 a=6.9000 78 x=-4.3200 y=-0.3800 484 a=3.8000 (یُؤۡمِنُونَ) -Mx 140.2245 -T 22 w=5.8400 ( ) +Mx 139.0288 Set font Amiri Quran;20;400;;normal;;;LTR T 1268 a=5.8400 x=-15.7200 (٦) -Mx 114.6445 +Mx 113.4488 T 114 w=25.5800 (۝) End page Finish