Skip to content

Commit dd0b1b5

Browse files
Omikhleiaalerque
authored and committed
feat(math): Minimal support for accents in MathML and TeX-like commands
1 parent 41c3ad8 commit dd0b1b5

File tree

7 files changed

+352
-42
lines changed

7 files changed

+352
-42
lines changed

packages/math/atoms.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ local atomType = {
1414
over = 8, -- Unused for now (used for overlines etc. in The TeXbook)
1515
under = 9, -- Unused for now (used for underlines etc. in The TeXbook)
1616
accent = 10,
17-
botaccent = 11, -- Unused for now but botaccent is encoded in our dictionary
17+
botaccent = 11,
1818
}
1919

2020
return { types = atomType }

packages/math/base-elements.lua

Lines changed: 78 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,18 @@ local function isNotEmpty (element)
732732
return element and (element:is_a(elements.terminal) or #element.children > 0)
733733
end
734734

735+
--- Return the math mode to apply to an accent (over or under) script.
-- The two display modes are mapped to their text counterparts
-- (display -> text, displayCramped -> textCramped); any other mode is
-- returned as-is.
-- @param mode One of the mathMode values
-- @return The mathMode value to use for the accent
local function getAccentMode (mode)
736+
-- Size unchanged but leave display mode
737+
-- See MathML Core §3.4.3
738+
if mode == mathMode.display then
739+
return mathMode.text
740+
end
741+
if mode == mathMode.displayCramped then
742+
return mathMode.textCramped
743+
end
744+
-- Not a display mode: nothing to change.
return mode
745+
end
746+
735747
local function unwrapSingleElementMrow (elt)
736748
-- CODE SMELL.
737749
-- For \overset or \underset in LaTeX, MathML would use <mover> or <munder>.
@@ -748,10 +760,13 @@ local function unwrapSingleElementMrow (elt)
748760
end
749761
end
750762

751-
function elements.underOver:_init (base, sub, sup)
763+
function elements.underOver:_init (attributes, base, sub, sup)
752764
elements.mbox._init(self)
753765
base = unwrapSingleElementMrow(base)
754766
self.atom = base.atom
767+
self.attributes = attributes or {}
768+
self.attributes.accent = SU.boolean(self.attributes.accent, false)
769+
self.attributes.accentunder = SU.boolean(self.attributes.accentunder, false)
755770
self.base = base
756771
self.sub = isNotEmpty(sub) and sub or nil
757772
self.sup = isNotEmpty(sup) and sup or nil
@@ -771,10 +786,10 @@ function elements.underOver:styleChildren ()
771786
self.base.mode = self.mode
772787
end
773788
if self.sub then
774-
self.sub.mode = getSubscriptMode(self.mode)
789+
self.sub.mode = self.attributes.accentunder and getAccentMode(self.mode) or getSubscriptMode(self.mode)
775790
end
776791
if self.sup then
777-
self.sup.mode = getSuperscriptMode(self.mode)
792+
self.sup.mode = self.attributes.accent and getAccentMode(self.mode) or getSuperscriptMode(self.mode)
778793
end
779794
end
780795

@@ -816,7 +831,10 @@ function elements.underOver:_stretchyReshapeToBase (part)
816831
end
817832

818833
function elements.underOver:shape ()
834+
local constants = self:getMathMetrics().constants
835+
local scaleDown = self:getScaleDown()
819836
local isMovableLimits = SU.boolean(self.base and self.base.movablelimits, false)
837+
local itCorr = self:calculateItalicsCorrection() * scaleDown
820838
if not (self.mode == mathMode.display or self.mode == mathMode.displayCramped) and isMovableLimits then
821839
-- When the base is a movable limit, the under/over scripts are not placed under/over the base,
822840
-- but rather to the right of it, when display mode is not used.
@@ -827,32 +845,54 @@ function elements.underOver:shape ()
827845
elements.subscript.shape(self)
828846
return
829847
end
830-
local constants = self:getMathMetrics().constants
831-
local scaleDown = self:getScaleDown()
832848
-- Determine relative Ys
833849
if self.base then
834850
self.base.relY = SILE.types.length(0)
835851
end
836852
if self.sub then
837853
self:_stretchyReshapeToBase(self.sub)
838-
self.sub.relY = self.base.depth
839-
+ SILE.types.length(
840-
math.max(
841-
(self.sub.height + constants.lowerLimitGapMin * scaleDown):tonumber(),
842-
constants.lowerLimitBaselineDropMin * scaleDown
854+
-- TODO These rules are incomplete and even wrong if we were to fully implement MathML Core.
855+
if self.attributes.accentunder then
856+
self.sub.relY = self.base.depth
857+
+ SILE.types.length(
858+
(self.sub.height + constants.lowerLimitGapMin * scaleDown):tonumber()
859+
-- We assume that the accent is aligned on the base.
843860
)
844-
)
861+
else
862+
self.sub.relY = self.base.depth
863+
+ SILE.types.length(
864+
math.max(
865+
(self.sub.height + constants.lowerLimitGapMin * scaleDown):tonumber(),
866+
constants.lowerLimitBaselineDropMin * scaleDown
867+
)
868+
)
869+
end
845870
end
846871
if self.sup then
847872
self:_stretchyReshapeToBase(self.sup)
848-
self.sup.relY = 0
849-
- self.base.height
850-
- SILE.types.length(
851-
math.max(
852-
(constants.upperLimitGapMin * scaleDown + self.sup.depth):tonumber(),
853-
constants.upperLimitBaselineRiseMin * scaleDown
873+
-- TODO These rules are incomplete if we were to fully implement MathML Core.
874+
if self.attributes.accent then
875+
self.sup.relY = 0 - self.base.height
876+
-- MathML Core wants to align on the accentBaseHeight...
877+
local overShift = math.max(0, constants.accentBaseHeight * scaleDown - self.base.height:tonumber())
878+
self.sup.relY = self.sup.relY - SILE.types.length(overShift)
879+
-- HACK: .... but improperly dimensioned accents can overshoot the base glyph.
880+
-- So we try some guesswork to correct this.
881+
-- Typically some non-combining symbols are in this case...
882+
local heuristics = 0.5 * constants.flattenedAccentBaseHeight + 0.5 * constants.accentBaseHeight
883+
if self.sup.height > SILE.types.length(heuristics * scaleDown) then
884+
self.sup.relY = self.sup.relY + SILE.types.length(constants.accentBaseHeight * scaleDown)
885+
end
886+
else
887+
self.sup.relY = 0
888+
- self.base.height
889+
- SILE.types.length(
890+
math.max(
891+
(constants.upperLimitGapMin * scaleDown + self.sup.depth):tonumber(),
892+
constants.upperLimitBaselineRiseMin * scaleDown
893+
)
854894
)
855-
)
895+
end
856896
end
857897
-- Determine relative Xs based on widest symbol
858898
local widest, a, b
@@ -893,7 +933,6 @@ function elements.underOver:shape ()
893933
if b then
894934
b.relX = c - b.width / 2
895935
end
896-
local itCorr = self:calculateItalicsCorrection() * scaleDown
897936
if self.sup then
898937
self.sup.relX = self.sup.relX + itCorr / 2
899938
end
@@ -1201,7 +1240,10 @@ end
12011240
function elements.text:_vertStretchyReshape (depth, height)
12021241
local hasStretched = self:_stretchyReshape(depth + height, true)
12031242
if hasStretched then
1204-
-- HACK: see output routine
1243+
-- RESCALING HACK: see output routine
1244+
-- We only do it if the scaling logic found constructions on the vertical block axis.
1245+
-- It's a dirty hack until we properly implement assembly of glyphs in the case we couldn't
1246+
-- find a big enough variant.
12051247
self.vertExpectedSz = height + depth
12061248
self.vertScalingRatio = (depth + height):tonumber() / (self.height:tonumber() + self.depth:tonumber())
12071249
self.height = height
@@ -1212,12 +1254,21 @@ end
12121254

12131255
function elements.text:_horizStretchyReshape (width)
12141256
local hasStretched = self:_stretchyReshape(width, false)
1215-
if hasStretched then
1216-
-- HACK: see output routine
1217-
self.horizScalingRatio = width:tonumber() / self.width:tonumber()
1218-
self.width = width
1219-
end
1220-
return hasStretched
1257+
if not hasStretched and width:tonumber() < self.width:tonumber() then
1258+
-- Never shrink glyphs, it looks ugly
1259+
return false
1260+
end
1261+
-- But if stretching couldn't be done, it will be ugly anyway, so we will force
1262+
-- a re-scaling of the glyph.
1263+
-- (So it is slightly different from the vertical case, 'cause MathML just has one stretchy
1264+
-- attribute, whether for stretching on the vertical (block) or horizontal (inline) axis,
1265+
-- and we cannot know which axis is meant unless we implement yet another mapping table
1266+
-- as the one in the MathML Core appendices. Frankly, how many non-normative appendices
1267+
-- do we need to implement MathML correctly?)
1268+
-- RESCALING HACK: see output routine
1269+
self.horizScalingRatio = width:tonumber() / self.width:tonumber()
1270+
self.width = width
1271+
return true
12211272
end
12221273

12231274
function elements.text:output (x, y, line)
@@ -1356,7 +1407,7 @@ local function newSubscript (spec)
13561407
end
13571408

13581409
local function newUnderOver (spec)
1359-
return elements.underOver(spec.base, spec.sub, spec.sup)
1410+
return elements.underOver(spec.attributes, spec.base, spec.sub, spec.sup)
13601411
end
13611412

13621413
-- TODO replace with penlight equivalent

packages/math/init.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ The \code{counter} or the direct value \code{number} is passed as a parameter to
427427
428428
\paragraph{Missing features}
429429
This package still lacks support for some mathematical constructs, but hopefully we’ll get there.
430-
Among unsupported constructs are: decorating symbols with so-called accents, such as arrows or hats, “over” or “under” braces, and line breaking inside a formula.
430+
Among unsupported features, we can mention line breaking inside a formula.
431431
432432
\font:remove-fallback
433433
\end{document}

packages/math/texlike.lua

Lines changed: 113 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -396,9 +396,24 @@ local function isOperatorKind (tree, typeOfAtom)
396396
return false
397397
end
398398

399-
local function isMoveableLimits (tree)
399+
local function isMoveableLimitsOrAlwaysStacked (tree)
400+
if not tree then
401+
return false -- safeguard
402+
end
403+
if tree.is_always_stacked then
404+
-- We use an internal flag to mark commands that are always stacking
405+
-- their sup/sub arguments, such as brace-like commands.
406+
return true
407+
end
400408
if tree.command ~= "mo" then
401-
return false
409+
-- On the recursion:
410+
-- MathML allows movablelimits on <mo> elements, but "embellished operators"
411+
-- can be other elements inheriting the property from their "core operator",
412+
-- see MathML Core §3.2.4.1, which is full of intricacies so we are probably
413+
-- not even doing the right thing here.
414+
-- On the hack:
415+
-- See variant commands for limits further down.
416+
return SU.boolean(tree.is_hacked_movablelimits, false) or isMoveableLimitsOrAlwaysStacked(tree[1])
402417
end
403418
if tree.options and SU.boolean(tree.options.movablelimits, false) then
404419
return true
@@ -430,6 +445,9 @@ end
430445
--- Tell whether a symbol is listed in operatorDict with the "accent" atom type.
-- @param symbol Key looked up in operatorDict
-- @return true if the entry's atom is atoms.types.accent, false if it is
--   another atom type, or a falsy value when the symbol has no entry at all
local function isAccentSymbol (symbol)
431446
return operatorDict[symbol] and operatorDict[symbol].atom == atoms.types.accent
432447
end
448+
--- Tell whether a symbol is listed in operatorDict with the "botaccent" atom type.
-- Counterpart of isAccentSymbol for accents placed below the base.
-- @param symbol Key looked up in operatorDict
-- @return true if the entry's atom is atoms.types.botaccent, false if it is
--   another atom type, or a falsy value when the symbol has no entry at all
local function isBottomAccentSymbol (symbol)
449+
return operatorDict[symbol] and operatorDict[symbol].atom == atoms.types.botaccent
450+
end
433451

434452
local function compileToMathML_aux (_, arg_env, tree)
435453
if type(tree) == "string" then
@@ -565,14 +583,15 @@ local function compileToMathML_aux (_, arg_env, tree)
565583
end
566584
tree.options = {}
567585
-- Translate TeX-like sub/superscripts to `munderover` or `msubsup`,
568-
-- depending on whether the base is an operator with moveable limits.
569-
elseif tree.id == "sup" and isMoveableLimits(tree[1]) then
586+
-- depending on whether the base is an operator with moveable limits,
587+
-- or a brace-like command.
588+
elseif tree.id == "sup" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
570589
tree.command = "mover"
571-
elseif tree.id == "sub" and isMoveableLimits(tree[1]) then
590+
elseif tree.id == "sub" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
572591
tree.command = "munder"
573-
elseif tree.id == "subsup" and isMoveableLimits(tree[1]) then
592+
elseif tree.id == "subsup" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
574593
tree.command = "munderover"
575-
elseif tree.id == "supsub" and isMoveableLimits(tree[1]) then
594+
elseif tree.id == "supsub" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
576595
tree.command = "munderover"
577596
local tmp = tree[2]
578597
tree[2] = tree[3]
@@ -638,7 +657,7 @@ local function compileToMathML_aux (_, arg_env, tree)
638657
elseif tree.id == "command" and symbols[tree.command] then
639658
local atom = { id = "atom", [1] = symbols[tree.command] }
640659
if isAccentSymbol(symbols[tree.command]) and #tree > 0 then
641-
-- LaTeX-style accents \vec{v} = <mover accent="true"><mi>v</mi><mo></mo></mover>
660+
-- LaTeX-style accents \overrightarrow{v} = <mover accent="true"><mi>v</mi><mo>&#x20D7;</mo></mover>
642661
local accent = {
643662
id = "command",
644663
command = "mover",
@@ -649,6 +668,18 @@ local function compileToMathML_aux (_, arg_env, tree)
649668
accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
650669
accent[2] = compileToMathML_aux(nil, arg_env, atom)
651670
tree = accent
671+
elseif isBottomAccentSymbol(symbols[tree.command]) and #tree > 0 then
672+
-- LaTeX-style bottom accents \underleftarrow{v} = <munder accentunder="true"><mi>v</mi><mo>&#x20EE;</mo></munder>
673+
local accent = {
674+
id = "command",
675+
command = "munder",
676+
options = {
677+
accentunder = "true",
678+
},
679+
}
680+
accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
681+
accent[2] = compileToMathML_aux(nil, arg_env, atom)
682+
tree = accent
652683
elseif #tree > 0 then
653684
-- Play cool with LaTeX-style commands that don't take arguments:
654685
-- Edge case for non-accent symbols so we don't lose bracketed groups
@@ -728,6 +759,80 @@ registerCommand("mn", { [1] = objType.str }, function (x)
728759
return x
729760
end)
730761

762+
-- Register a limit-like variant command
763+
-- Variants of superior, inferior, projective and injective limits are special:
764+
-- They accept a sub/sup behaving as a movablelimits, but also have a symbol
765+
-- on top of the limit symbol, which is not a movablelimits.
766+
-- I can't see in the MathML specification how to do this properly: MathML Core
767+
-- seems to only allow movablelimits on <mo> elements, and <mover>/<munder> may
768+
-- inherit that property from their "core operator", but in this case we do not
769+
-- want the accent to be movable, only the limit sup/sub.
770+
-- So we use a hack, and also avoid "\def" here to prevent unwanted mrows.
771+
-- @tparam string name TeX command name
772+
-- @tparam string command MathML command (mover or munder)
773+
-- @tparam number symbol Unicode codepoint for the accent symbol
774+
-- @tparam string text Text representation
775+
local function registerVarLimits (name, command, symbol, text)
776+
-- The command is registered with an empty argument table: it takes no argument.
registerCommand(name, {}, function ()
777+
-- "mover" gets accent="true"; any other command value gets accentunder="true".
local options = command == "mover" and { accent = "true" } or { accentunder = "true" }
778+
-- Build the mover/munder node by hand: first child is the base operator,
-- second child is the symbol stacked over or under it.
return {
779+
command = command,
780+
is_hacked_movablelimits = true, -- Internal flag to mark this as a hack
781+
options = options,
782+
{
783+
-- Base: the limit text as an "op" atom, explicitly not movable itself.
-- NOTE(review): movablelimits is a Lua boolean here while the other
-- option values in this function are strings -- confirm this is intended.
command = "mo",
784+
options = { atom = "op", movablelimits = false },
785+
text,
786+
},
787+
{
788+
-- Script: the character for the given codepoint.
-- NOTE(review): accentunder="true" is set on this <mo> whatever the
-- value of `command` (including "mover") -- confirm this is intended.
command = "mo",
789+
options = { accentunder = "true" },
790+
luautf8.char(symbol),
791+
},
792+
}
793+
end)
794+
end
795+
registerVarLimits("varlimsup", "mover", 0x203E, "lim") -- U+203E OVERLINE
796+
registerVarLimits("varliminf", "munder", 0x203E, "lim") -- U+203E OVERLINE
797+
registerVarLimits("varprojlim", "munder", 0x2190, "lim") -- U+2190 LEFTWARDS ARROW
798+
registerVarLimits("varinjlim", "munder", 0x2192, "lim") -- U+2192 RIGHTWARDS ARROW
799+
800+
-- Register a brace-like command.
801+
-- Those symbols are accents per-se in MathML, and are non-combining in Unicode.
802+
-- But TeX treats them as "pseudo-accent" stretchy symbols.
803+
-- Moreover, they accept a sub/sup which is always stacked, and not movable.
804+
-- So we use an internal flag.
805+
-- We also avoid "\def" here to prevent unwanted mrows resulting from the
806+
-- compilation of the argument.
807+
-- @tparam string name TeX command name
808+
-- @tparam string command MathML command (mover or munder)
809+
-- @tparam number symbol Unicode codepoint for the brace symbol
810+
local function registerBraceLikeCommands (name, command, symbol)
811+
-- The command takes a single argument, declared with the objType.tree type.
registerCommand(name, {
812+
[1] = objType.tree,
813+
}, function (tree)
814+
-- "mover" gets accent="true"; any other command value gets accentunder="true".
local options = command == "mover" and { accent = "true" } or { accentunder = "true" }
815+
-- Build the mover/munder node by hand: first child is the base taken from
-- the argument, second child is the stretchy brace-like symbol.
return {
816+
command = command,
817+
is_always_stacked = true, -- Internal flag to mark this as a brace-like command
818+
options = options,
819+
-- Only the first element of the argument tree is used as the base.
tree[1],
820+
{
821+
-- The symbol is flagged stretchy.
command = "mo",
822+
options = { stretchy = "true" },
823+
luautf8.char(symbol),
824+
},
825+
}
826+
end)
827+
end
828+
-- Note: the following overrides the default commands from xml-entities / unicode-math.
829+
registerBraceLikeCommands("overbrace", "mover", 0x23DE) -- U+23DE TOP CURLY BRACKET
830+
registerBraceLikeCommands("underbrace", "munder", 0x23DF) -- U+23DF BOTTOM CURLY BRACKET
831+
registerBraceLikeCommands("overparen", "mover", 0x23DC) -- U+23DC TOP PARENTHESIS
832+
registerBraceLikeCommands("underparen", "munder", 0x23DD) -- U+23DD BOTTOM PARENTHESIS
833+
registerBraceLikeCommands("overbracket", "mover", 0x23B4) -- U+23B4 TOP SQUARE BRACKET
834+
registerBraceLikeCommands("underbracket", "munder", 0x23B5) -- U+23B5 BOTTOM SQUARE BRACKET
835+
731836
compileToMathML(
732837
nil,
733838
{},
@@ -737,7 +842,6 @@ compileToMathML(
737842
\def{sqrt}{\msqrt{#1}}
738843
\def{bi}{\mi[mathvariant=bold-italic]{#1}}
739844
\def{dsi}{\mi[mathvariant=double-struck]{#1}}
740-
\def{vec}{\mover[accent=true]{#1}{\rightarrow}}
741845
742846
% From amsmath:
743847
\def{to}{\mo[atom=bin]{→}}

0 commit comments

Comments
 (0)