Skip to content

Commit 3dd25e9

Browse files
Omikhleia authored and alerque committed
feat(math): Support the MathML operator dictionary and many TeX-like aliases
1 parent 088733e commit 3dd25e9

7 files changed

Lines changed: 5580 additions & 2639 deletions

File tree

packages/math/atoms.lua

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
local atomType = {
2+
ordinary = 0,
3+
bigOperator = 1,
4+
binaryOperator = 2,
5+
relationalOperator = 3,
6+
openingSymbol = 4,
7+
closeSymbol = 5,
8+
punctuationSymbol = 6,
9+
inner = 7,
10+
overSymbol = 8,
11+
underSymbol = 9,
12+
accentSymbol = 10,
13+
radicalSymbol = 11,
14+
vcenter = 12,
15+
}
16+
17+
-- Shorthands for atom types, used in the `atom` command option
18+
-- and also in the unicode symbols table / operator dictionary
19+
local atomTypeShort = {
20+
ord = atomType.ordinary,
21+
big = atomType.bigOperator,
22+
bin = atomType.binaryOperator,
23+
rel = atomType.relationalOperator,
24+
open = atomType.openingSymbol,
25+
close = atomType.closeSymbol,
26+
punct = atomType.punctuationSymbol,
27+
inner = atomType.inner,
28+
over = atomType.overSymbol,
29+
under = atomType.underSymbol,
30+
accent = atomType.accentSymbol,
31+
radical = atomType.radicalSymbol,
32+
vcenter = atomType.vcenter,
33+
}
34+
35+
return {
36+
atomType = atomType,
37+
atomTypeShort = atomTypeShort,
38+
}

packages/math/base-elements.lua

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
local nodefactory = require("types.node")
22
local hb = require("justenoughharfbuzz")
33
local ot = require("core.opentype-parser")
4-
local syms = require("packages.math.unicode-symbols")
4+
local atoms = require("packages.math.atoms")
55
local mathvariants = require("packages.math.unicode-mathvariants")
66
local convertMathVariantScript = mathvariants.convertMathVariantScript
77

8-
local atomType = syms.atomType
9-
local symbolDefaults = syms.symbolDefaults
8+
local atomType = atoms.atomType
109

1110
local elements = {}
1211

@@ -423,7 +422,7 @@ function elements.stackbox:shape ()
423422
end
424423
-- Handle stretchy operators
425424
for _, elt in ipairs(self.children) do
426-
if elt.is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
425+
if elt:is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
427426
elt:_vertStretchyReshape(self.depth, self.height)
428427
end
429428
end
@@ -694,14 +693,14 @@ function elements.underOver:_stretchyReshapeToBase (part)
694693
-- MathML3 "complex1" torture test: Maxwell's Equations (vectors in fractions)
695694
if #part.children == 0 then
696695
local elt = part
697-
if elt.is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
696+
if elt:is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
698697
elt:_horizStretchyReshape(self.base.width)
699698
end
700699
elseif part:is_a(elements.underOver) then
701700
-- Big assumption here: only considering one level of stacked under/over.
702701
local hasStretched = false
703702
for _, elt in ipairs(part.children) do
704-
if elt.is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
703+
if elt:is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
705704
local stretched = elt:_horizStretchyReshape(self.base.width)
706705
if stretched then
707706
hasStretched = true
@@ -1652,8 +1651,6 @@ function elements.bevelledFraction:output (x, y, line)
16521651
end
16531652

16541653
elements.mathMode = mathMode
1655-
elements.atomType = atomType
1656-
elements.symbolDefaults = symbolDefaults
16571654
elements.newSubscript = newSubscript
16581655
elements.newUnderOver = newUnderOver
16591656

packages/math/texlike.lua

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1+
local atoms = require("packages.math.atoms")
12
local syms = require("packages.math.unicode-symbols")
23
local bits = require("core.parserbits")
34

45
local epnf = require("epnf")
56
local lpeg = require("lpeg")
67

7-
local atomType = syms.atomType
8-
local symbolDefaults = syms.symbolDefaults
8+
local atomType = atoms.atomType
9+
local atomTypeShort = atoms.atomTypeShort
10+
local operatorDict = syms.operatorDict
911
local symbols = syms.symbols
1012

1113
-- Grammar to parse TeX-like math
@@ -260,7 +262,7 @@ local compileToStr = function (argEnv, mathlist)
260262
end
261263
end
262264

263-
local function isOperatorKind (tree, typeOfAtom, typeOfSymbol)
265+
local function isOperatorKind (tree, typeOfAtom)
264266
if not tree then
265267
return false -- safeguard
266268
end
@@ -274,8 +276,8 @@ local function isOperatorKind (tree, typeOfAtom, typeOfSymbol)
274276
end
275277
-- Case \mo{ops} where ops is registered with the requested type
276278
-- E.g. \mo{∑} or \sum
277-
if tree[1] and symbolDefaults[tree[1]] and symbolDefaults[tree[1]].atom == typeOfSymbol then
278-
return true
279+
if tree[1] and operatorDict[tree[1]] and operatorDict[tree[1]].atom then
280+
return operatorDict[tree[1]].atom == atomTypeShort[typeOfAtom]
279281
end
280282
return false
281283
end
@@ -287,20 +289,32 @@ local function isMoveableLimits (tree)
287289
if tree.options and SU.boolean(tree.options.movablelimits, false) then
288290
return true
289291
end
290-
if tree[1] and symbolDefaults[tree[1]] and SU.boolean(symbolDefaults[tree[1]].movablelimits, false) then
291-
return true
292+
if tree[1] and operatorDict[tree[1]] and operatorDict[tree[1]].forms then
293+
-- Leap of faith: We have no idea yet which form the operator will take
294+
-- in the final MathML.
295+
-- In the MathML operator dictionary, some operators have a movablelimits
296+
-- in some forms and not in others.
297+
-- E.g. \Join (U+2A1D) and \bigtriangleleft (U+2A1E) have it in prefix form but not
298+
-- infix, for some unspecified reason (?).
299+
-- Assume that if at least one form has movablelimits, the operator is
300+
-- considered to have movablelimits "in general".
301+
for _, form in pairs(operatorDict[tree[1]].forms) do
302+
if SU.boolean(form.movablelimits, false) then
303+
return true
304+
end
305+
end
292306
end
293307
return false
294308
end
295309
local function isCloseOperator (tree)
296-
return isOperatorKind(tree, "close", atomType.closeSymbol)
310+
return isOperatorKind(tree, "close")
297311
end
298312
local function isOpeningOperator (tree)
299-
return isOperatorKind(tree, "open", atomType.openingSymbol)
313+
return isOperatorKind(tree, "open")
300314
end
301315

302316
local function isAccentSymbol (symbol)
303-
return symbolDefaults[symbol] and symbolDefaults[symbol].atom == atomType.accentSymbol
317+
return operatorDict[symbol] and operatorDict[symbol].atom == atomType.accentSymbol
304318
end
305319

306320
local function compileToMathML_aux (_, arg_env, tree)
@@ -666,8 +680,12 @@ compileToMathML(
666680
\def{mathtt}{\mi[mathvariant=monospace]{#1}}
667681
668682
% Modulus operator forms
669-
\def{bmod}{\mo{mod}}
670-
\def{pmod}{\quad(\mo{mod} #1)}
683+
% See Michael Downes & Barbara Beeton, "Short Math Guide for LaTeX"
684+
% American Mathematical Society (v2.0, 2017), §7.1 p. 18
685+
\def{bmod}{\mo[atom=bin]{mod}}
686+
\def{pmod}{\quad(\mo[atom=ord]{mod}\>#1)}
687+
\def{mod}{\quad \mo[atom=ord]{mod}\>#1}
688+
\def{pod}{\quad(#1)}
671689
672690
% Phantom commands from TeX/LaTeX
673691
\def{phantom}{\mphantom{#1}}
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
2+
<!--
3+
Stylesheet to convert the unicode.xml file to a SILE Lua file:
4+
xsltproc unicode-xml-to-sile.xsl unicode.xml > ../packages/math/unicode-symbols-generated.lua
5+
Where unicode.xml is:
6+
https://raw.githubusercontent.com/w3c/xml-entities/gh-pages/unicode.xml
7+
-->
8+
<xsl:output method="text" indent="no"/>
9+
10+
<xsl:template name="format-value">
11+
<xsl:param name="value" />
12+
<xsl:choose>
13+
<!-- integer -->
14+
<xsl:when test="floor($value) = $value"><xsl:value-of select="$value" /></xsl:when>
15+
<!-- boolean -->
16+
<xsl:when test="$value = 'true' or $value = 'false'"><xsl:value-of select="$value" /></xsl:when>
17+
<!-- string -->
18+
<xsl:otherwise>"<xsl:value-of select="$value" />"</xsl:otherwise>
19+
</xsl:choose>
20+
</xsl:template>
21+
22+
<xsl:template name="format-codepoint">
23+
<xsl:param name="codepoint" />
24+
<!-- Codepoint is UXXXX, remove the U -->
25+
<xsl:variable name="hex" select="concat('U(0x', substring($codepoint, 2), ')')" />
26+
<xsl:choose>
27+
<xsl:when test="contains($hex, '-')">
28+
<!-- Special case for 2-character operators -->
29+
<!-- CAVEAT: We do not expect operators with more than 2 characters -->
30+
<xsl:value-of select="substring-before($hex, '-')" />
31+
<xsl:value-of select="concat(', 0x', substring-after($hex, '-'))" />
32+
</xsl:when>
33+
<xsl:otherwise>
34+
<xsl:value-of select="$hex" />
35+
</xsl:otherwise>
36+
</xsl:choose>
37+
</xsl:template>
38+
39+
<xsl:template name="format-class">
40+
<xsl:param name="class" />
41+
<xsl:param name="combclass" />
42+
<xsl:param name="description" />
43+
<xsl:choose>
44+
<xsl:when test="$class = 'N'">ord</xsl:when><!-- Normal = mathord = atomType.ordinary -->
45+
<xsl:when test="$class = 'A'">ord</xsl:when><!-- Alphabetic = mathalpha = atomType.ordinary -->
46+
<xsl:when test="$class = 'B'">bin</xsl:when><!-- Binary = mathbin = atomType.binaryOperator -->
47+
<xsl:when test="$class = 'C'">close</xsl:when><!-- Closing = mathclose = atomType.closeSymbol -->
48+
<xsl:when test="$class = 'D'"><!-- Diacritic -->
49+
<xsl:choose>
50+
<xsl:when test="$combclass = '220'">botaccent</xsl:when>
51+
<xsl:when test="$combclass = '230'">accent</xsl:when>
52+
<xsl:otherwise>ord</xsl:otherwise><!-- assuming atomType.ordinary -->
53+
</xsl:choose>
54+
</xsl:when>
55+
<xsl:when test="$class = 'F'">ord</xsl:when><!-- Fence = mathfence = atomType.ordinary -->
56+
<xsl:when test="$class = 'G'">ord</xsl:when><!-- Glyph Part = assuming atomType.ordinary -->
57+
<xsl:when test="$class = 'L'"><!-- Large -->
58+
<xsl:choose>
59+
<!-- SILE uses the atom for spacing currently (ignoring lspace and rspace) -->
60+
<!-- HACK: integral signs are NOT considered as big for spacing purposes -->
61+
<xsl:when test="contains($description,'INTEGRAL')">ord</xsl:when>
62+
<xsl:otherwise>big</xsl:otherwise><!-- mathop = atomType.bigOperator -->
63+
</xsl:choose>
64+
</xsl:when>
65+
<xsl:when test="$class = 'O'">open</xsl:when><!-- Opening = mathopen = atomType.openingSymbol -->
66+
<xsl:when test="$class = 'P'">punct</xsl:when><!-- Punctuation = mathpunct = atomType.punctuationSymbol -->
67+
<xsl:when test="$class = 'R'">rel</xsl:when><!-- Relation = mathrel = atomType.relationalOperator -->
68+
<xsl:when test="$class = 'S'">ord</xsl:when><!-- Space = assuming atomType.ordinary -->
69+
<xsl:when test="$class = 'U'">ord</xsl:when><!-- Unary = mathord = atomType.ordinary -->
70+
<xsl:when test="$class = 'V'">bin</xsl:when><!-- Vary = assume mathbin = atomType.binaryOperator -->
71+
<xsl:otherwise>ord</xsl:otherwise><!-- assuming atomType.ordinary if not specified -->
72+
</xsl:choose>
73+
</xsl:template>
74+
75+
<xsl:template name="format-mathlatex">
76+
<xsl:param name="mathlatex" />
77+
<xsl:choose>
78+
<xsl:when test="$mathlatex">"<xsl:value-of select="substring($mathlatex, 2)" />"</xsl:when>
79+
<xsl:otherwise>nil</xsl:otherwise>
80+
</xsl:choose>
81+
</xsl:template>
82+
83+
<xsl:template match="unicode">--- GENERATED FILE, DO NOT EDIT MANUALLY
84+
--
85+
-- Operator dictionary for unicode characters
86+
--
87+
-- Extracted from https://raw.githubusercontent.com/w3c/xml-entities/gh-pages/unicode.xml
88+
-- (https://github.com/w3c/xml-entities)
89+
-- Copyright David Carlisle 1999-2024
90+
-- Use and distribution of this code are permitted under the terms of the
91+
-- W3C Software Notice and License.
92+
-- http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231.html
93+
-- This file is a collection of information about how to map Unicode entities to LaTeX,
94+
-- and various SGML/XML entity sets (ISO and MathML/HTML).
95+
-- A Unicode character may be mapped to several entities.
96+
-- Originally designed by Sebastian Rahtz in conjunction with Barbara Beeton for the STIX project
97+
--
98+
99+
local atoms = require("packages/math/atoms")
100+
local atomTypeShort = atoms.atomTypeShort
101+
102+
--- Transform a list of codepoints into a string
103+
local function U (...)
104+
local t = { ... }
105+
local str = ""
106+
for i = 1, #t do
107+
str = str .. luautf8.char(t[i])
108+
end
109+
return str
110+
end
111+
112+
local symbols = {}
113+
local operatorDict = {}
114+
115+
--- Register a symbol
116+
-- @tparam string str String representation of the symbol
117+
-- @tparam string shortatom Short atom type
118+
-- @tparam string mathlatex TeX-like name of the symbol (from unicode-math)
119+
-- @tparam string _ Unicode name of the symbol (informative)
120+
-- @tparam table ops List of operator forms and their properties
121+
local function addSymbol (str, shortatom, mathlatex, _, ops)
122+
if mathlatex then
123+
SU.debug("math.symbols", "Registering symbol", str, "as", mathlatex)
124+
symbols[mathlatex] = str
125+
end
126+
local op = {}
127+
op.atom = atomTypeShort[shortatom]
128+
if ops then
129+
op.forms = {}
130+
for _, v in pairs(ops) do
131+
if v.form then
132+
v.lspace = SILE.types.length(v.lspace and ("%smu"):format(v.lspace) or "0mu")
133+
v.rspace = SILE.types.length(v.rspace and ("%smu"):format(v.rspace) or "0mu")
134+
op.forms[v.form] = v
135+
else
136+
SU.warn("No form for operator " .. str .. " (operator dictionary is probably incomplete)")
137+
end
138+
end
139+
end
140+
operatorDict[str] = op
141+
end
142+
143+
<xsl:apply-templates select="charlist/character" />
144+
145+
return {
146+
operatorDict = operatorDict,
147+
symbols = symbols
148+
}
149+
</xsl:template>
150+
151+
<xsl:template match="character">
152+
<xsl:variable name="mathclass" select="unicodedata/@mathclass" />
153+
<xsl:variable name="mathlatex" select="mathlatex[@set='unicode-math']/text()" />
154+
<xsl:variable name="combclass" select="unicodedata/@combclass" />
155+
<xsl:variable name="atom">
156+
<xsl:call-template name="format-class">
157+
<xsl:with-param name="class" select="$mathclass" />
158+
<xsl:with-param name="combclass" select="$combclass" />
159+
<xsl:with-param name="description" select="description" />
160+
</xsl:call-template>
161+
</xsl:variable>
162+
<xsl:if test="$atom != 'ord' or $mathlatex or operator-dictionary">
163+
<xsl:text>
164+
addSymbol(</xsl:text>
165+
<!-- Codepoints -->
166+
<xsl:call-template name="format-codepoint">
167+
<xsl:with-param name="codepoint" select="@id" />
168+
</xsl:call-template>
169+
<!-- Atom type -->
170+
<xsl:text>, "</xsl:text><xsl:value-of select="$atom" /><xsl:text>", </xsl:text>
171+
<!-- Math latex name or nil -->
172+
<xsl:call-template name="format-mathlatex">
173+
<xsl:with-param name="mathlatex" select="$mathlatex" />
174+
</xsl:call-template>
175+
<!-- Description -->
176+
<xsl:text>, "</xsl:text><xsl:value-of select="description" /><xsl:text>"</xsl:text>
177+
<!-- Operator dictionary or nil -->
178+
<xsl:choose>
179+
<xsl:when test="operator-dictionary">
180+
<xsl:text>, {</xsl:text>
181+
<xsl:apply-templates select="operator-dictionary">
182+
<xsl:sort select="@priority" data-type="number" order="descending" /><!-- sort by @priority -->
183+
</xsl:apply-templates>
184+
<xsl:text>}</xsl:text>
185+
</xsl:when>
186+
<xsl:otherwise><xsl:text>, nil</xsl:text></xsl:otherwise>
187+
</xsl:choose>
188+
<xsl:text>)</xsl:text>
189+
</xsl:if>
190+
</xsl:template>
191+
192+
<xsl:template match="operator-dictionary">
193+
{ <xsl:for-each select="@*">
194+
<xsl:sort select="name()" />
195+
<xsl:value-of select="name()" /> = <xsl:call-template name="format-value">
196+
<xsl:with-param name="value" select="." />
197+
</xsl:call-template><xsl:if test="position() != last()">, </xsl:if>
198+
</xsl:for-each> }<xsl:if test="position() != last()">,</xsl:if>
199+
</xsl:template>
200+
201+
</xsl:stylesheet>

0 commit comments

Comments
 (0)