|
| 1 | +<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> |
| 2 | +<!-- |
| 3 | + Stylesheet to convert the unicode.xml file to a SILE Lua file: |
| 4 | + xsltproc unicode-xml-to-sile.xsl unicode.xml > ../packages/math/unicode-symbols-generated.lua |
| 5 | + Where unicode.xml is: |
| 6 | + https://raw.githubusercontent.com/w3c/xml-entities/gh-pages/unicode.xml |
| 7 | +--> |
| 8 | +<xsl:output method="text" indent="no"/> |
| 9 | + |
| 10 | +<xsl:template name="format-value"><!-- Named template: writes $value as a Lua literal (bare integer, bare boolean, or double-quoted string) --> |
| 11 | + <xsl:param name="value" /><!-- value to format; the operator-dictionary template passes each attribute node here --> |
| 12 | + <xsl:choose> |
| 13 | + <!-- integer: floor($value) = $value only holds when $value converts to a whole number; non-numeric text converts to NaN, which never compares equal --> |
| 14 | + <xsl:when test="floor($value) = $value"><xsl:value-of select="$value" /></xsl:when> |
| 15 | + <!-- boolean: the literal words true and false are written unquoted --> |
| 16 | + <xsl:when test="$value = 'true' or $value = 'false'"><xsl:value-of select="$value" /></xsl:when> |
| 17 | + <!-- string: everything else (including non-integer numbers such as 0.5) is wrapped in double quotes; quotes and backslashes inside the value are NOT escaped --> |
| 18 | + <xsl:otherwise>"<xsl:value-of select="$value" />"</xsl:otherwise> |
| 19 | + </xsl:choose> |
| 20 | +</xsl:template> |
| 21 | + |
| 22 | +<xsl:template name="format-codepoint"><!-- Named template: writes a character id (UXXXX or UXXXX-YYYY-...) as the Lua call U(0xXXXX, 0xYYYY, ...) --> |
| 23 | + <xsl:param name="codepoint" /><!-- id to convert; its first character (the U) is skipped --> |
| 24 | + <xsl:param name="lead" select="'U('" /><!-- text written before the current codepoint; only the recursive call overrides it (with ', '), outside callers leave it unset --> |
| 25 | + <xsl:variable name="hex" select="substring($codepoint, 2)" /> |
| 26 | + <xsl:choose> |
| 27 | + <xsl:when test="contains($hex, '-')"> |
| 28 | + <!-- Several codepoints: write the first one, then recurse on the remainder (with the U re-added, since each pass strips one leading character) so any number of codepoints is handled --> |
| 29 | + <xsl:value-of select="concat($lead, '0x', substring-before($hex, '-'))" /> |
| 30 | + <xsl:call-template name="format-codepoint"> |
| 31 | + <xsl:with-param name="codepoint" select="concat('U', substring-after($hex, '-'))" /> |
| 32 | + <xsl:with-param name="lead" select="', '" /> |
| 33 | + </xsl:call-template> |
| 34 | + </xsl:when> |
| 35 | + <xsl:otherwise><xsl:value-of select="concat($lead, '0x', $hex, ')')" /></xsl:otherwise><!-- last (or only) codepoint also closes the U(...) call --> |
| 36 | + </xsl:choose> |
| 37 | +</xsl:template> |
| 38 | + |
| 39 | +<xsl:template name="format-class"><!-- Named template: maps a Unicode math class letter to the short atom type name written into the generated addSymbol call --> |
| 40 | + <xsl:param name="class" /><!-- math class letter (the character template passes unicodedata/@mathclass) --> |
| 41 | + <xsl:param name="combclass" /><!-- combining class, only consulted for class D (diacritics) --> |
| 42 | + <xsl:param name="description" /><!-- character description, only consulted for class L (large operators) --> |
| 43 | + <xsl:choose> |
| 44 | + <xsl:when test="$class = 'N'">ord</xsl:when><!-- Normal = mathord = atomType.ordinary --> |
| 45 | + <xsl:when test="$class = 'A'">ord</xsl:when><!-- Alphabetic = mathalpha = atomType.ordinary --> |
| 46 | + <xsl:when test="$class = 'B'">bin</xsl:when><!-- Binary = mathbin = atomType.binaryOperator --> |
| 47 | + <xsl:when test="$class = 'C'">close</xsl:when><!-- Closing = mathclose = atomType.closeSymbol --> |
| 48 | + <xsl:when test="$class = 'D'"><!-- Diacritic: the combining class decides between accent, bottom accent and ordinary --> |
| 49 | + <xsl:choose> |
| 50 | + <xsl:when test="$combclass = '220'">botaccent</xsl:when> |
| 51 | + <xsl:when test="$combclass = '230'">accent</xsl:when> |
| 52 | + <xsl:otherwise>ord</xsl:otherwise><!-- assuming atomType.ordinary --> |
| 53 | + </xsl:choose> |
| 54 | + </xsl:when> |
| 55 | + <xsl:when test="$class = 'F'">ord</xsl:when><!-- Fence = mathfence = atomType.ordinary --> |
| 56 | + <xsl:when test="$class = 'G'">ord</xsl:when><!-- Glyph Part = assuming atomType.ordinary --> |
| 57 | + <xsl:when test="$class = 'L'"><!-- Large --> |
| 58 | + <xsl:choose> |
| 59 | + <!-- SILE uses the atom for spacing currently (ignoring lspace and rspace) --> |
| 60 | + <!-- HACK: integral signs are NOT considered as big operators for spacing purposes; they are detected by the word INTEGRAL in the description --> |
| 61 | + <xsl:when test="contains($description,'INTEGRAL')">ord</xsl:when> |
| 62 | + <xsl:otherwise>big</xsl:otherwise><!-- mathop = atomType.bigOperator --> |
| 63 | + </xsl:choose> |
| 64 | + </xsl:when> |
| 65 | + <xsl:when test="$class = 'O'">open</xsl:when><!-- Opening = mathopen = atomType.openingSymbol --> |
| 66 | + <xsl:when test="$class = 'P'">punct</xsl:when><!-- Punctuation = mathpunct = atomType.punctuationSymbol --> |
| 67 | + <xsl:when test="$class = 'R'">rel</xsl:when><!-- Relation = mathrel = atomType.relationalOperator --> |
| 68 | + <xsl:when test="$class = 'S'">ord</xsl:when><!-- Space = assuming atomType.ordinary --> |
| 69 | + <xsl:when test="$class = 'U'">ord</xsl:when><!-- Unary = mathord = atomType.ordinary --> |
| 70 | + <xsl:when test="$class = 'V'">bin</xsl:when><!-- Vary = assume mathbin = atomType.binaryOperator --> |
| 71 | + <xsl:otherwise>ord</xsl:otherwise><!-- assuming atomType.ordinary if not specified (also reached when the class is missing or unknown) --> |
| 72 | + </xsl:choose> |
| 73 | +</xsl:template> |
| 74 | + |
| 75 | +<xsl:template name="format-mathlatex"><!-- Named template: writes the TeX-like symbol name as a quoted Lua string, or nil when there is none --> |
| 76 | + <xsl:param name="mathlatex" /><!-- name text as found in the XML (the character template passes the unicode-math mathlatex text); may be empty --> |
| 77 | + <xsl:choose> |
| 78 | + <xsl:when test="$mathlatex">"<xsl:value-of select="substring($mathlatex, 2)" />"</xsl:when><!-- substring(..., 2) drops the first character of the name (presumably the leading backslash; confirm against unicode.xml) --> |
| 79 | + <xsl:otherwise>nil</xsl:otherwise><!-- no name available: the generated call receives a Lua nil --> |
| 80 | + </xsl:choose> |
| 81 | +</xsl:template> |
| 82 | + |
| 83 | +<xsl:template match="unicode"><!-- Root template: everything below that is not an xsl element is copied verbatim into the generated Lua file (header, U helper, addSymbol), followed by one addSymbol call per selected character and the returned module table -->--- GENERATED FILE, DO NOT EDIT MANUALLY |
| 84 | +-- |
| 85 | +-- Operator dictionary for unicode characters |
| 86 | +-- |
| 87 | +-- Extracted from https://raw.githubusercontent.com/w3c/xml-entities/gh-pages/unicode.xml |
| 88 | +-- (https://github.com/w3c/xml-entities) |
| 89 | +-- Copyright David Carlisle 1999-2024 |
| 90 | +-- Use and distribution of this code are permitted under the terms of the |
| 91 | +-- W3C Software Notice and License. |
| 92 | +-- http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231.html |
| 93 | +-- This file is a collection of information about how to map Unicode entities to LaTeX, |
| 94 | +-- and various SGML/XML entity sets (ISO and MathML/HTML). |
| 95 | +-- A Unicode character may be mapped to several entities. |
| 96 | +-- Originally designed by Sebastian Rahtz in conjunction with Barbara Beeton for the STIX project |
| 97 | +-- |
| 98 | + |
| 99 | +local atoms = require("packages/math/atoms") |
| 100 | +local atomTypeShort = atoms.atomTypeShort |
| 101 | + |
| 102 | +--- Transform a list of codepoints into a string |
| 103 | +local function U (...) |
| 104 | + local t = { ... } |
| 105 | + local str = "" |
| 106 | + for i = 1, #t do |
| 107 | + str = str .. luautf8.char(t[i]) |
| 108 | + end |
| 109 | + return str |
| 110 | +end |
| 111 | + |
| 112 | +local symbols = {} |
| 113 | +local operatorDict = {} |
| 114 | + |
| 115 | +--- Register a symbol |
| 116 | +-- @tparam string str String representation of the symbol |
| 117 | +-- @tparam string shortatom Short atom type |
| 118 | +-- @tparam string mathlatex TeX-like name of the symbol (from unicode-math) |
| 119 | +-- @tparam string _ Unicode name of the symbol (informative) |
| 120 | +-- @tparam table ops List of operator forms and their properties |
| 121 | +local function addSymbol (str, shortatom, mathlatex, _, ops) |
| 122 | + if mathlatex then |
| 123 | + SU.debug("math.symbols", "Registering symbol", str, "as", mathlatex) |
| 124 | + symbols[mathlatex] = str |
| 125 | + end |
| 126 | + local op = {} |
| 127 | + op.atom = atomTypeShort[shortatom] |
| 128 | + if ops then |
| 129 | + op.forms = {} |
| 130 | + for _, v in pairs(ops) do |
| 131 | + if v.form then |
| 132 | + v.lspace = SILE.types.length(v.lspace and ("%smu"):format(v.lspace) or "0mu") |
| 133 | + v.rspace = SILE.types.length(v.rspace and ("%smu"):format(v.rspace) or "0mu") |
| 134 | + op.forms[v.form] = v |
| 135 | + else |
| 136 | + SU.warn("No form for operator " .. str .. " (operator dictionary is probably incomplete)") |
| 137 | + end |
| 138 | + end |
| 139 | + end |
| 140 | + operatorDict[str] = op |
| 141 | +end |
| 142 | + |
| 143 | +<!-- Body of the generated file: the character template writes an addSymbol(...) call for each charlist/character element it decides to keep --><xsl:apply-templates select="charlist/character" /> |
| 144 | + |
| 145 | +return { |
| 146 | + operatorDict = operatorDict, |
| 147 | + symbols = symbols |
| 148 | +} |
| 149 | +</xsl:template> |
| 150 | + |
| 151 | +<xsl:template match="character"><!-- Writes one addSymbol(codepoints, atom, mathlatex, description, operators) Lua call for a character element, or nothing when the character carries no useful data --> |
| 152 | + <xsl:variable name="mathclass" select="unicodedata/@mathclass" /> |
| 153 | + <xsl:variable name="mathlatex" select="mathlatex[@set='unicode-math']/text()" /><!-- only names from the unicode-math set are used --> |
| 154 | + <xsl:variable name="combclass" select="unicodedata/@combclass" /> |
| 155 | + <xsl:variable name="atom"><!-- short atom type name computed by format-class --> |
| 156 | + <xsl:call-template name="format-class"> |
| 157 | + <xsl:with-param name="class" select="$mathclass" /> |
| 158 | + <xsl:with-param name="combclass" select="$combclass" /> |
| 159 | + <xsl:with-param name="description" select="description" /> |
| 160 | + </xsl:call-template> |
| 161 | + </xsl:variable> |
| 162 | + <xsl:if test="$atom != 'ord' or $mathlatex or operator-dictionary"><!-- skip characters that are plain ordinary atoms with no unicode-math name and no operator-dictionary entry --> |
| 163 | + <xsl:text> |
| 164 | +addSymbol(</xsl:text> |
| 165 | + <!-- Codepoints: the id attribute rendered as a U(0x...) call by format-codepoint --> |
| 166 | + <xsl:call-template name="format-codepoint"> |
| 167 | + <xsl:with-param name="codepoint" select="@id" /> |
| 168 | + </xsl:call-template> |
| 169 | + <!-- Atom type, written as a quoted Lua string --> |
| 170 | + <xsl:text>, "</xsl:text><xsl:value-of select="$atom" /><xsl:text>", </xsl:text> |
| 171 | + <!-- Math latex name (quoted, first character dropped by format-mathlatex) or nil --> |
| 172 | + <xsl:call-template name="format-mathlatex"> |
| 173 | + <xsl:with-param name="mathlatex" select="$mathlatex" /> |
| 174 | + </xsl:call-template> |
| 175 | + <!-- Description, written between double quotes without any escaping --> |
| 176 | + <xsl:text>, "</xsl:text><xsl:value-of select="description" /><xsl:text>"</xsl:text> |
| 177 | + <!-- Operator dictionary (a Lua table with one sub-table per operator-dictionary child) or nil --> |
| 178 | + <xsl:choose> |
| 179 | + <xsl:when test="operator-dictionary"> |
| 180 | + <xsl:text>, {</xsl:text> |
| 181 | + <xsl:apply-templates select="operator-dictionary"> |
| 182 | + <xsl:sort select="@priority" data-type="number" order="descending" /><!-- sort by @priority, highest first, compared as numbers --> |
| 183 | + </xsl:apply-templates> |
| 184 | + <xsl:text>}</xsl:text> |
| 185 | + </xsl:when> |
| 186 | + <xsl:otherwise><xsl:text>, nil</xsl:text></xsl:otherwise> |
| 187 | + </xsl:choose> |
| 188 | + <xsl:text>)</xsl:text> |
| 189 | + </xsl:if> |
| 190 | +</xsl:template> |
| 191 | + |
| 192 | +<xsl:template match="operator-dictionary"><!-- Writes one operator-dictionary element as a Lua table { name = value, ... } built from all of its attributes --> |
| 193 | + { <xsl:for-each select="@*"> |
| 194 | + <xsl:sort select="name()" /><!-- attributes are written in order of their names --> |
| 195 | + <xsl:value-of select="name()" /> = <xsl:call-template name="format-value"> |
| 196 | + <xsl:with-param name="value" select="." /> |
| 197 | + </xsl:call-template><xsl:if test="position() != last()">, </xsl:if> |
| 198 | + </xsl:for-each> }<xsl:if test="position() != last()">,</xsl:if><!-- a comma separates this table from the next one; none is written after the last --> |
| 199 | +</xsl:template> |
| 200 | + |
| 201 | +</xsl:stylesheet> |
0 commit comments