diff --git a/core/hyphenator-liang.lua b/core/hyphenator-liang.lua
index 77d6066bc..a6bd30b65 100644
--- a/core/hyphenator-liang.lua
+++ b/core/hyphenator-liang.lua
@@ -95,11 +95,23 @@ SILE.hyphenator = {}
 SILE.hyphenator.languages = {}
 SILE._hyphenators = {}
 
+-- Default hyphenateSegments hook, used for languages that do not register their own.
+-- Called as hook(node, segments, j); returns the discretionary to insert after
+-- segments[j] and the (possibly rewritten) segments table. This default leaves the
+-- segments untouched and breaks with the plain font.hyphenchar.
+local function defaultHyphenateSegments (node, segments, _)
+  local hyphen = SILE.shaper:createNnodes(SILE.settings:get("font.hyphenchar"), node.options)
+  return SILE.nodefactory.discretionary({ prebreak = hyphen }), segments
+end
+
 local initHyphenator = function (lang)
   if not SILE._hyphenators[lang] then
     SILE._hyphenators[lang] = { minWord = 5, leftmin = 2, rightmin = 2, trie = {}, exceptions = {} }
     loadPatterns(SILE._hyphenators[lang], lang)
   end
+  if SILE.hyphenator.languages[lang] and not SILE.hyphenator.languages[lang].hyphenateSegments then
+    SILE.hyphenator.languages[lang].hyphenateSegments = defaultHyphenateSegments
+  end
 end
 
 local hyphenateNode = function (node)
@@ -110,71 +122,29 @@ local hyphenateNode = function (node)
   end
   initHyphenator(node.language)
   local segments = SILE._hyphenate(SILE._hyphenators[node.language], node.text)
+  local hyphen
   if #segments > 1 then
-    local hyphen = SILE.shaper:createNnodes(SILE.settings:get("font.hyphenchar"), node.options)
+    -- Languages without an entry in SILE.hyphenator.languages get no hook from
+    -- initHyphenator, so fall back to the default rather than indexing nil.
+    local languageSet = SILE.hyphenator.languages[node.language]
+    local hyphenateSegments = languageSet and languageSet.hyphenateSegments or defaultHyphenateSegments
     local newnodes = {}
     for j, segment in ipairs(segments) do
-      local specificDiscretionary
       if segment == "" then
         SU.dump({ j, segments })
         SU.error("No hyphenation segment should ever be empty", true)
       end
-      if node.options.language == "tr" then
-        local nextApostrophe = j < #segments and luautf8.match(segments[j+1], "^['’]")
-        if nextApostrophe then
-          segments[j+1] = luautf8.gsub(segments[j+1], "^['’]", "")
-          local replacement = SILE.shaper:createNnodes(nextApostrophe, node.options)
-          if SILE.settings:get("languages.tr.replaceApostropheAtHyphenation") then
-            -- leading apostrophe (on next segment) cancels when hyphenated
-            specificDiscretionary = SILE.nodefactory.discretionary({ replacement = replacement, prebreak = hyphen })
-          else
-            -- hyphen character substituted for upcomming apostrophe
-            local kesme = SILE.shaper:createNnodes(nextApostrophe, node.options)
-            specificDiscretionary = SILE.nodefactory.discretionary({ replacement = replacement, prebreak = kesme })
-          end
-        end
-      elseif node.options.language == "ca" then
-        -- punt volat (middle dot) cancels when hyphenated
-        -- Catalan typists may use a punt volat or precomposed characters.
-        -- The shaper might behave differently depending on the font, so we need to
-        -- be consistent here with the typist's choice.
-        if luautf8.find(segment, "ŀ$") then -- U+0140
-          segment = luautf8.sub(segment, 1, -2)
-          local ldot = SILE.shaper:createNnodes("ŀ", node.options)
-          local lhyp = SILE.shaper:createNnodes("l" .. SILE.settings:get("font.hyphenchar"), node.options)
-          specificDiscretionary = SILE.nodefactory.discretionary({ replacement = ldot, prebreak = lhyp })
-        elseif luautf8.find(segment, "Ŀ$") then -- U+013F
-          segment = luautf8.sub(segment, 1, -2)
-          local ldot = SILE.shaper:createNnodes("Ŀ", node.options)
-          local lhyp = SILE.shaper:createNnodes("L" .. SILE.settings:get("font.hyphenchar"), node.options)
-          specificDiscretionary = SILE.nodefactory.discretionary({ replacement = ldot, prebreak = lhyp })
-        elseif luautf8.find(segment, "l·$") then -- l + U+00B7
-          segment = luautf8.sub(segment, 1, -3)
-          local ldot = SILE.shaper:createNnodes("l·", node.options)
-          local lhyp = SILE.shaper:createNnodes("l" .. SILE.settings:get("font.hyphenchar"), node.options)
-          specificDiscretionary = SILE.nodefactory.discretionary({ replacement = ldot, prebreak = lhyp })
-        elseif luautf8.find(segment, "L·$") then -- L + U+00B7
-          segment = luautf8.sub(segment, 1, -3)
-          local ldot = SILE.shaper:createNnodes("L·", node.options)
-          local lhyp = SILE.shaper:createNnodes("L" .. SILE.settings:get("font.hyphenchar"), node.options)
-          specificDiscretionary = SILE.nodefactory.discretionary({ replacement = ldot, prebreak = lhyp })
-        end
-      end
-      for _, newNode in ipairs(SILE.shaper:createNnodes(segment, node.options)) do
+      -- The hook may rewrite segments[j] and segments[j+1], so shape segments[j] only afterwards.
+      hyphen, segments = hyphenateSegments(node, segments, j)
+      for _, newNode in ipairs(SILE.shaper:createNnodes(segments[j], node.options)) do
         if newNode.is_nnode then
           newNode.parent = node
           table.insert(newnodes, newNode)
         end
       end
       if j < #segments then
-        if specificDiscretionary then
-          specificDiscretionary.parent = node
-          table.insert(newnodes, specificDiscretionary)
-        else
-          local newNode = SILE.nodefactory.discretionary({ prebreak = hyphen })
-          newNode.parent = node
-          table.insert(newnodes, newNode)
-        end
+        hyphen.parent = node
+        table.insert(newnodes, hyphen)
       end
     end
     node.children = newnodes
diff --git a/languages/ca.lua b/languages/ca.lua
index 58437edc7..e3a4a7cca 100644
--- a/languages/ca.lua
+++ b/languages/ca.lua
@@ -6,6 +6,39 @@ SILE.nodeMakers.ca = pl.class({
 })
 
 SILE.hyphenator.languages["ca"] = {}
+
+-- Hyphenation hook: returns the discretionary to insert after segments[j] and the
+-- segments table, with any trailing dotted l stripped from segments[j].
+SILE.hyphenator.languages["ca"].hyphenateSegments = function (node, segments, j)
+  -- punt volat (middle dot) cancels when hyphenated
+  -- Catalan typists may use a punt volat or precomposed characters.
+  -- The shaper might behave differently depending on the font, so we need to
+  -- be consistent here with the typist's choice.
+  local hyphenChar = SILE.settings:get("font.hyphenchar")
+  local replacement, hyphen
+  if luautf8.find(segments[j], "ŀ$") then -- U+0140
+    segments[j] = luautf8.sub(segments[j], 1, -2)
+    replacement = SILE.shaper:createNnodes("ŀ", node.options)
+    hyphen = SILE.shaper:createNnodes("l" .. hyphenChar, node.options)
+  elseif luautf8.find(segments[j], "Ŀ$") then -- U+013F
+    segments[j] = luautf8.sub(segments[j], 1, -2)
+    replacement = SILE.shaper:createNnodes("Ŀ", node.options)
+    hyphen = SILE.shaper:createNnodes("L" .. hyphenChar, node.options)
+  elseif luautf8.find(segments[j], "l·$") then -- l + U+00B7
+    segments[j] = luautf8.sub(segments[j], 1, -3)
+    replacement = SILE.shaper:createNnodes("l·", node.options)
+    hyphen = SILE.shaper:createNnodes("l" .. hyphenChar, node.options)
+  elseif luautf8.find(segments[j], "L·$") then -- L + U+00B7
+    segments[j] = luautf8.sub(segments[j], 1, -3)
+    replacement = SILE.shaper:createNnodes("L·", node.options)
+    hyphen = SILE.shaper:createNnodes("L" .. hyphenChar, node.options)
+  else
+    hyphen = SILE.shaper:createNnodes(hyphenChar, node.options)
+  end
+  local discretionary = SILE.nodefactory.discretionary({ replacement = replacement, prebreak = hyphen })
+  return discretionary, segments
+end
+
 SILE.hyphenator.languages["ca"].patterns =
 {
 --
diff --git a/languages/tr.lua b/languages/tr.lua
index b74bc05b8..cf2b9329a 100644
--- a/languages/tr.lua
+++ b/languages/tr.lua
@@ -14,6 +14,27 @@ SILE.nodeMakers.tr = pl.class(SILE.nodeMakers.unicode)
 SILE.nodeMakers.tr.wordTypes = { cm = true, qu = true }
 
 SILE.hyphenator.languages["tr"] = {}
+
+-- Hyphenation hook: returns the discretionary to insert after segments[j] and the
+-- segments table, with a leading apostrophe moved out of segments[j+1].
+SILE.hyphenator.languages["tr"].hyphenateSegments = function (node, segments, j)
+  local hyphenChar = SILE.settings:get("font.hyphenchar")
+  local replacement
+  local maybeNextApostrophe = #segments > j and luautf8.match(segments[j+1], "^['’]")
+  if maybeNextApostrophe then
+    segments[j+1] = luautf8.gsub(segments[j+1], "^['’]", "")
+    -- The apostrophe no longer lives in the next segment, so it must always come
+    -- back as the replacement, otherwise the unbroken word loses it.
+    replacement = SILE.shaper:createNnodes(maybeNextApostrophe, node.options)
+    if not SILE.settings:get("languages.tr.replaceApostropheAtHyphenation") then
+      -- Break on the apostrophe itself instead of substituting a hyphen.
+      hyphenChar = maybeNextApostrophe
+    end
+  end
+  local hyphen = SILE.shaper:createNnodes(hyphenChar, node.options)
+  return SILE.nodefactory.discretionary({ replacement = replacement, prebreak = hyphen }), segments
+end
+
 SILE.hyphenator.languages["tr"].patterns =
 {
 "2a1",
diff --git a/languages/unicode.lua b/languages/unicode.lua
index 1caf7f0d4..01cd8ed05 100644
--- a/languages/unicode.lua
+++ b/languages/unicode.lua
@@ -68,12 +68,12 @@ SILE.nodeMakers.base = pl.class({
     return chardata[cp]
   end,
 
-  isPunctuation = function (self, char)
-    return self.puctuationTypes[self:charData(char).category]
+  isActiveNonBreakingSpace = function (self, char)
+    return self:isNonBreakingSpace(char) and not SILE.settings:get("languages.fixedNbsp")
   end,
 
-  isSpace = function (self, char)
-    return self.spaceTypes[self:charData(char).linebreak]
+  isBreaking = function (self, char)
+    return self.breakingTypes[self:charData(char).linebreak]
   end,
 
   isNonBreakingSpace = function (self, char)
@@ -81,26 +81,32 @@ SILE.nodeMakers.base = pl.class({
     return c.contextname and c.contextname == "nobreakspace"
   end,
 
-  isActiveNonBreakingSpace = function (self, char)
-    return self:isNonBreakingSpace(char) and not SILE.settings:get("languages.fixedNbsp")
+  isPunctuation = function (self, char)
+    return self.puctuationTypes[self:charData(char).category]
   end,
 
-  isBreaking = function (self, char)
-    return self.breakingTypes[self:charData(char).linebreak]
+  isSpace = function (self, char)
+    return self.spaceTypes[self:charData(char).linebreak]
   end,
+
   isQuote = function (self, char)
     return self.quoteTypes[self:charData(char).linebreak]
-  end
+  end,
+
+  -- Expects a character (like its siblings), not a linebreak class name.
+  isWord = function (self, char)
+    return self.wordTypes[self:charData(char).linebreak]
+  end,
 
 })
 
 SILE.nodeMakers.unicode = pl.class(SILE.nodeMakers.base)
 
-SILE.nodeMakers.unicode.wordTypes = { cm = true }
-SILE.nodeMakers.unicode.spaceTypes = { sp = true }
 SILE.nodeMakers.unicode.breakingTypes = { ba = true, zw = true }
 SILE.nodeMakers.unicode.puctuationTypes = { po = true }
 SILE.nodeMakers.unicode.quoteTypes = {} -- quote linebreak category is ambiguous depending on the language
+SILE.nodeMakers.unicode.spaceTypes = { sp = true }
+SILE.nodeMakers.unicode.wordTypes = { cm = true }
 
 function SILE.nodeMakers.unicode:dealWith (item)
   local char = item.text