Skip to content

Commit

Permalink
refactor(languages): Move language specific discretionary handling ou…
Browse files Browse the repository at this point in the history
…t of generic module
  • Loading branch information
alerque committed Jan 26, 2024
1 parent aed271e commit 95ac070
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 64 deletions.
66 changes: 14 additions & 52 deletions core/hyphenator-liang.lua
Expand Up @@ -95,11 +95,19 @@ SILE.hyphenator = {}
SILE.hyphenator.languages = {}
SILE._hyphenators = {}

--- Fallback per-segment hook for languages without their own hyphenateSegments.
-- Produces a plain discretionary whose prebreak is the shaped `font.hyphenchar`
-- setting; the segment list is handed back untouched.
-- @tparam table node Node being hyphenated; its `options` are passed to the shaper.
-- @tparam table segments Hyphenation segments of the node's text.
-- @param _ Index of the current segment (unused here).
-- @treturn table The discretionary to place after the current segment.
-- @treturn table The same `segments` table that was passed in.
local function defaultHyphenateSegments (node, segments, _)
  local hyphenChar = SILE.settings:get("font.hyphenchar")
  local prebreak = SILE.shaper:createNnodes(hyphenChar, node.options)
  local discretionary = SILE.nodefactory.discretionary({ prebreak = prebreak })
  return discretionary, segments
end

--- Lazily prepare hyphenation state for a language.
-- On first use, creates the per-language hyphenator record and passes it to
-- loadPatterns (defined outside this view; presumably fills in the trie and
-- exceptions -- confirm there). On every call, makes sure an existing language
-- table exposes a hyphenateSegments hook, installing the default when missing.
-- @tparam string lang Language code used as key in SILE._hyphenators.
local initHyphenator = function (lang)
  local hyphenator = SILE._hyphenators[lang]
  if not hyphenator then
    hyphenator = { minWord = 5, leftmin = 2, rightmin = 2, trie = {}, exceptions = {} }
    SILE._hyphenators[lang] = hyphenator
    loadPatterns(hyphenator, lang)
  end
  -- Looked up only after loadPatterns has run, matching the original statement order.
  local languageSet = SILE.hyphenator.languages[lang]
  if languageSet and not languageSet.hyphenateSegments then
    languageSet.hyphenateSegments = defaultHyphenateSegments
  end
end

local hyphenateNode = function (node)
Expand All @@ -110,71 +118,25 @@ local hyphenateNode = function (node)
end
initHyphenator(node.language)
local segments = SILE._hyphenate(SILE._hyphenators[node.language], node.text)
local hyphen
if #segments > 1 then
local hyphen = SILE.shaper:createNnodes(SILE.settings:get("font.hyphenchar"), node.options)
local hyphenateSegments = SILE.hyphenator.languages[node.language].hyphenateSegments
local newnodes = {}
for j, segment in ipairs(segments) do
local specificDiscretionary
if segment == "" then
SU.dump({ j, segments })
SU.error("No hyphenation segment should ever be empty", true)
end
if node.options.language == "tr" then
local nextApostrophe = j < #segments and luautf8.match(segments[j+1], "^['’]")
if nextApostrophe then
segments[j+1] = luautf8.gsub(segments[j+1], "^['’]", "")
local replacement = SILE.shaper:createNnodes(nextApostrophe, node.options)
if SILE.settings:get("languages.tr.replaceApostropheAtHyphenation") then
-- leading apostrophe (on next segment) cancels when hyphenated
specificDiscretionary = SILE.nodefactory.discretionary({ replacement = replacement, prebreak = hyphen })
else
-- hyphen character substituted for upcoming apostrophe
local kesme = SILE.shaper:createNnodes(nextApostrophe, node.options)
specificDiscretionary = SILE.nodefactory.discretionary({ replacement = replacement, prebreak = kesme })
end
end
elseif node.options.language == "ca" then
-- punt volat (middle dot) cancels when hyphenated
-- Catalan typists may use a punt volat or precomposed characters.
-- The shaper might behave differently depending on the font, so we need to
-- be consistent here with the typist's choice.
if luautf8.find(segment, "ŀ$") then -- U+0140
segment = luautf8.sub(segment, 1, -2)
local ldot = SILE.shaper:createNnodes("ŀ", node.options)
local lhyp = SILE.shaper:createNnodes("l" .. SILE.settings:get("font.hyphenchar"), node.options)
specificDiscretionary = SILE.nodefactory.discretionary({ replacement = ldot, prebreak = lhyp })
elseif luautf8.find(segment, "Ŀ$") then -- U+013F
segment = luautf8.sub(segment, 1, -2)
local ldot = SILE.shaper:createNnodes("Ŀ", node.options)
local lhyp = SILE.shaper:createNnodes("L" .. SILE.settings:get("font.hyphenchar"), node.options)
specificDiscretionary = SILE.nodefactory.discretionary({ replacement = ldot, prebreak = lhyp })
elseif luautf8.find(segment, "l·$") then -- l + U+00B7
segment = luautf8.sub(segment, 1, -3)
local ldot = SILE.shaper:createNnodes("", node.options)
local lhyp = SILE.shaper:createNnodes("l" .. SILE.settings:get("font.hyphenchar"), node.options)
specificDiscretionary = SILE.nodefactory.discretionary({ replacement = ldot, prebreak = lhyp })
elseif luautf8.find(segment, "L·$") then -- L + U+00B7
segment = luautf8.sub(segment, 1, -3)
local ldot = SILE.shaper:createNnodes("", node.options)
local lhyp = SILE.shaper:createNnodes("L" .. SILE.settings:get("font.hyphenchar"), node.options)
specificDiscretionary = SILE.nodefactory.discretionary({ replacement = ldot, prebreak = lhyp })
end
end
for _, newNode in ipairs(SILE.shaper:createNnodes(segment, node.options)) do
hyphen, segments = hyphenateSegments(node, segments, j)
for _, newNode in ipairs(SILE.shaper:createNnodes(segments[j], node.options)) do
if newNode.is_nnode then
newNode.parent = node
table.insert(newnodes, newNode)
end
end
if j < #segments then
if specificDiscretionary then
specificDiscretionary.parent = node
table.insert(newnodes, specificDiscretionary)
else
local newNode = SILE.nodefactory.discretionary({ prebreak = hyphen })
newNode.parent = node
table.insert(newnodes, newNode)
end
hyphen.parent = node
table.insert(newnodes, hyphen)
end
end
node.children = newnodes
Expand Down
31 changes: 31 additions & 0 deletions languages/ca.lua
Expand Up @@ -6,6 +6,37 @@ SILE.nodeMakers.ca = pl.class({
})

SILE.hyphenator.languages["ca"] = {}

--- Catalan per-segment hyphenation hook.
-- Handles the geminated l (ela geminada): the punt volat (middle dot) cancels
-- when the word is hyphenated at that point, so "l·" / "ŀ" becomes "l" plus the
-- hyphen character at a break, and is kept as typed when no break is taken.
-- Catalan typists may use a punt volat or precomposed characters. The shaper
-- might behave differently depending on the font, so we need to be consistent
-- here with the typist's choice.
-- @tparam table node Node being hyphenated; its `options` are passed to the shaper.
-- @tparam table segments Hyphenation segments; `segments[j]` is trimmed in place
--   when it ends in a geminated l.
-- @tparam number j Index of the segment the discretionary follows.
-- @treturn table Discretionary with the prebreak (and, for a geminated l, the
--   replacement) to insert after segment j.
-- @treturn table The (possibly modified) `segments` table.
SILE.hyphenator.languages["ca"].hyphenateSegments = function (node, segments, j)
  local hyphenChar = SILE.settings:get("font.hyphenchar")
  local replacement, hyphen
  if luautf8.find(segments[j], "ŀ$") then -- U+0140
    segments[j] = luautf8.sub(segments[j], 1, -2)
    replacement = SILE.shaper:createNnodes("ŀ", node.options)
    hyphen = SILE.shaper:createNnodes("l" .. hyphenChar, node.options)
  elseif luautf8.find(segments[j], "Ŀ$") then -- U+013F
    segments[j] = luautf8.sub(segments[j], 1, -2)
    replacement = SILE.shaper:createNnodes("Ŀ", node.options)
    hyphen = SILE.shaper:createNnodes("L" .. hyphenChar, node.options)
  elseif luautf8.find(segments[j], "l·$") then -- l + U+00B7
    -- Two characters (l and the dot) are removed from the segment, so both must
    -- come back through the replacement or the unbroken word would lose them.
    segments[j] = luautf8.sub(segments[j], 1, -3)
    replacement = SILE.shaper:createNnodes("l·", node.options)
    hyphen = SILE.shaper:createNnodes("l" .. hyphenChar, node.options)
  elseif luautf8.find(segments[j], "L·$") then -- L + U+00B7
    segments[j] = luautf8.sub(segments[j], 1, -3)
    replacement = SILE.shaper:createNnodes("L·", node.options)
    hyphen = SILE.shaper:createNnodes("L" .. hyphenChar, node.options)
  else
    -- No geminated l at the end of this segment: ordinary hyphen, no replacement.
    hyphen = SILE.shaper:createNnodes(hyphenChar, node.options)
  end
  local discretionary = SILE.nodefactory.discretionary({ replacement = replacement, prebreak = hyphen })
  return discretionary, segments
end

SILE.hyphenator.languages["ca"].patterns =
{
--
Expand Down
19 changes: 19 additions & 0 deletions languages/tr.lua
Expand Up @@ -14,6 +14,25 @@ SILE.nodeMakers.tr = pl.class(SILE.nodeMakers.unicode)
SILE.nodeMakers.tr.wordTypes = { cm = true, qu = true }

SILE.hyphenator.languages["tr"] = {}

--- Turkish per-segment hyphenation hook.
-- When the following segment starts with an apostrophe (' or ’), that apostrophe
-- is moved out of the segment and into the discretionary:
--   * no break taken: the apostrophe is rendered (replacement);
--   * break taken: either the font hyphen character is rendered (setting
--     `languages.tr.replaceApostropheAtHyphenation` truthy) or the apostrophe
--     itself ends the line (setting falsy).
-- Without a leading apostrophe on the next segment, an ordinary hyphen
-- discretionary is produced.
-- @tparam table node Node being hyphenated; its `options` are passed to the shaper.
-- @tparam table segments Hyphenation segments; `segments[j+1]` loses its leading
--   apostrophe in place when one is found.
-- @tparam number j Index of the segment the discretionary follows.
-- @treturn table Discretionary to insert after segment j.
-- @treturn table The (possibly modified) `segments` table.
SILE.hyphenator.languages["tr"].hyphenateSegments = function (node, segments, j)
  local hyphenChar, replacement
  local maybeNextApostrophe = #segments > j and luautf8.match(segments[j+1], "^['’]")
  if maybeNextApostrophe then
    segments[j+1] = luautf8.gsub(segments[j+1], "^['’]", "")
    -- The apostrophe was just removed from the next segment, so it has to be
    -- supplied as the replacement whatever the setting says; otherwise it
    -- would disappear from the output whenever no break happens here.
    replacement = SILE.shaper:createNnodes(maybeNextApostrophe, node.options)
    if SILE.settings:get("languages.tr.replaceApostropheAtHyphenation") then
      hyphenChar = SILE.settings:get("font.hyphenchar")
    else
      hyphenChar = maybeNextApostrophe
    end
  else
    hyphenChar = SILE.settings:get("font.hyphenchar")
  end
  local hyphen = SILE.shaper:createNnodes(hyphenChar, node.options)
  return SILE.nodefactory.discretionary({ replacement = replacement, prebreak = hyphen }), segments
end

SILE.hyphenator.languages["tr"].patterns =
{
"2a1",
Expand Down
29 changes: 17 additions & 12 deletions languages/unicode.lua
Expand Up @@ -68,39 +68,44 @@ SILE.nodeMakers.base = pl.class({
return chardata[cp]
end,

isPunctuation = function (self, char)
return self.puctuationTypes[self:charData(char).category]
-- Tells whether char is a non-breaking space that should currently be treated
-- as "active", i.e. the languages.fixedNbsp setting is not truthy.
-- A falsy result from isNonBreakingSpace is passed through unchanged.
isActiveNonBreakingSpace = function (self, char)
  local nbsp = self:isNonBreakingSpace(char)
  if not nbsp then
    return nbsp
  end
  return not SILE.settings:get("languages.fixedNbsp")
end,

isSpace = function (self, char)
return self.spaceTypes[self:charData(char).linebreak]
-- Looks up char's `linebreak` value (from charData) in self.breakingTypes and
-- returns the table entry (nil when the class is not listed).
isBreaking = function (self, char)
  local linebreak = self:charData(char).linebreak
  return self.breakingTypes[linebreak]
end,

-- Tells whether char's data carries the context name "nobreakspace".
-- Returns the falsy contextname itself (e.g. nil) when there is none,
-- otherwise the boolean result of the comparison.
isNonBreakingSpace = function (self, char)
  local contextname = self:charData(char).contextname
  return contextname and contextname == "nobreakspace"
end,

isActiveNonBreakingSpace = function (self, char)
return self:isNonBreakingSpace(char) and not SILE.settings:get("languages.fixedNbsp")
-- Looks up char's `category` value (from charData) in self.puctuationTypes and
-- returns the table entry (nil when the category is not listed).
isPunctuation = function (self, char)
  local category = self:charData(char).category
  return self.puctuationTypes[category]
end,

isBreaking = function (self, char)
return self.breakingTypes[self:charData(char).linebreak]
-- Looks up char's `linebreak` value (from charData) in self.spaceTypes and
-- returns the table entry (nil when the class is not listed).
isSpace = function (self, char)
  local linebreak = self:charData(char).linebreak
  return self.spaceTypes[linebreak]
end,

isQuote = function (self, char)
return self.quoteTypes[self:charData(char).linebreak]
end
end,

-- Looks up char's `linebreak` value (from charData) in self.wordTypes and
-- returns the table entry (nil when the class is not listed).
-- The argument is a character, not a linebreak class name.
isWord = function (self, char)
  local linebreak = self:charData(char).linebreak
  return self.wordTypes[linebreak]
end,

})

SILE.nodeMakers.unicode = pl.class(SILE.nodeMakers.base)

SILE.nodeMakers.unicode.wordTypes = { cm = true }
SILE.nodeMakers.unicode.spaceTypes = { sp = true }
SILE.nodeMakers.unicode.breakingTypes = { ba = true, zw = true }
SILE.nodeMakers.unicode.puctuationTypes = { po = true }
SILE.nodeMakers.unicode.quoteTypes = {} -- quote linebreak category is ambiguous depending on the language
SILE.nodeMakers.unicode.spaceTypes = { sp = true }
SILE.nodeMakers.unicode.wordTypes = { cm = true }

function SILE.nodeMakers.unicode:dealWith (item)
local char = item.text
Expand All @@ -119,7 +124,7 @@ function SILE.nodeMakers.unicode:dealWith (item)
elseif self:isQuote(item.text) then
self:addToken(char, item)
self:makeToken()
elseif self.lasttype and (thistype and thistype ~= self.lasttype and not self.wordTypes[thistype]) then
elseif self.lasttype and (thistype and thistype ~= self.lasttype and not self:isWord(thistype)) then
self:addToken(char, item)
else
self:letterspace()
Expand Down

0 comments on commit 95ac070

Please sign in to comment.