Skip to content

Commit

Permalink
fix(languages): Correct punctuation rules for French
Browse files Browse the repository at this point in the history
  • Loading branch information
Omikhleia authored and Didier Willis committed Sep 15, 2021
1 parent 190683a commit 95c2398
Show file tree
Hide file tree
Showing 2 changed files with 196 additions and 42 deletions.
12 changes: 7 additions & 5 deletions documentation/c08-language.sil
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,13 @@ to control which style is used. The default is \code{left}

\subsection{French}

In French typesetting, there is normally a thin space between text and “high”
punctuation (question marks, exclamation marks, colons, semicolons). SILE
will automatically insert this space if it is not present, and “thin” a normal
space if it is. The size of the space is determined by
\code{languages.fr.punctuationspace}.
In French typesetting, there is normally a non-breakable space between text
and “high” punctuation (a thin fixed space before question marks, exclamation
marks and semicolons, and an inter-word space before colons), and also spaces
within “guillemets” (quotation marks). SILE will automatically apply the
correct space. The size of these spaces is determined by
\code{languages.fr.thinspace}, \code{languages.fr.colonspace} and
\code{languages.fr.guillspace}.

\subsection{Japanese / Chinese}

Expand Down
226 changes: 189 additions & 37 deletions languages/fr.lua
Original file line number Diff line number Diff line change
@@ -1,55 +1,207 @@
-- French language rules

local computeSpaces = function()
  -- Builds the three default French punctuation spaces from the current
  -- shaper settings and returns them as a table of SILE lengths:
  --  - colonspace: the regular inter-word space (enlarged, with stretch and shrink),
  --  - thinspace: a fixed space, half the enlarged inter-word space,
  --  - guillspace: the "guillemet space", as defined in LaTeX's babel-french,
  --    which is based on Thierry Bouche's recommendations.
  -- These should be usual for France and Canada. The Swiss may prefer a thin
  -- space for guillemets, which is why these values are exposed as settings.
  local getSetting = SILE.settings.get
  local enlargement = getSetting("shaper.spaceenlargementfactor")
  local stretch = getSetting("shaper.spacestretchfactor")
  local shrink = getSetting("shaper.spaceshrinkfactor")

  -- Length specifications, expressed in "spc" units.
  local colonSpec = enlargement.."spc plus "..stretch.."spc minus "..shrink.."spc"
  local thinSpec = (0.5 * enlargement).."spc"
  local guillSpec = (0.8 * enlargement).."spc plus "..(0.3 * stretch).."spc minus "..(0.8 * shrink).."spc"

  local result = {}
  result.colonspace = SILE.length(colonSpec)
  result.thinspace = SILE.length(thinSpec)
  result.guillspace = SILE.length(guillSpec)
  return result
end

local spaces = computeSpaces()
-- NOTE: We are only doing it at load time. We don't expect the shaper settings to be often
-- changed arbitrarily _after_ having selected a language...

SILE.settings.declare({
parameter = "languages.fr.punctuationspace",
type = "kern",
default = SILE.nodefactory.kern("0.2en"),
help = "The amount of space before a punctuation"
})
parameter = "languages.fr.colonspace",
type = "kern",
default = SILE.nodefactory.kern(spaces.colonspace),
help = "The amount of space before a colon, theoretically a non-breakable, shrinkable, strechable inter-word space"
})

-- Unfortunately, there is nothing in the Unicode properties
-- database which distinguishes between high and low punctuation.
-- But in a way that's precisely why we can't just rely on Unicode
-- for everything and need our language-specific typesetting
-- processors.
SILE.settings.declare({
parameter = "languages.fr.highpunctuation",
type = "string",
default = "?:;!",
help = "A list of punctuation marks which should be preceded by a punctuationspace"
})
parameter = "languages.fr.thinspace",
type = "kern",
default = SILE.nodefactory.kern(spaces.thinspace),
help = "The amount of space before high punctuations, theoretically a fixed, non-breakable space, around half the inter-word space"
})

-- Kern used for guillemets (looked up by name when an opening or a closing
-- quote is handled). Its default is the guillspace length computed once,
-- when this file is loaded.
SILE.settings.declare({
parameter = "languages.fr.guillspace",
type = "kern",
default = SILE.nodefactory.kern(spaces.guillspace),
help = "The amount of space applying to guillemets, theoretically smaller than a non-breakable inter-word space, with reduced stretchability"
})

-- Debugging aid, off by default: when true, getSpaceGlue returns a wide
-- "5spc" kern instead of the configured punctuation space.
SILE.settings.declare({
parameter = "languages.fr.debugspace",
type = "boolean",
default = false,
help = "If switched to true, uses large spaces instead of the regular punctuation ones"
})

-- Returns the space node to use for the given setting name.
-- In debug mode (languages.fr.debugspace) a wide 5spc kern is returned
-- instead, whatever the requested setting is.
local getSpaceGlue = function(parameter)
  local debugging = SILE.settings.get("languages.fr.debugspace")
  if not debugging then
    return SILE.settings.get(parameter)
  end
  return SILE.nodefactory.kern("5spc")
end

SILE.nodeMakers.fr = pl.class({
_base = SILE.nodeMakers.unicode,
isHighPunctuation = function (_, text)
return string.find(SILE.settings.get("languages.fr.highpunctuation"), text, nil, true)

-- Unfortunately, there is nothing in the Unicode properties
-- database which distinguishes between high and low punctuation, etc.
-- But in a way that's precisely why we can't just rely on Unicode
-- for everything and need our language-specific typesetting
-- processors.
-- Punctuation preceded by a (non-breakable) inter-word space.
colonPunctuations = { ":" },
-- Double and single guillemets; a space is inserted inside them.
-- NOTE(review): the single guillemets were empty strings here (characters
-- lost to an encoding problem), which can never match an item's text.
openingQuotes = { "«", "‹" },
closingQuotes = { "»", "›" },
-- There's a catch below: the shaper may have already processed common ligatures (!!, ?!, !?)
-- as a single item...
highPunctuations = { ";", "!", "?", "!!", "?!", "!?" },
-- High punctuations have some (kern) space before them... except in some cases!
-- By the books, they have it "after a letter or digit", at least. After a closing
-- punctuation, too, seems usual.
-- Otherwise, one shall have no space inside e.g. (?), ?!, [!], …?, !!! etc.
-- As a simplification, we reverse the rule and define after which characters the space
-- shall not be added. This is by no means perfect, I couldn't find an explicit list
-- of exceptions. French typography is a delicate beast.
-- NOTE(review): the ellipsis, single opening guillemet and opening curly quotes
-- below were also empty strings; restored from the examples in the comment above.
spaceExceptions = { "!", "?", ":", ".", "…", "(", "[", "{", "<", "«", "‹", "“", "‘", "?!", "!!", "!?" },

-- methods defined in this class

-- Tells whether text is equal to one of the entries of the list `set`.
-- Always returns a boolean.
isIn = function(_, set, text)
  local found = false
  for _, candidate in ipairs(set) do
    if candidate == text then
      found = true
      break
    end
  end
  return found
end,

-- True when text is one of the entries of self.openingQuotes.
isOpeningQuote = function (self, text)
  local candidates = self.openingQuotes
  return self:isIn(candidates, text)
end,
makeUnbreakableSpace = function (self)
-- True when text is one of the entries of self.closingQuotes.
isClosingQuote = function (self, text)
  local candidates = self.closingQuotes
  return self:isIn(candidates, text)
end,
-- True when text is one of the entries of self.colonPunctuations.
isColonPunctuation = function (self, text)
  local candidates = self.colonPunctuations
  return self:isIn(candidates, text)
end,
-- True when text is one of the entries of self.highPunctuations
-- (which also lists ligature-like pairs such as "?!").
isHighPunctuation = function (self, text)
  local candidates = self.highPunctuations
  return self:isIn(candidates, text)
end,
-- True when text is one of the entries of self.spaceExceptions, i.e. a
-- character after which no space is added before punctuation.
isSpaceException = function (self, text)
  local candidates = self.spaceExceptions
  return self:isIn(candidates, text)
end,

-- Tells whether the item just before the current one (self.items[self.i-1])
-- is a space exception. Returns false when there is no previous item.
isPrevSpaceException = function (self)
  if not (self.i > 1) then
    return false
  end
  local previous = self.items[self.i-1]
  return self:isSpaceException(previous.text) or false
end,

makeUnbreakableSpace = function (self, parameter)
self:makeToken()
self.lastnode = "glue"
coroutine.yield(SILE.settings.get("languages.fr.punctuationspace"))
end,
previousIsHighPunctuation = function (self)
return self.i >1 and self:isHighPunctuation(self.items[self.i-1].text)
coroutine.yield(getSpaceGlue(parameter))
end,
nextIsHighPunctuation = function (self)
return self.items[self.i+1] and self:isHighPunctuation(self.items[self.i+1].text)

-- Handles items that must be preceded by an unbreakable space:
--  - high punctuation (thinspace) and colons (colonspace), unless the
--    previous item is a space exception,
--  - closing quotes (guillspace), unconditionally.
-- Returns true when the item was handled here, false otherwise.
handleSpaceBefore = function (self, item)
  local text = item.text
  local parameter
  if self:isHighPunctuation(text) and not self:isPrevSpaceException() then
    parameter = "languages.fr.thinspace"
  elseif self:isColonPunctuation(text) and not self:isPrevSpaceException() then
    parameter = "languages.fr.colonspace"
  elseif self:isClosingQuote(text) then
    parameter = "languages.fr.guillspace"
  end

  if not parameter then
    return false
  end

  -- Space first, then the punctuation itself as a new token.
  self:makeUnbreakableSpace(parameter)
  self:makeToken()
  self:addToken(item.text, item)
  return true
end,
previousIsSpace = function (self)
return self.lastnode == "glue"

-- Handles items that must be followed by an unbreakable space, i.e. opening
-- quotes (guillspace). Returns true when the item was handled here, false
-- otherwise.
handleSpaceAfter = function (self, item)
  if not self:isOpeningQuote(item.text) then
    return false
  end
  -- The quote goes first, then the space.
  self:addToken(item.text, item)
  self:makeUnbreakableSpace("languages.fr.guillspace")
  self:makeToken()
  return true
end,
handleICUBreak = function (self, chunks, item)
if self:isHighPunctuation(item.text) and self:previousIsHighPunctuation() then
return self._base.handleICUBreak(self, chunks, item)

mustRemove = function (self, i, items)
-- Clear "manual" spaces we do not want, so that later we only have to
-- insert the relevant kerns.
local curr = items[i].text
if self:isSpace(curr) then
if i < #items then
local next = items[i+1].text
if self:isSpace(next)
or self:isHighPunctuation(next)
or self:isColonPunctuation(next)
or self:isClosingQuote(next) then
return true
end
end
if i > 1 then
local prev= items[i-1].text
if self:isOpeningQuote(prev) then
return true
end
end
end
if self:nextIsHighPunctuation() and not self:isHighPunctuation(item.text) then
self:makeUnbreakableSpace()
while chunks[1] and item.index >= chunks[1].index do
table.remove(chunks, 1)
return false
end,

-- overridden methods from parent class

-- Gives the French spacing rules a chance to handle the item (space before,
-- then space after); otherwise defers to the parent class.
dealWith = function (self, item)
  if self:handleSpaceBefore(item) or self:handleSpaceAfter(item) then
    return
  end
  self._base.dealWith(self, item)
end,

-- Same as dealWith, for word breaks: French spacing rules first, parent
-- class behavior when none of them applies.
handleWordBreak = function (self, item)
  if self:handleSpaceBefore(item) or self:handleSpaceAfter(item) then
    return
  end
  self._base.handleWordBreak(self, item)
end,

-- Same as dealWith, for line breaks: French spacing rules first, parent
-- class behavior (which also receives subtype) when none of them applies.
handleLineBreak = function (self, item, subtype)
  if self:handleSpaceBefore(item) or self:handleSpaceAfter(item) then
    return
  end
  self._base.handleLineBreak(self, item, subtype)
end,

iterator = function (self, items)
-- We start by cleaning up the input once for all.
local cleanItems = {}
for k = 1, #items do
if not self:mustRemove(k, items) then
table.insert(cleanItems, items[k])
end
return chunks
elseif self:isHighPunctuation(item.text) and not self:previousIsSpace() then
self:makeUnbreakableSpace()
end
return self._base.handleICUBreak(self, chunks, item)
return self._base.iterator(self, cleanItems)
end
})

Expand Down

0 comments on commit 95c2398

Please sign in to comment.