-
-
Notifications
You must be signed in to change notification settings - Fork 97
/
sil.abnf
100 lines (84 loc) · 3.67 KB
/
sil.abnf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
; Formal grammar definition for SIL (SILE Input Language) files
;
; Based on RFC 5234 (Augmented BNF for Syntax Specifications: ABNF)
; Uses RFC 7405 (Case-Sensitive String Support in ABNF)
; NOTE: ABNF does not seem to have a way to express matching / balancing of
; tags. The grammar below does not express SILE's ability to skip over
; passthrough content until it hits the correct matching closing tag for
; environments or the first unbalanced brace for braced content.
; A master document holds exactly one top-level content item. Fragments may be
; loaded on their own and can hold any number of top-level items, so the entry
; rule accepts zero or more content items.
document = *content
; A single content item is exactly one of the constructs listed here.
content = environment / comment / text / braced-content / command
; Environments come in two flavors, passthrough (raw) and regular. The
; difference is what is allowed to terminate them and what escapes are needed
; for the content in the middle.
; Continuation lines of a rule must start with whitespace (RFC 5234 section
; 3.1); a line starting in the first column would begin a new rule.
environment = %s"\begin" [ options ] "{" passthrough-command-id "}"
              env-passthrough-text
              %s"\end{" passthrough-command-id "}"
; A regular environment wraps any number of content items (including none),
; the same way the document rule does at the top level.
environment =/ %s"\begin" [ options ] "{" command-id "}"
               *content
               %s"\end{" command-id "}"
; Passthrough (raw) environments can have any valid UTF-8 except the closing
; delimiter matching the opening, per the environment rule.
env-passthrough-text = utf8-text
; Nothing to see here.
; But potentially important because it eats newlines: the line terminator is
; part of the comment rule itself.
; Either CRLF or a bare LF ends a comment, so input with Unix line endings
; matches as well as input with DOS line endings.
comment = "%" utf8-text ( CRLF / LF )
; Input strings that are not special
text = *text-char
; Input content wrapped in braces can be attached to a command or used to
; manually isolate chunks of content (e.g. to hinder ligatures). Any number of
; content items may sit between the braces, e.g. text followed by a command
; followed by more text.
braced-content = "{" *content "}"
; As with environments, the content format may be passthrough (raw) or more sil
; content depending on the command.
command = "\" passthrough-command-id [ options ] [ braced-passthrough-text ]
command =/ "\" command-id [ options ] [ braced-content ]
; Passthrough (raw) command text is delimited by braces, like braced-content,
; and can have any valid UTF-8 in between except an unbalanced closing
; delimiter. (ABNF cannot express the balancing itself; utf8-text is the
; closest approximation.)
braced-passthrough-text = "{" utf8-text "}"
; Building blocks
; Options are a bracketed, comma separated list of key=value parameters.
options = "[" parameter *( "," parameter ) "]"
; The key is an identifier (the same rule used for command names); optional
; whitespace is allowed around both the key and the value.
parameter = *WSP identifier *WSP "=" *WSP ( quoted-value / value ) *WSP
; A value is either wrapped in double quotes or written bare; the two forms
; differ in which characters they may contain.
quoted-value = DQUOTE *quoted-value-char DQUOTE
value = *value-char
; Characters allowed in a bare (unquoted) option value: all of UTF-8 except
; the four delimiters called out below. The first line must be a basic "="
; definition; the "=/" lines then add alternatives to it.
value-char = %x00-21 ; omit "
value-char =/ %x23-2B ; omit ,
value-char =/ %x2D-3A ; omit ;
value-char =/ %x3C-5C ; omit ]
value-char =/ %x5E-7F ; end of utf8-1
value-char =/ utf8-2
value-char =/ utf8-3
value-char =/ utf8-4
; Characters allowed inside a quoted option value: any UTF-8 character other
; than a bare double quote, plus a double quote preceded by a backslash.
quoted-value-char = %x00-21 ; utf8-1 below the double quote (%x22)
                  / %x23-7F ; utf8-1 above the double quote
                  / utf8-2 / utf8-3 / utf8-4
                  / "\" DQUOTE ; backslash-escaped double quote
; Characters allowed in plain text: any UTF-8 character other than the four
; special ones -- percent (%x25), backslash (%x5C), opening brace (%x7B) and
; closing brace (%x7D) -- each of which may still appear when it directly
; follows a backslash.
text-char = %x00-24 / %x26-5B / %x5D-7A / %x7C / %x7E-7F
          / utf8-2 / utf8-3 / utf8-4
          / "\" ( "\" / "%" / "{" / "}" )
; Identifiers start with a letter, underscore or colon; after that digits,
; hyphens and periods are allowed too.
letter = ":" / "_" / ALPHA
identifier = letter *( "-" / "." / DIGIT / letter )
; Intended to exclude "begin", "end" and the passthrough command names, i.e.
;   identifier - ( %s"begin" / %s"end" / passthrough-command-id )
; but that subtraction is left as a comment and the rule is plain identifier.
command-id = identifier
; Case-sensitive names of the commands whose content is passed through raw.
passthrough-command-id = %s"ftl" / %s"lua" / %s"math" / %s"raw"
                       / %s"script" / %s"sil" / %s"use" / %s"xml"
; ASCII isn't good enough for us: text is a sequence of UTF-8 encoded
; characters, each one to four octets long.
utf8-text = *utf8-char
utf8-char = utf8-1 / utf8-2 / utf8-3 / utf8-4
; Single octet.
utf8-1 = %x00-7F
; Two octets: lead octet plus one tail octet.
utf8-2 = %xC2-DF utf8-tail
; Three octets; the %xE0 and %xED lead octets restrict the second octet.
utf8-3 = %xE0 %xA0-BF utf8-tail
       / %xE1-EC 2( utf8-tail )
       / %xED %x80-9F utf8-tail
       / %xEE-EF 2( utf8-tail )
; Four octets; the %xF0 and %xF4 lead octets restrict the second octet.
utf8-4 = %xF0 %x90-BF 2( utf8-tail )
       / %xF1-F3 3( utf8-tail )
       / %xF4 %x80-8F 2( utf8-tail )
; Tail (continuation) octet.
utf8-tail = %x80-BF