-
-
Notifications
You must be signed in to change notification settings - Fork 163
/
Xml.scala
124 lines (108 loc) · 7.12 KB
/
Xml.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package scalaParser
import acyclic.file
import macros.Macros._
import org.parboiled2._
import scala.language.implicitConversions
import macros.Macros._
/** Grammar for XML literals embedded in Scala source.
  *
  * Two rules are visible to the rest of the parser: [[XmlExpr]] (XML in expression
  * position) and [[XmlPattern]] (XML in pattern position). Every other rule is a member
  * of the `private[this] object Xml` below and is therefore reachable only from inside
  * this trait.
  *
  * Rule names follow the production names of the XML 1.0 grammar and of the XML chapter
  * of the Scala Language Specification (presumably the source of this grammar; confirm
  * before relying on a one-to-one correspondence).
  *
  * `WL`, `WS`, `Block` and `Basic.HexNum` are not defined in this file; they are expected
  * to come from `Core`. `rule`, `rep`, `rep1` and `opt` come from the imported macros.
  * Alternatives written with `|` are tried left to right, so the order of alternatives
  * in every rule is significant.
  */
trait Xml extends Core {
  // Abstract rule supplied by the concrete parser; used here only by Xml.ScalaPatterns.
  def Patterns: Rule0

  // XML in expression position: one XmlContent item, then any number of further
  // elements, each preceded by WL.
  def XmlExpr = rule( WL ~ Xml.XmlContent ~ rep(WL ~ Xml.Element) )

  // XML in pattern position: a single element pattern preceded by WL.
  def XmlPattern = rule( WL ~ Xml.ElemPattern )

  private[this] object Xml{
    // ---------------------------------------------------------------------------------
    // Character classes
    // ---------------------------------------------------------------------------------

    // Letters accepted as name-start characters, spelled out as explicit code-point
    // ranges and single code points (all within the 16-bit range).
    def BaseChar = rule(
      ("\u0041"-"\u005A") | ("\u0061"-"\u007A") | ("\u00C0"-"\u00D6") | ("\u00D8"-"\u00F6") |
      ("\u00F8"-"\u00FF") | ("\u0100"-"\u0131") | ("\u0134"-"\u013E") | ("\u0141"-"\u0148") |
      ("\u014A"-"\u017E") | ("\u0180"-"\u01C3") | ("\u01CD"-"\u01F0") | ("\u01F4"-"\u01F5") |
      ("\u01FA"-"\u0217") | ("\u0250"-"\u02A8") | ("\u02BB"-"\u02C1") | "\u0386" |
      ("\u0388"-"\u038A") | "\u038C" | ("\u038E"-"\u03A1") | ("\u03A3"-"\u03CE") |
      ("\u03D0"-"\u03D6") | "\u03DA" | "\u03DC" | "\u03DE" | "\u03E0" | ("\u03E2"-"\u03F3") |
      ("\u0401"-"\u040C") | ("\u040E"-"\u044F") | ("\u0451"-"\u045C") | ("\u045E"-"\u0481") |
      ("\u0490"-"\u04C4") | ("\u04C7"-"\u04C8") | ("\u04CB"-"\u04CC") | ("\u04D0"-"\u04EB") |
      ("\u04EE"-"\u04F5") | ("\u04F8"-"\u04F9") | ("\u0531"-"\u0556") | "\u0559" |
      ("\u0561"-"\u0586") | ("\u05D0"-"\u05EA") | ("\u05F0"-"\u05F2") | ("\u0621"-"\u063A") |
      ("\u0641"-"\u064A") | ("\u0671"-"\u06B7") | ("\u06BA"-"\u06BE") | ("\u06C0"-"\u06CE") |
      ("\u06D0"-"\u06D3") | "\u06D5" | ("\u06E5"-"\u06E6") | ("\u0905"-"\u0939") | "\u093D" |
      ("\u0958"-"\u0961") | ("\u0985"-"\u098C") | ("\u098F"-"\u0990") | ("\u0993"-"\u09A8") |
      ("\u09AA"-"\u09B0") | "\u09B2" | ("\u09B6"-"\u09B9") | ("\u09DC"-"\u09DD") |
      ("\u09DF"-"\u09E1") | ("\u09F0"-"\u09F1") | ("\u0A05"-"\u0A0A") | ("\u0A0F"-"\u0A10") |
      ("\u0A13"-"\u0A28") | ("\u0A2A"-"\u0A30") | ("\u0A32"-"\u0A33") | ("\u0A35"-"\u0A36") |
      ("\u0A38"-"\u0A39") | ("\u0A59"-"\u0A5C") | "\u0A5E" | ("\u0A72"-"\u0A74") |
      ("\u0A85"-"\u0A8B") | "\u0A8D" | ("\u0A8F"-"\u0A91") | ("\u0A93"-"\u0AA8") |
      ("\u0AAA"-"\u0AB0") | ("\u0AB2"-"\u0AB3") | ("\u0AB5"-"\u0AB9") | "\u0ABD" | "\u0AE0" |
      ("\u0B05"-"\u0B0C") | ("\u0B0F"-"\u0B10") | ("\u0B13"-"\u0B28") | ("\u0B2A"-"\u0B30") |
      ("\u0B32"-"\u0B33") | ("\u0B36"-"\u0B39") | "\u0B3D" | ("\u0B5C"-"\u0B5D") |
      ("\u0B5F"-"\u0B61") | ("\u0B85"-"\u0B8A") | ("\u0B8E"-"\u0B90") | ("\u0B92"-"\u0B95") |
      ("\u0B99"-"\u0B9A") | "\u0B9C" | ("\u0B9E"-"\u0B9F") | ("\u0BA3"-"\u0BA4") |
      ("\u0BA8"-"\u0BAA") | ("\u0BAE"-"\u0BB5") | ("\u0BB7"-"\u0BB9") | ("\u0C05"-"\u0C0C") |
      ("\u0C0E"-"\u0C10") | ("\u0C12"-"\u0C28") | ("\u0C2A"-"\u0C33") | ("\u0C35"-"\u0C39") |
      ("\u0C60"-"\u0C61") | ("\u0C85"-"\u0C8C") | ("\u0C8E"-"\u0C90") | ("\u0C92"-"\u0CA8") |
      ("\u0CAA"-"\u0CB3") | ("\u0CB5"-"\u0CB9") | "\u0CDE" | ("\u0CE0"-"\u0CE1") |
      ("\u0D05"-"\u0D0C") | ("\u0D0E"-"\u0D10") | ("\u0D12"-"\u0D28") | ("\u0D2A"-"\u0D39") |
      ("\u0D60"-"\u0D61") | ("\u0E01"-"\u0E2E") | "\u0E30" | ("\u0E32"-"\u0E33") |
      ("\u0E40"-"\u0E45") | ("\u0E81"-"\u0E82") | "\u0E84" | ("\u0E87"-"\u0E88") | "\u0E8A" |
      "\u0E8D" | ("\u0E94"-"\u0E97") | ("\u0E99"-"\u0E9F") | ("\u0EA1"-"\u0EA3") | "\u0EA5" |
      "\u0EA7" | ("\u0EAA"-"\u0EAB") | ("\u0EAD"-"\u0EAE") | "\u0EB0" | ("\u0EB2"-"\u0EB3") |
      "\u0EBD" | ("\u0EC0"-"\u0EC4") | ("\u0F40"-"\u0F47") | ("\u0F49"-"\u0F69") |
      ("\u10A0"-"\u10C5") | ("\u10D0"-"\u10F6") | "\u1100" | ("\u1102"-"\u1103") |
      ("\u1105"-"\u1107") | "\u1109" | ("\u110B"-"\u110C") | ("\u110E"-"\u1112") |
      "\u113C" | "\u113E" | "\u1140" | "\u114C" | "\u114E" | "\u1150" | ("\u1154"-"\u1155") |
      "\u1159" | ("\u115F"-"\u1161") | "\u1163" | "\u1165" | "\u1167" | "\u1169" |
      ("\u116D"-"\u116E") | ("\u1172"-"\u1173") | "\u1175" | "\u119E" | "\u11A8" | "\u11AB" |
      ("\u11AE"-"\u11AF") | ("\u11B7"-"\u11B8") | "\u11BA" | ("\u11BC"-"\u11C2") | "\u11EB" |
      "\u11F0" | "\u11F9" | ("\u1E00"-"\u1E9B") | ("\u1EA0"-"\u1EF9") | ("\u1F00"-"\u1F15") |
      ("\u1F18"-"\u1F1D") | ("\u1F20"-"\u1F45") | ("\u1F48"-"\u1F4D") | ("\u1F50"-"\u1F57") |
      "\u1F59" | "\u1F5B" | "\u1F5D" | ("\u1F5F"-"\u1F7D") | ("\u1F80"-"\u1FB4") |
      ("\u1FB6"-"\u1FBC") | "\u1FBE" | ("\u1FC2"-"\u1FC4") | ("\u1FC6"-"\u1FCC") |
      ("\u1FD0"-"\u1FD3") | ("\u1FD6"-"\u1FDB") | ("\u1FE0"-"\u1FEC") | ("\u1FF2"-"\u1FF4") |
      ("\u1FF6"-"\u1FFC") | "\u2126" | ("\u212A"-"\u212B") | "\u212E" | ("\u2180"-"\u2182") |
      ("\u3041"-"\u3094") | ("\u30A1"-"\u30FA") | ("\u3105"-"\u312C") | ("\uAC00"-"\uD7A3")
    )

    // Ideographic name-start characters. `-` binds tighter than `|`, so this reads as
    // (range) | single code point | (range).
    def Ideographic = rule( "\u4E00"-"\u9FA5" | "\u3007" | "\u3021"-"\u3029" )

    // ---------------------------------------------------------------------------------
    // Elements and content (expression position)
    // ---------------------------------------------------------------------------------

    // `=` with optional WL on both sides.
    def Eq = rule (opt(WL) ~ '=' ~ opt(WL))

    // The self-closing form is tried first; if it fails the parser falls back to
    // start tag, content, end tag. Start and end tag names are not checked against
    // each other by this rule.
    def Element = rule( EmptyElemTag | STag ~ Content ~ ETag )
    def EmptyElemTag = rule( '<' ~ Name ~ rep(WL ~ Attribute) ~ opt(WL) ~ "/>" )
    def STag = rule( '<' ~ Name ~ rep(WL ~ Attribute) ~ opt(WL) ~ '>' )
    def ETag = rule( "</" ~ Name ~ opt(WL) ~ '>' )

    // Element body: any mix of character data, nested markup, references and embedded
    // Scala blocks.
    def Content = rule( rep(CharData | Content1) )
    def Content1 = rule( XmlContent | Reference | ScalaExpr )

    // Explicit result type because the rule is (indirectly) recursive through Element.
    def XmlContent: Rule0 = rule( Element | CDSect | PI | Comment )

    // CDATA section: everything up to the first `]]>` is taken verbatim.
    def CDSect = rule( CDStart ~ CData ~ CDEnd )
    def CDStart = rule( "<![CDATA[" )
    def CData = rule( rep(!"]]>" ~ Char))
    def CDEnd = rule( "]]>" )

    // Attribute: name, `=`, then a double-quoted value, a single-quoted value, or an
    // embedded Scala block.
    def Attribute = rule( Name ~ Eq ~ AttValue )
    def AttValue = rule(
      '"' ~ rep(CharQ | Reference) ~ '"' |
      "'" ~ rep(CharA | Reference) ~ "'" |
      ScalaExpr
    )

    // Comment body: each step consumes either one non-dash character or a dash followed
    // by a non-dash character, so two adjacent dashes never appear before the closing
    // `-->`.
    def Comment = rule( "<!--" ~ rep((!'-' ~ Char) | ('-' ~ (!'-' ~ Char))) ~ "-->" )

    // Processing instruction: target name, then optionally WL and arbitrary text up to
    // the first `?>`.
    def PI = rule( "<?" ~ PITarget ~ opt(WL ~ rep(!"?>" ~ Char)) ~ "?>" )

    // A PI target is any Name except the reserved word "xml" in any letter case.
    // The trailing `!NameChar` inside the lookahead restricts the rejection to the case
    // where those three letters are the *whole* name; targets that merely start with
    // them (for example `xml-stylesheet`) are ordinary names and must be accepted.
    def PITarget = rule( !(("X" | "x") ~ ("M" | "m") ~ ("L" | "l") ~ !NameChar) ~ Name )

    // Character reference. The decimal alternative is tried first; on input such as
    // `&#x41;` it fails at the digit run and the hexadecimal alternative is tried next.
    def CharRef = rule( "&#" ~ rep1("0"-"9") ~ ';' | "&#x" ~ Basic.HexNum ~ ";" )
    def Reference = rule( EntityRef | CharRef )
    def EntityRef = rule( "&" ~ Name ~ ";" )

    // Scala code embedded in XML: `{`, WS, a Block, WS, `}`.
    def ScalaExpr = rule("{" ~ WS ~ Block ~ WS ~ "}")

    // ---------------------------------------------------------------------------------
    // Character data
    // ---------------------------------------------------------------------------------

    // Any single input character.
    def Char = rule( ANY )

    // One or more steps, each of which is either
    //   * one Char1 at a position that does not start `{`, `]]>` or a CharRef, or
    //   * the two-character sequence `{{`.
    // `~` binds tighter than `|`, so the lookahead applies only to the Char1 branch.
    def CharData = rule( rep1(!("{" | "]]>" | CharRef) ~ Char1 | "{{") )

    // Any character other than `<` and `&`.
    def Char1 = rule( &(noneOf("<&")) ~ Char )
    // Char1 minus the double quote / single quote: used inside quoted attribute values.
    def CharQ = rule( !'"' ~ Char1 )
    def CharA = rule( !"'" ~ Char1 )
    // Char1 minus `{`. Not referenced by any other rule in this file.
    def CharB = rule( !'{' ~ Char1 )

    // ---------------------------------------------------------------------------------
    // Names
    // ---------------------------------------------------------------------------------

    // A name starts with XNameStart and continues with NameChar. Note that `:` is not
    // allowed as the first character (XNameStart) but is allowed afterwards, because
    // NameChar includes NameStartChar.
    def Name = rule( XNameStart ~ rep(NameChar) )
    def XNameStart = rule( '_' | BaseChar | Ideographic )

    // The supplementary-plane range [#x10000-#xEFFFF] is intentionally left out of the
    // list below: a single 16-bit char cannot hold a code point above U+FFFF, so
    // supporting it would require matching surrogate pairs.
    def NameStartChar = rule(
      ":" | ("A"-"Z") | "_" | ("a"-"z") | ("\u00C0"-"\u00D6") | ("\u00D8"-"\u00F6") |
      ("\u00F8"-"\u02FF") | ("\u0370"-"\u037D") | ("\u037F"-"\u1FFF") | ("\u200C"-"\u200D") |
      ("\u2070"-"\u218F") | ("\u2C00"-"\u2FEF") | ("\u3001"-"\uD7FF") | ("\uF900"-"\uFDCF") |
      ("\uFDF0"-"\uFFFD") )
    def NameChar = rule( NameStartChar | "-" | "." | ("0"-"9") | "\u00B7" | ("\u0300"-"\u036F") | ("\u203F"-"\u2040") )

    // ---------------------------------------------------------------------------------
    // Elements and content (pattern position)
    // ---------------------------------------------------------------------------------

    // Pattern counterpart of Element. Tags in patterns carry no attributes.
    // Explicit result type because the rule is recursive through ContentP.
    def ElemPattern: Rule0 = rule( EmptyElemTagP | STagP ~ ContentP ~ ETagP )
    def EmptyElemTagP = rule( "<" ~ Name ~ opt(WL) ~ "/>" )
    def STagP = rule( "<" ~ Name ~ opt(WL) ~ ">")
    def ETagP = rule( "</" ~ Name ~ opt(WL) ~ ">" )

    // Optional character data, then any number of nested element patterns or embedded
    // Scala patterns, each optionally followed by more character data.
    def ContentP = rule( opt(CharData) ~ rep((ElemPattern | ScalaPatterns) ~ opt(CharData)) )
    // Not referenced by any other rule in this file (ContentP spells out its own
    // alternatives instead).
    def ContentP1 = rule( ElemPattern | Reference | CDSect | PI | Comment | ScalaPatterns )

    // Scala patterns embedded in an XML pattern: `{`, Patterns, WL, `}`.
    def ScalaPatterns = rule( "{" ~ Patterns ~ WL ~ "}" )
  }
}