-
Notifications
You must be signed in to change notification settings - Fork 95
/
parslet.rb
238 lines (222 loc) · 7.08 KB
/
parslet.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# A simple parser generator library. Typical usage would look like this:
#
# require 'parslet'
#
# class MyParser < Parslet::Parser
# rule(:a) { str('a').repeat }
# root(:a)
# end
#
# pp MyParser.new.parse('aaaa') # => 'aaaa'@0
# pp MyParser.new.parse('bbbb') # => Parslet::ParseFailed:
# # Don't know what to do with bbbb at line 1 char 1.
#
# The simple DSL allows you to define grammars in PEG-style. This kind of
# grammar construction does away with the ambiguities that usually come with
# parsers; instead, it allows you to construct grammars that are easier to
# debug, since less magic is involved.
#
# Parslet is typically used in stages:
#
#
# * Parsing the input string; this yields an intermediary tree, see
# Parslet.any, Parslet.match, Parslet.str, Parslet::ClassMethods#rule and
# Parslet::ClassMethods#root.
# * Transformation of the tree into something useful to you, see
# Parslet::Transform, Parslet.simple, Parslet.sequence and Parslet.subtree.
#
# The first stage is traditionally intermingled with the second stage; output
# from the second stage is usually called the 'Abstract Syntax Tree' or AST.
#
# The stages are completely decoupled; you can change your grammar around and
# use the second stage to isolate the rest of your code from the changes
# you've effected.
#
# == Further reading
#
# All parslet atoms are subclasses of {Parslet::Atoms::Base}. You might want to
# look at all of those: {Parslet::Atoms::Re}, {Parslet::Atoms::Str},
# {Parslet::Atoms::Repetition}, {Parslet::Atoms::Sequence},
# {Parslet::Atoms::Alternative}.
#
# == When things go wrong
#
# A parse that fails will raise {Parslet::ParseFailed}. This exception contains
# all the details of what went wrong, including a detailed error trace that
# can be printed out as an ascii tree. ({Parslet::Cause})
#
module Parslet
# Hook that runs when a class or module includes Parslet. The including
# receiver is extended with {Parslet::ClassMethods}, so the methods defined
# there become callable on the receiver itself.
#
# @param base [Module] the class or module that included Parslet
#
def self.included(base)
  base.extend ClassMethods
end
# Exception raised when a parse does not match. Its message is the text meant
# to be shown to the user; further detail is available through the
# exception's #cause reader, which holds a {Parslet::Cause} storing all the
# details of the failed parse in a tree structure.
#
#   begin
#     parslet.parse(str)
#   rescue Parslet::ParseFailed => failure
#     puts failure.cause.ascii_tree
#   end
#
# As an alternative, require 'parslet/convenience' and call
# #parse_with_debug in place of #parse. That method never raises; it prints
# error trees to stdout instead.
#
#   require 'parslet/convenience'
#   parslet.parse_with_debug(str)
#
# NOTE(review): the #cause reader declared here takes the place of the
# reader of the same name that Ruby defines on Exception - confirm that no
# caller expects Ruby's own exception chaining from it.
#
class ParseFailed < StandardError
  # Why the parse failed.
  #
  # @return [Parslet::Cause] nil when no cause was given to the constructor
  attr_reader :cause

  # @param message [String] text to present to the user
  # @param cause [Parslet::Cause] details of the failed parse (optional)
  def initialize(message, cause = nil)
    @cause = cause
    super(message)
  end
end
module ClassMethods
  # Declares a named entity of the parser. A method called +name+ is
  # generated; it can be used as a building block inside other patterns, and
  # such methods can be mixed freely with ordinary ruby methods of the
  # parser class.
  #
  #   class MyParser
  #     include Parslet
  #
  #     rule(:bar) { str('bar') }
  #     rule(:twobar) do
  #       bar >> bar
  #     end
  #
  #     root :twobar
  #   end
  #
  # The generated method builds its Atoms::Entity on first use and stores it
  # in a per-instance hash; later calls on the same parser instance return
  # that stored object.
  #
  # @param name [Symbol] name of the entity and of the generated method
  # @yield the rule body, instance_eval'ed against the parser instance
  #
  def rule(name, &definition)
    define_method(name) do
      @rules ||= {} # <name, rule> memoization

      unless @rules.key?(name)
        # The block closes over the parser instance (self at this point), so
        # the definition is evaluated against it when the block is called.
        @rules[name] = Atoms::Entity.new(name) { instance_eval(&definition) }
      end

      @rules[name]
    end
  end
end
# Helper returned by Parslet.match when it is called without an argument. It
# defers building the atom until #[] is called, which is what makes the
# match['a-z'] form work. See also Parslet.match.
#
# @api private
class DelayedMatchConstructor
  # Wraps +str+ in square brackets and builds an Atoms::Re from the result.
  def [](str)
    char_class = '[' + str + ']'
    Atoms::Re.new(char_class)
  end
end
# Builds an atom that matches a character class. Any regular expression may
# be given, as long as it matches only a single character at a time.
#
#   match('[ab]')    # will match either 'a' or 'b'
#   match('[\n\s]')  # will match newlines and spaces
#
# When called without an argument, a DelayedMatchConstructor is returned
# instead; its #[] adds the surrounding brackets, giving a convenience form:
#
#   match['a-z']     # synonymous to match('[a-z]')
#   match['\n']      # synonymous to match('[\n]')
#
# @overload match(str)
#   @param str [String] character class to match (regexp syntax)
#   @return [Parslet::Atoms::Re] a parslet atom
#
def match(str = nil)
  if str
    Atoms::Re.new(str)
  else
    DelayedMatchConstructor.new
  end
end
module_function :match
# Builds an atom that matches the given +str+ literally:
#
#   str('class')  # will match 'class'
#
# @param str [String] the text to match verbatim
# @return [Parslet::Atoms::Str] a parslet atom
#
def str(str)
  Atoms::Str.new(str)
end
module_function :str
# Builds an atom that matches any character, the way the '.' (dot)
# character does in regular expressions.
#
#   any.parse('a')  # => 'a'
#
# @return [Parslet::Atoms::Re] a parslet atom
#
def any
  Atoms::Re.new('.')
end
module_function :any
# A special kind of atom: it lets a whole treetop expression be embedded
# into a parslet construction.
#
#   # the same as str('a') >> str('b').maybe
#   exp(%Q("a" "b"?))
#
# @param str [String] a treetop expression
# @return [Parslet::Atoms::Base] the corresponding parslet parser
#
def exp(str)
  expression = Parslet::Expression.new(str)
  expression.to_parslet
end
module_function :exp
# Builds a placeholder for a tree transformation that only matches a
# sequence of elements. The +symbol+ given becomes the key under which the
# matched sequence appears in the returned dictionary.
#
#   # This would match a body element that contains several declarations.
#   { :body => sequence(:declarations) }
#
# The example above would match <code>:body => ['a', 'b']</code>, but not
# <code>:body => 'a'</code>.
#
# see {Parslet::Transform}
#
def sequence(symbol)
  Pattern::SequenceBind.new(symbol)
end
module_function :sequence
# Builds a placeholder for a tree transformation that only matches simple
# elements, that is, everything that <code>#sequence</code> doesn't match.
#
#   # Matches a single header.
#   { :header => simple(:header) }
#
# see {Parslet::Transform}
#
def simple(symbol)
  Pattern::SimpleBind.new(symbol)
end
module_function :simple
# Builds a placeholder for tree transformation patterns that matches any
# kind of subtree.
#
#   { :expression => subtree(:exp) }
#
def subtree(symbol)
  Pattern::SubtreeBind.new(symbol)
end
module_function :subtree
autoload :Expression, 'parslet/expression'
end
require 'parslet/slice'
require 'parslet/cause'
require 'parslet/source'
require 'parslet/atoms'
require 'parslet/pattern'
require 'parslet/pattern/binding'
require 'parslet/transform'
require 'parslet/parser'
require 'parslet/error_reporter'