Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

238 lines (222 sloc) 7.245 kB
# A simple parser generator library. Typical usage would look like this:
#
# require 'parslet'
#
# class MyParser < Parslet::Parser
# rule(:a) { str('a').repeat }
# root(:a)
# end
#
# pp MyParser.new.parse('aaaa') # => 'aaaa'@0
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
# # Don't know what to do with bbbb at line 1 char 1.
#
# The simple DSL allows you to define grammars in PEG-style. This kind of
# grammar construction does away with the ambiguities that usually comes with
# parsers; instead, it allows you to construct grammars that are easier to
# debug, since less magic is involved.
#
# Parslet is typically used in stages:
#
#
# * Parsing the input string; this yields an intermediary tree, see
# Parslet.any, Parslet.match, Parslet.str, Parslet::ClassMethods#rule and
# Parslet::ClassMethods#root.
# * Transformation of the tree into something useful to you, see
# Parslet::Transform, Parslet.simple, Parslet.sequence and Parslet.subtree.
#
# The first stage is traditionally intermingled with the second stage; output
# from the second stage is usually called the 'Abstract Syntax Tree' or AST.
#
# The stages are completely decoupled; You can change your grammar around and
# use the second stage to isolate the rest of your code from the changes
# you've effected.
#
# == Further reading
#
# All parslet atoms are subclasses of {Parslet::Atoms::Base}. You might want to
# look at all of those: {Parslet::Atoms::Re}, {Parslet::Atoms::Str},
# {Parslet::Atoms::Repetition}, {Parslet::Atoms::Sequence},
# {Parslet::Atoms::Alternative}.
#
# == When things go wrong
#
# A parse that fails will raise {Parslet::ParseFailed}. This exception contains
# all the details of what went wrong, including a detailed error trace that
# can be printed out as an ascii tree. ({Parslet::Cause})
#
module Parslet
# Extends classes that include Parslet with the module
# {Parslet::ClassMethods}.
#
def self.included(base)
base.extend(ClassMethods)
end
# Raised when the parse failed to match. It contains the message that should
# be presented to the user. More details can be extracted from the
# exceptions #cause member: It contains an instance of {Parslet::Cause} that
# stores all the details of your failed parse in a tree structure.
#
# begin
# parslet.parse(str)
# rescue Parslet::ParseFailed => failure
# puts failure.cause.ascii_tree
# end
#
# Alternatively, you can just require 'parslet/convenience' and call the
# method #parse_with_debug instead of #parse. This method will never raise
# and print error trees to stdout.
#
# require 'parslet/convenience'
# parslet.parse_with_debug(str)
#
class ParseFailed < StandardError
def initialize(message, cause=nil)
super(message)
@cause = cause
end
# Why the parse failed.
#
# @return [Parslet::Cause]
attr_reader :cause
end
module ClassMethods
# Define an entity for the parser. This generates a method of the same
# name that can be used as part of other patterns. Those methods can be
# freely mixed in your parser class with real ruby methods.
#
# class MyParser
# include Parslet
#
# rule(:bar) { str('bar') }
# rule(:twobar) do
# bar >> bar
# end
#
# root :twobar
# end
#
def rule(name, &definition)
define_method(name) do
@rules ||= {} # <name, rule> memoization
return @rules[name] if @rules.has_key?(name)
# Capture the self of the parser class along with the definition.
definition_closure = proc {
self.instance_eval(&definition)
}
@rules[name] = Atoms::Entity.new(name, &definition_closure)
end
end
end
# Allows for delayed construction of #match. See also Parslet.match.
#
# @api private
class DelayedMatchConstructor
def [](str)
Atoms::Re.new("[" + str + "]")
end
end
# Returns an atom matching a character class. All regular expressions can be
# used, as long as they match only a single character at a time.
#
# match('[ab]') # will match either 'a' or 'b'
# match('[\n\s]') # will match newlines and spaces
#
# There is also another (convenience) form of this method:
#
# match['a-z'] # synonymous to match('[a-z]')
# match['\n'] # synonymous to match('[\n]')
#
# @overload match(str)
# @param str [String] character class to match (regexp syntax)
# @return [Parslet::Atoms::Re] a parslet atom
#
def match(str=nil)
return DelayedMatchConstructor.new unless str
return Atoms::Re.new(str)
end
module_function :match
# Returns an atom matching the +str+ given:
#
# str('class') # will match 'class'
#
# @param str [String] string to match verbatim
# @return [Parslet::Atoms::Str] a parslet atom
#
def str(str)
Atoms::Str.new(str)
end
module_function :str
# Returns an atom matching any character. It acts like the '.' (dot)
# character in regular expressions.
#
# any.parse('a') # => 'a'
#
# @return [Parslet::Atoms::Re] a parslet atom
#
def any
Atoms::Re.new('.')
end
module_function :any
# A special kind of atom that allows embedding whole treetop expressions
# into parslet construction.
#
# # the same as str('a') >> str('b').maybe
# exp(%Q("a" "b"?))
#
# @param str [String] a treetop expression
# @return [Parslet::Atoms::Base] the corresponding parslet parser
#
def exp(str)
Parslet::Expression.new(str).to_parslet
end
module_function :exp
# Returns a placeholder for a tree transformation that will only match a
# sequence of elements. The +symbol+ you specify will be the key for the
# matched sequence in the returned dictionary.
#
# # This would match a body element that contains several declarations.
# { :body => sequence(:declarations) }
#
# The above example would match <code>:body => ['a', 'b']</code>, but not
# <code>:body => 'a'</code>.
#
# see {Parslet::Transform}
#
def sequence(symbol)
Pattern::SequenceBind.new(symbol)
end
module_function :sequence
# Returns a placeholder for a tree transformation that will only match
# simple elements. This matches everything that <code>#sequence</code>
# doesn't match.
#
# # Matches a single header.
# { :header => simple(:header) }
#
# see {Parslet::Transform}
#
def simple(symbol)
Pattern::SimpleBind.new(symbol)
end
module_function :simple
# Returns a placeholder for tree transformation patterns that will match
# any kind of subtree.
#
# { :expression => subtree(:exp) }
#
def subtree(symbol)
Pattern::SubtreeBind.new(symbol)
end
module_function :subtree
autoload :Expression, 'parslet/expression'
end
require 'parslet/slice'
require 'parslet/cause'
require 'parslet/source'
require 'parslet/atoms'
require 'parslet/pattern'
require 'parslet/pattern/binding'
require 'parslet/transform'
require 'parslet/parser'
require 'parslet/error_reporter'
Jump to Line
Something went wrong with that request. Please try again.