Permalink
Browse files

Implements simple captures

Captures are pieces of input that are matched against and can then be
reused in matching other pieces of the input. Think heredocs.
  • Loading branch information...
1 parent 159edfd commit c80816baeaa3753baa609bc3361ad5a16cedfb44 @kschiess committed Dec 27, 2012
View
41 example/capture.rb
@@ -0,0 +1,41 @@
+
+# This example demonstrates how pieces of input can be captured and matched
+# against later on. Without this, you cannot match here-documents and other
+# self-dependent grammars.
+
+$:.unshift File.dirname(__FILE__) + "/../lib"
+require 'parslet'
+require 'parslet/convenience'
+require 'pp'
+
+
+
+# Parses nested here-document style blocks. A document opens with '<MARKER'
+# on a line of its own and ends at the next occurrence of that same MARKER;
+# documents may appear nested inside the text of another document.
+class CapturingParser < Parslet::Parser
+ root :document
+
+ # Every document is parsed inside its own capture scope, so the :marker
+ # stored by a nested document is dropped again once that document is done.
+ rule(:document) { scope { doc_start >> text >> doc_end } }
+ # Opening line: '<', the marker (captured as :marker), then a newline.
+ rule(:doc_start) { str('<') >> marker >> newline }
+ # Body: one or more nested documents (:doc) or plain text lines (:line).
+ rule(:text) { (document.as(:doc) | text_line.as(:line)).repeat(1) }
+ # A text line must not start with the captured marker. It consumes one
+ # character and then everything up to and including the next newline.
+ rule(:text_line) { captured_marker.absent? >> any >>
+ (newline.absent? >> any).repeat >> newline }
+ # Closing marker: the same string that was captured in doc_start.
+ rule(:doc_end) { captured_marker }
+
+ # One or more characters from A-Z, stored under captures[:marker].
+ rule(:marker) { match['A-Z'].repeat(1).capture(:marker) }
+ rule(:newline) { match["\n"] }
+ # Built at parse time: matches the string currently held in
+ # captures[:marker].
+ rule(:captured_marker) {
+ dynamic { |source, context|
+ str(context.captures[:marker])
+ }
+ }
+end
+
+# Parse a sample in which one document (FOOBAR) is nested inside another
+# (CAPTURE) and pretty-print whatever parse_with_debug returns.
+parser = CapturingParser.new
+pp parser.parse_with_debug %Q(<CAPTURE
+Text1
+<FOOBAR
+Text3
+Text4
+FOOBAR
+Text2
+CAPTURE)
+
View
3 lib/parslet.rb
@@ -235,4 +235,5 @@ def subtree(symbol)
require 'parslet/pattern/binding'
require 'parslet/transform'
require 'parslet/parser'
-require 'parslet/error_reporter'
+require 'parslet/error_reporter'
+require 'parslet/scope'
View
3 lib/parslet/atoms.rb
@@ -27,5 +27,8 @@ module Precedence
require 'parslet/atoms/re'
require 'parslet/atoms/str'
require 'parslet/atoms/entity'
+ require 'parslet/atoms/capture'
+ require 'parslet/atoms/dynamic'
+ require 'parslet/atoms/scope'
end
View
7 lib/parslet/atoms/base.rb
@@ -118,6 +118,13 @@ def try(source, context, consume_all)
"Atoms::Base doesn't have behaviour, please implement #try(source, context)."
end
+ # Returns true if results of this atom may be stored in the packrat cache.
+ # Most parslet atoms can be cached, so this default implementation always
+ # returns true. Atoms that must not be cached (Dynamic and Scope) override
+ # this method to return false.
+ #
+ def cached?
+ true
+ end
+
# Debug printing - in Treetop syntax.
#
def self.precedence(prec)
View
34 lib/parslet/atoms/capture.rb
@@ -0,0 +1,34 @@
+
+# Stores the result of matching an atom against input in the #captures in
+# parse context. Doing so will allow you to pull parts of the ongoing parse
+# out later and use them to match other pieces of input.
+#
+# Example:
+# # After this, context.captures[:an_a] returns 'a'
+# str('a').capture(:an_a)
+#
+# # Capture and use of the capture: (matches either 'aa' or 'bb')
+# match['ab'].capture(:first) >>
+# dynamic { |src, ctx| str(ctx.captures[:first]) }
+#
+class Parslet::Atoms::Capture < Parslet::Atoms::Base
+ attr_reader :parslet, :name
+
+ def initialize(parslet, name)
+ super()
+
+ @parslet, @name = parslet, name
+ end
+
+ def apply(source, context, consume_all)
+ success, value = result = parslet.apply(source, context, consume_all)
+
+ if success
+ context.captures[name.to_sym] =
+ flatten(value)
+ end
+
+ return result
+ end
+end
+
View
23 lib/parslet/atoms/context.rb
@@ -12,6 +12,7 @@ class Context
def initialize(reporter=Parslet::ErrorReporter::Tree.new)
@cache = Hash.new { |h, k| h[k] = {} }
@reporter = reporter
+ @captures = Parslet::Scope.new
end
# Caches a parse answer for obj at source.pos. Applying the same parslet
@@ -29,7 +30,10 @@ def try_with_cache(obj, source, consume_all)
unless entry = lookup(obj, beg)
result = obj.try(source, self, consume_all)
- set obj, beg, [result, source.pos-beg]
+ if obj.cached?
+ set obj, beg, [result, source.pos-beg]
+ end
+
return result
end
@@ -59,6 +63,23 @@ def err(*args)
return [false, nil]
end
+ # Returns the current captures made on the input (see
+ # Parslet::Atoms::Base#capture). Use as follows:
+ #
+ # context.captures[:foobar] # => returns capture :foobar
+ #
+ attr_reader :captures
+
+ # Runs the given block inside a new capture scope: a fresh binding is
+ # pushed onto #captures before yielding and popped again afterwards. The
+ # pop sits in an ensure clause, so it also runs when the block raises or
+ # returns early. Use the #scope method of Parslet::Atoms::DSL to call this.
+ #
+ def scope
+ captures.push
+ yield
+ ensure
+ captures.pop
+ end
+
private
def lookup(obj, pos)
@cache[pos][obj]
View
40 lib/parslet/atoms/dsl.rb
@@ -95,4 +95,44 @@ def present?
def as(name)
Parslet::Atoms::Named.new(self, name)
end
+
+ # Captures a part of the input and stores it under the name given. This
+ # is very useful to create self-referential parses. A capture stores
+ # the result of its parse (may be complex) on a successful parse action.
+ # Returns a Parslet::Atoms::Capture that wraps this atom.
+ #
+ # Example:
+ # str('a').capture(:b) # will store captures[:b] == 'a'
+ #
+ def capture(name)
+ Parslet::Atoms::Capture.new(self, name)
+ end
+
+ # Introduces a new capture scope. This means that all old captures stay
+ # accessible, but new values stored will only be available during the block
+ # given and the old values will be restored after the block.
+ #
+ # The block is called each time the resulting atom is applied and must
+ # return the parslet that should be run inside the new scope.
+ #
+ # Example:
+ # # :a will be available until the end of the block. Afterwards,
+ # # :a from the outer scope will be available again, if such a thing
+ # # exists.
+ # scope { str('a').capture(:a) }
+ #
+ def scope(&block)
+ Parslet::Atoms::Scope.new(block)
+ end
+
+ # Designates a piece of the parser as being dynamic. The block is called
+ # at parse time with the source and the parse context. It can either
+ # return a parser, which will then be applied to the input, or return the
+ # result of a parse directly.
+ #
+ # Dynamic parse pieces are never cached and can introduce performance
+ # abnormalities - use sparingly where other constructs fail.
+ #
+ # Example:
+ # # Parses either 'a' or 'b', depending on the weather
+ # dynamic { rand() < 0.5 ? str('a') : str('b') }
+ #
+ def dynamic(&block)
+ Parslet::Atoms::Dynamic.new(block)
+ end
end
View
34 lib/parslet/atoms/dynamic.rb
@@ -0,0 +1,34 @@
+# Evaluates a block at parse time. The result from the block can be either
+# a parser or a result from calling a parser. In the first case, the parser
+# will then be applied to the input, creating the result.
+#
+# Dynamic parses are never cached.
+#
+# Example:
+# dynamic { rand < 0.5 ? str('a') : str('b') }
+#
+class Parslet::Atoms::Dynamic < Parslet::Atoms::Base
+ attr_reader :block
+
+ def initialize(block)
+ @block = block
+ end
+
+ def cached?
+ false
+ end
+
+ def try(source, context, consume_all)
+ result = block.call(source, context)
+
+ # Result is either a parslet atom, in which case we apply it to the input,
+ # or it is a result from a parslet atom, in which case we return it
+ # directly.
+ if result.respond_to?(:apply)
+ return result.apply(source, context, consume_all)
+ else
+ return result
+ end
+ end
+end
+
View
22 lib/parslet/atoms/scope.rb
@@ -0,0 +1,22 @@
+# Starts a new scope in the parsing process. Please also see the #captures
+# method.
+#
+class Parslet::Atoms::Scope < Parslet::Atoms::Base
+ attr_reader :block
+ def initialize(block)
+ super()
+
+ @block = block
+ end
+
+ def cached?
+ false
+ end
+
+ def apply(source, context, consume_all)
+ context.scope do
+ parslet = block.call
+ return parslet.apply(source, context, consume_all)
+ end
+ end
+end
View
35 lib/parslet/scope.rb
@@ -0,0 +1,35 @@
+class Parslet::Scope
+ class Binding
+ attr_reader :parent
+
+ def initialize(parent=nil)
+ @parent = parent
+ @hash = Hash.new
+ end
+
+ def [](k)
+ @hash.fetch(k)
+ end
+ def []=(k,v)
+ @hash.store(k,v)
+ end
+ end
+
+ def [](k)
+ @current[k]
+ end
+ def []=(k,v)
+ @current[k] = v
+ end
+
+ def initialize
+ @current = Binding.new
+ end
+
+ def push
+ @current = Binding.new(@current)
+ end
+ def pop
+ @current = @current.parent
+ end
+end

0 comments on commit c80816b

Please sign in to comment.