Permalink
Browse files

Added the ButPredicate to be used with the tilde.

  • Loading branch information...
1 parent 2c52b6b commit 0d0f2eb40b68d79749b53371b2873e89748e9996 @mjackson committed Sep 19, 2010
Showing with 96 additions and 16 deletions.
  1. +11 −4 README
  2. +48 −10 lib/citrus.rb
  3. +10 −2 lib/citrus/file.rb
  4. +27 −0 test/but_predicate_test.rb
View
15 README
@@ -149,6 +149,12 @@ neither case is any input consumed.
!'a' 'b' # match a "b" that is not preceded by an "a"
!'a' . # match any character except for "a"
+A special form of lookahead is also supported which, unlike the two above,
+does consume input: it matches one or more characters up to the point where a
+given expression matches.
+
+ ~'a' # match all characters until an "a"
+ ~/xyz/ # match all characters until /xyz/ matches
+
== Sequences
Sequences of expressions may be separated by a space to indicate that the rules
@@ -183,11 +189,11 @@ immediately preceding any expression.
== Precedence
-The following table contains a list of all operators and their precedence. A
-higher level of precedence indicates tighter binding.
+The following table contains a list of all Citrus operators and their
+precedence. A higher precedence indicates tighter binding.
-Operator | Name | Level of Precedence
---------------------------------------------------------------
+Operator | Name | Precedence
+-----------------------------------------------------
'' | String (single quoted) | 6
"" | String (double quoted) | 6
[] | Character class | 6
@@ -199,6 +205,7 @@ Operator | Name | Level of Precedence
? | Repetition (zero or one) | 5
& | And predicate | 4
! | Not predicate | 4
+~ | But predicate | 4
: | Label | 4
<> | Extension (module name) | 3
{} | Extension (literal) | 3
View
@@ -4,20 +4,20 @@
#
# http://mjijackson.com/citrus
module Citrus
- VERSION = [1, 7, 0]
-
- Infinity = 1.0 / 0
-
- # A pattern to match any character, including \n.
- ANY = /./m
-
autoload :File, 'citrus/file'
+ VERSION = [1, 7, 0]
+
# Returns the current version of Citrus as a dot-separated string, e.g. "1.7.0".
def self.version
VERSION.join('.')
end
+ # A pattern to match any character, including \n.
+ ANY = /./m
+
+ Infinity = 1.0 / 0
+
F = ::File
# Loads the grammar from the given +file+ into the global scope using #eval.
@@ -199,6 +199,12 @@ def notp(rule, &block)
ext(NotPredicate.new(rule), block)
end
+ # Creates a new ButPredicate (the +~expr+ notation) using the given +rule+. A
+ # block may be provided to specify semantic behavior (via #ext).
+ def butp(rule, &block)
+ ext(ButPredicate.new(rule), block)
+ end
+
# Creates a new Label using the given +rule+ and +label+. A block may be
# provided to specify semantic behavior (via #ext).
def label(rule, label, &block)
@@ -568,7 +574,7 @@ def initialize(rule='')
# Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
# no match can be made.
def match(input, offset=0)
- create_match(rule.dup, offset) if rule == input[offset, rule.length]
+ create_match(rule.dup, offset) if input[offset, rule.length] == rule
end
end
@@ -626,7 +632,7 @@ module Predicate
include Nonterminal
def initialize(rule='')
- super([ rule ])
+ super([rule])
end
# Returns the Rule object this rule uses to match.
@@ -677,6 +683,38 @@ def to_s
end
end
+ # A ButPredicate is a Predicate that consumes all characters until its rule
+ # matches (without consuming the rule's own match). It must consume at least
+ # one character to succeed. Citrus notation: any expression preceded by a tilde:
+ #
+ # ~expr
+ #
+ class ButPredicate
+ include Predicate
+
+ ANY_RULE = Rule.new(ANY) # Matches any single character (see ANY); used to advance.
+
+ # Returns the Match covering every character from +offset+ up to the point
+ # where the rule first matches, +nil+ if no character could be consumed.
+ def match(input, offset=0)
+ matches = []
+ os = offset # Current scan position; advances past each consumed character.
+ while input.match(rule, os).nil?
+ m = input.match(ANY_RULE, os)
+ break unless m # Nothing left to consume (presumably end of input).
+ matches << m
+ os += m.length
+ end
+ # Succeed only if something was consumed; join all submatch text into one match.
+ create_match(matches.map {|m| m.text }.join, offset) if matches.any?
+ end
+
+ # Returns the Citrus notation of this rule as a string, e.g. ~"a".
+ def to_s
+ '~' + rule.embed
+ end
+ end
+
# A Label is a Predicate that applies a new name to any matches made by its
# rule. The Citrus notation is any sequence of word characters (i.e.
# <tt>[a-zA-Z0-9_]</tt>) followed by a colon, followed by any other
@@ -704,7 +742,7 @@ def label_name=(label_name)
# no match can be made. When a Label makes a match, it re-names the match to
# the value of its label.
def match(input, offset=0)
- m = rule.match(input, offset)
+ m = input.match(rule, offset)
extend_match(m, label_name) if m
end
View
@@ -210,7 +210,7 @@ def value
rule :anything_symbol do
all('.', :space) {
def value
- ANY # Match newlines
+ ANY
end
}
end
@@ -224,7 +224,7 @@ def value
end
rule :predicate do
- any(:and, :not, :label)
+ any(:and, :not, :but, :label)
end
rule :and do
@@ -243,6 +243,14 @@ def wrap(rule)
}
end
+ rule :but do # Grammar rule for the but-predicate prefix.
+ all('~', :space) { # A literal tilde followed by the :space rule.
+ def wrap(rule) # Wraps the given +rule+ in a new ButPredicate.
+ ButPredicate.new(rule)
+ end
+ }
+ end
+
rule :label do
all(/[a-zA-Z0-9_]+/, :space, ':', :space) {
def wrap(rule)
View
@@ -0,0 +1,27 @@
+require File.expand_path('../helper', __FILE__)
+
+class ButPredicateTest < Test::Unit::TestCase # Unit tests for ButPredicate (~expr).
+
+ def test_terminal? # A ButPredicate must not report itself as a terminal.
+ rule = ButPredicate.new
+ assert_equal(false, rule.terminal?)
+ end
+
+ def test_match # Consumes input that is not "a"; fails when "a" comes first.
+ rule = ButPredicate.new('a')
+
+ match = rule.match(input('b')) # "b" does not match the rule, so it is consumed.
+ assert(match)
+ assert_equal('b', match.text)
+ assert_equal(1, match.length)
+
+ match = rule.match(input('a')) # The rule matches at once, so nothing is consumed.
+ assert_equal(nil, match)
+ end
+
+ def test_to_s # The notation is a tilde followed by the embedded rule.
+ rule = ButPredicate.new('a')
+ assert_equal('~"a"', rule.to_s)
+ end
+
+end

0 comments on commit 0d0f2eb

Please sign in to comment.