Skip to content
This repository
Browse code

+ Rough postfix handling implementation

Relates to issue #68 where jmettraux proposes a new way of handling
input that is not consumed because the parser only matches a prefix.
This commit explores a different strategy where extra input after a
successful parse is treated as a parse error in the parse branch where it
occurs. This generates nice error messages so far.

The implementation is very rough and will probably become neater as time
goes on - but the idea seems to be valid.
  • Loading branch information...
commit 4037846ae2fa6d26f6714760354f361a89468f5b 1 parent 8a9b522
Kaspar Schiess authored September 16, 2012
4  example/ignore.rb
@@ -10,8 +10,8 @@ def initialize(parslet)
10 10
   def to_s_inner(prec)
11 11
     @parslet.to_s(prec)
12 12
   end
13  
-  def try(source, context)
14  
-    success, value = result = @parslet.try(source, context)
  13
+  def try(source, context, postfix)
  14
+    success, value = result = @parslet.try(source, context, postfix)
15 15
     
16 16
     return succ(nil) if success
17 17
     return result
4  lib/parslet/atoms/alternative.rb
@@ -30,9 +30,9 @@ def |(parslet)
30 30
     self.class.new(*@alternatives + [parslet])
31 31
   end
32 32
   
33  
-  def try(source, context)
  33
+  def try(source, context, postfix)
34 34
     errors = alternatives.map { |a|
35  
-      success, value = result = a.apply(source, context)
  35
+      success, value = result = a.apply(source, context, postfix)
36 36
       return result if success
37 37
       
38 38
       # Aggregate all errors
56  lib/parslet/atoms/base.rb
@@ -27,7 +27,7 @@ def parse(io, options={})
27 27
 
28 28
     # Try to cheat. Assuming that we'll be able to parse the input, don't 
29 29
     # run error reporting code. 
30  
-    success, value = setup_and_apply(source, nil)
  30
+    success, value = setup_and_apply(source, nil, !options[:prefix])
31 31
     
32 32
     # If we didn't succeed the parse, raise an exception for the user. 
33 33
     # Stack trace will be off, but the error tree should explain the reason
@@ -36,7 +36,8 @@ def parse(io, options={})
36 36
       # Cheating has not paid off. Now pay the cost: Rerun the parse,
37 37
       # gathering error information in the process.
38 38
       reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new
39  
-      success, value = setup_and_apply(source, reporter)
  39
+      source.pos = 0
  40
+      success, value = setup_and_apply(source, reporter, !options[:prefix])
40 41
       
41 42
       fail "Assertion failed: success was true when parsing with reporter" \
42 43
         if success
@@ -48,15 +49,12 @@ def parse(io, options={})
48 49
     end
49 50
     
50 51
     # assert: success is true
51  
-    
52  
-    # If we haven't consumed the input, then the pattern doesn't match. Try
53  
-    # to provide a good error message
  52
+
  53
+    # Extra input is now handled inline with the rest of the parsing. If 
  54
+    # really we have success == true, prefix: false and still some input 
  55
+    # is left dangling, that is a BUG.
54 56
     if !options[:prefix] && source.chars_left > 0
55  
-      old_pos = source.pos
56  
-      Parslet::Cause.format(
57  
-        source, old_pos, 
58  
-        "Don't know what to do with #{source.consume(10).to_s.inspect}").
59  
-        raise(Parslet::UnconsumedInput)
  57
+      fail "BUG: New error strategy should not reach this point."
60 58
     end
61 59
     
62 60
     return flatten(value)
@@ -67,21 +65,43 @@ def parse(io, options={})
67 65
   #
68 66
   # @return [<Boolean, Object>] Result of the parse. If the first member is 
69 67
   #   true, the parse has succeeded. 
70  
-  def setup_and_apply(source, error_reporter)
  68
+  def setup_and_apply(source, error_reporter, demand_postfix)
71 69
     context = Parslet::Atoms::Context.new(error_reporter)
72  
-    apply(source, context)
  70
+    apply(source, context, demand_postfix)
73 71
   end
74 72
 
75  
-  #---
76 73
   # Calls the #try method of this parslet. Success consumes input, error will 
77 74
   # rewind the input. 
78  
-  #+++
79  
-  def apply(source, context)
  75
+  #
  76
+  # @param source [Parslet::Source] source to read input from
  77
+  # @param context [Parslet::Atoms::Context] context to use for the parsing
  78
+  # @param postfix [Boolean] true if this atom is in postfix position 
  79
+  #   for the current parse. 
  80
+  def apply(source, context, postfix=false)
80 81
     old_pos = source.pos
81 82
     
82  
-    success, value = result = context.try_with_cache(self, source)
  83
+    success, value = result = context.try_with_cache(self, source, postfix)
83 84
 
84  
-    return result if success
  85
+    if success
  86
+      # If a postfix parse was made and doesn't result in the consumption of 
  87
+      # all the input, that is considered an error. 
  88
+      # old_pos = source.pos
  89
+      # Parslet::Cause.format(
  90
+      #   source, old_pos, 
  91
+      #   "Don't know what to do with #{source.consume(10).to_s.inspect}").
  92
+      #   raise(Parslet::UnconsumedInput)
  93
+      
  94
+      offending_pos   = source.pos
  95
+      offending_input = source.consume(10)
  96
+      source.pos = offending_pos
  97
+      return context.err(
  98
+        self, 
  99
+        source, 
  100
+        "Don't know what to do with #{offending_input.to_s.inspect}"
  101
+      ) if postfix && source.chars_left>0
  102
+      
  103
+      return result
  104
+    end
85 105
     
86 106
     # We only reach this point if the parse has failed. Rewind the input.
87 107
     source.pos = old_pos
@@ -91,7 +111,7 @@ def apply(source, context)
91 111
   # Override this in your Atoms::Base subclasses to implement parsing
92 112
   # behaviour. 
93 113
   #
94  
-  def try(source, context)
  114
+  def try(source, context, postfix)
95 115
     raise NotImplementedError, \
96 116
       "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
97 117
   end
4  lib/parslet/atoms/context.rb
@@ -22,12 +22,12 @@ def initialize(reporter=Parslet::ErrorReporter::Tree.new)
22 22
     # were consumed by a successful parse. Imitation of such a parse must 
23 23
     # advance the input pos by the same amount of bytes.
24 24
     #
25  
-    def try_with_cache(obj, source)
  25
+    def try_with_cache(obj, source, postfix)
26 26
       beg = source.pos
27 27
         
28 28
       # Not in cache yet? Return early.
29 29
       unless entry = lookup(obj, beg)
30  
-        result = obj.try(source, self)
  30
+        result = obj.try(source, self, postfix)
31 31
     
32 32
         set obj, beg, [result, source.pos-beg]
33 33
         return result
4  lib/parslet/atoms/entity.rb
@@ -17,8 +17,8 @@ def initialize(name, &block)
17 17
     @block = block
18 18
   end
19 19
 
20  
-  def try(source, context)
21  
-    parslet.apply(source, context)
  20
+  def try(source, context, postfix)
  21
+    parslet.apply(source, context, postfix)
22 22
   end
23 23
   
24 24
   def parslet
4  lib/parslet/atoms/lookahead.rb
@@ -21,10 +21,10 @@ def initialize(bound_parslet, positive=true)
21 21
     }
22 22
   end
23 23
   
24  
-  def try(source, context)
  24
+  def try(source, context, postfix)
25 25
     pos = source.pos
26 26
 
27  
-    success, value = bound_parslet.apply(source, context)
  27
+    success, value = bound_parslet.apply(source, context, postfix)
28 28
     
29 29
     if positive
30 30
       return succ(nil) if success
4  lib/parslet/atoms/named.rb
@@ -13,8 +13,8 @@ def initialize(parslet, name)
13 13
     @parslet, @name = parslet, name
14 14
   end
15 15
   
16  
-  def apply(source, context)
17  
-    success, value = result = parslet.apply(source, context)
  16
+  def apply(source, context, postfix)
  17
+    success, value = result = parslet.apply(source, context, postfix)
18 18
 
19 19
     return result unless success
20 20
     succ(
2  lib/parslet/atoms/re.rb
@@ -20,7 +20,7 @@ def initialize(match)
20 20
     }
21 21
   end
22 22
 
23  
-  def try(source, context)
  23
+  def try(source, context, postfix)
24 24
     return succ(source.consume(1)) if source.matches?(re)
25 25
     
26 26
     # No string could be read
16  lib/parslet/atoms/repetition.rb
@@ -15,18 +15,19 @@ def initialize(parslet, min, max, tag=:repetition)
15 15
     @min, @max = min, max
16 16
     @tag = tag
17 17
     @error_msgs = {
18  
-      :minrep  => "Expected at least #{min} of #{parslet.inspect}"
  18
+      :minrep  => "Expected at least #{min} of #{parslet.inspect}", 
  19
+      :unconsumed => "Extra input after last repetition"
19 20
     }
20 21
   end
21 22
   
22  
-  def try(source, context)
  23
+  def try(source, context, postfix)
23 24
     occ = 0
24 25
     accum = [@tag]   # initialize the result array with the tag (for flattening)
25 26
     start_pos = source.pos
26 27
     
27 28
     break_on = nil
28 29
     loop do
29  
-      success, value = parslet.apply(source, context)
  30
+      success, value = parslet.apply(source, context, false)
30 31
 
31 32
       break_on = value
32 33
       break unless success
@@ -49,6 +50,15 @@ def try(source, context)
49 50
       start_pos, 
50 51
       [break_on]) if occ < min
51 52
       
  53
+    # Postfix is true, that means that we're inside the part of the parser that
  54
+    # should consume the input completely. Repetition failing here means
  55
+    # probably that we didn't. 
  56
+    return context.err(
  57
+      self, 
  58
+      source, 
  59
+      @error_msgs[:unconsumed], 
  60
+      [break_on]) if postfix && source.chars_left>0
  61
+      
52 62
     return succ(accum)
53 63
   end
54 64
   
7  lib/parslet/atoms/sequence.rb
@@ -19,9 +19,10 @@ def >>(parslet)
19 19
     self.class.new(* @parslets+[parslet])
20 20
   end
21 21
   
22  
-  def try(source, context)
23  
-    succ([:sequence]+parslets.map { |p| 
24  
-      success, value = p.apply(source, context) 
  22
+  def try(source, context, postfix)
  23
+    succ([:sequence]+parslets.map.each_with_index { |p, idx| 
  24
+      child_postfix = postfix && (idx == parslets.size-1)
  25
+      success, value = p.apply(source, context, child_postfix) 
25 26
 
26 27
       unless success
27 28
         return context.err(self, source, @error_msgs[:failed], [value]) 
7  lib/parslet/atoms/str.rb
@@ -17,13 +17,14 @@ def initialize(str)
17 17
     }
18 18
   end
19 19
   
20  
-  def try(source, context)
  20
+  def try(source, context, postfix)
21 21
     return succ(source.consume(@len)) if source.matches?(str)
22 22
     
23  
-    # Failures: 
  23
+    # Input ending early:
24 24
     return context.err(self, source, @error_msgs[:premature]) \
25 25
       if source.chars_left<@len
26  
-      
  26
+    
  27
+    # Expected something, but got something else instead:  
27 28
     error_pos = source.pos  
28 29
     return context.err_at(
29 30
       self, source, 
4  lib/parslet/parser.rb
@@ -57,8 +57,8 @@ def root(name)
57 57
     end
58 58
   end
59 59
   
60  
-  def try(source, context)
61  
-    root.try(source, context)
  60
+  def try(source, context, postfix)
  61
+    root.try(source, context, postfix)
62 62
   end
63 63
   
64 64
   def to_s_inner(prec)
24  spec/acceptance/regression_spec.rb
@@ -130,8 +130,7 @@ def remove_indent(s)
130 130
     
131 131
     it "should count lines correctly" do
132 132
       cause = catch_failed_parse {
133  
-        subject.parse('
134  
-          a 
  133
+        subject.parse('a
135 134
           a a a 
136 135
           aaa // ff
137 136
           /* 
@@ -142,7 +141,13 @@ def remove_indent(s)
142 141
       }
143 142
 
144 143
       remove_indent(cause.ascii_tree).should == remove_indent(%q(
145  
-        Don't know what to do with "b\n        " at line 8 char 11.).strip)
  144
+      Expected one of [(LINE EOL){1, }, LINE] at line 1 char 2.
  145
+      |- Extra input after last repetition at line 7 char 11.
  146
+      |  `- Failed to match sequence (LINE EOL) at line 7 char 11.
  147
+      |     `- Failed to match sequence (SPACE? [\n\r]{1, } SPACE?) at line 7 char 11.
  148
+      |        `- Expected at least 1 of [\n\r] at line 7 char 11.
  149
+      |           `- Failed to match [\n\r] at line 7 char 11.
  150
+      `- Don't know what to do with "\n         " at line 1 char 2.).strip)
146 151
     end 
147 152
   end
148 153
 
@@ -203,10 +208,17 @@ class TwoCharLanguage < Parslet::Parser
203 208
     rule(:twochar) { any >> str('2') }
204 209
   end
205 210
   describe TwoCharLanguage do
  211
+    def di(s)
  212
+      s.strip.to_s.lines.map { |l| l.chomp.strip }.join("\n")
  213
+    end
  214
+
206 215
     it "should raise UnconsumedInput" do
207  
-      expect {
208  
-        subject.parse('123')
209  
-      }.to raise_error(Parslet::UnconsumedInput)
  216
+      error = catch_failed_parse {
  217
+        subject.parse('123') }
  218
+      di(error.ascii_tree).should == di(%q(
  219
+        Failed to match sequence (. '2') at line 1 char 3.
  220
+        `- Don't know what to do with "3" at line 1 char 3.
  221
+      ))
210 222
     end 
211 223
   end
212 224
 end
21  spec/acceptance/unconsumed_input_spec.rb
... ...
@@ -0,0 +1,21 @@
  1
+require 'spec_helper'
  2
+
  3
+describe "Unconsumed input:" do
  4
+  class RepeatingBlockParser < Parslet::Parser
  5
+    root :expressions
  6
+    rule(:expressions) { expression.repeat }
  7
+    rule(:expression) { str('(') >> aab >> str(')') }
  8
+    rule(:aab) { str('a').repeat(1) >> str('b') }
  9
+  end
  10
+  describe RepeatingBlockParser do
  11
+    let(:parser) { described_class.new }
  12
+    it "throws annotated error" do
  13
+      error = catch_failed_parse { parser.parse('(aaac)') }
  14
+    end
  15
+    it "doesn't error out if prefix is true" do
  16
+      expect {
  17
+        parser.parse('(aaac)', prefix: true)
  18
+      }.not_to raise_error
  19
+    end
  20
+  end
  21
+end
6  spec/parslet/atom_results_spec.rb
@@ -9,8 +9,10 @@
9 9
       [str('foo').maybe >> str('bar'), "bar", "bar"],
10 10
       [str('bar') >> str('foo').maybe, "bar", 'bar'], 
11 11
       
12  
-      # These might be hard to understand; look at the result of str.maybe >> str
13  
-      # and str.maybe >> str first. 
  12
+      # These might be hard to understand; look at the result of 
  13
+      #   str.maybe >> str
  14
+      # and 
  15
+      #   str >> str.maybe first. 
14 16
       [(str('f').maybe >> str('b')).repeat, "bb", "bb"],
15 17
       [(str('b') >> str('f').maybe).repeat, "bb", 'bb'], 
16 18
       
11  spec/parslet/atoms/base_spec.rb
@@ -7,7 +7,7 @@
7 7
   describe "<- #try(io)" do
8 8
     it "should raise NotImplementedError" do
9 9
       lambda {
10  
-        parslet.try(flexmock(:io), context)
  10
+        parslet.try(flexmock(:io), context, false)
11 11
       }.should raise_error(NotImplementedError)
12 12
     end 
13 13
   end
@@ -82,11 +82,10 @@ def unnamed(obj)
82 82
     let(:parslet) { Parslet.str('foo') }
83 83
     
84 84
     it "should raise with a proper error message" do
85  
-      begin
86  
-        parslet.parse('foobar')
87  
-      rescue Parslet::ParseFailed => ex
88  
-        ex.message.should == "Don't know what to do with \"bar\" at line 1 char 4."
89  
-      end
  85
+      error = catch_failed_parse {
  86
+        parslet.parse('foobar') }
  87
+      
  88
+      error.to_s.should == "Don't know what to do with \"bar\" at line 1 char 4."
90 89
     end 
91 90
   end
92 91
   context "when only parsing string prefix" do
7  spec/parslet/atoms_spec.rb
@@ -263,12 +263,13 @@ def src(str); Parslet::Source.new str; end
263 263
         end
264 264
       end
265 265
 
266  
-      it "raises Parslet::UnconsumedInput" do
267  
-        exception.should be_kind_of(Parslet::UnconsumedInput)
  266
+      it "raises Parslet::ParseFailed" do
  267
+        # ParseFailed here, because the input doesn't match the parser grammar. 
  268
+        exception.should be_kind_of(Parslet::ParseFailed)
268 269
       end 
269 270
       it "has the correct error message" do
270 271
         exception.message.should == \
271  
-          "Don't know what to do with \".\" at line 1 char 2."
  272
+          "Extra input after last repetition at line 1 char 2."
272 273
       end 
273 274
     end
274 275
   end

0 notes on commit 4037846

Please sign in to comment.
Something went wrong with that request. Please try again.