Permalink
Browse files

Improved support for multibyte characters

  • Loading branch information...
1 parent c5f67b6 commit 51cc88767cadab40f06d7f3767ffb59bb9ba0557 @mjackson committed Jan 13, 2011
Showing with 55 additions and 17 deletions.
  1. +8 −7 lib/citrus.rb
  2. +3 −1 lib/citrus/file.rb
  3. +1 −1 test/memoized_input_test.rb
  4. +39 −6 test/multibyte_test.rb
  5. +4 −2 test/parse_error_test.rb
View lib/citrus.rb
@@ -1,3 +1,5 @@
+# encoding: UTF-8
+
require 'strscan'
require 'pathname'
require 'citrus/version'
@@ -243,12 +245,11 @@ def exec(rule, events=[])
index = events.size
if apply_rule(rule, position, events).size > index
- position += events[-1]
- @max_offset = position if position > @max_offset
+ @max_offset = pos if pos > @max_offset
+ else
+ self.pos = position
end
- self.pos = position
-
events
end
@@ -837,12 +838,12 @@ def initialize(regexp=/^/)
# Returns an array of events for this rule on the given +input+.
def exec(input, events=[])
- length = input.scan_full(@regexp, false, false)
+ match = input.scan(@regexp)
- if length
+ if match
events << self
events << CLOSE
- events << length
+ events << match.length
end
events
View lib/citrus/file.rb
@@ -1,3 +1,5 @@
+# encoding: UTF-8
+
require 'citrus'
module Citrus
@@ -227,7 +229,7 @@ def flags
rule :character_class do
all(/\[(?:\\?.)*?\]/, :space) {
- Regexp.new(first.to_s, nil, 'n')
+ eval("/#{first.to_s}/")
}
end
View test/memoized_input_test.rb
@@ -26,7 +26,7 @@ def test_memoized?
def test_cache_hits1
input = MemoizedInput.new('a')
input.exec(LetterA.rule(:top))
- assert_equal(3, input.cache_hits)
+ assert_equal(2, input.cache_hits)
end
def test_cache_hits2
View test/multibyte_test.rb
@@ -1,32 +1,65 @@
+# encoding: UTF-8
+
require File.expand_path('../helper', __FILE__)
class MultibyteTest < Test::Unit::TestCase
grammar :Multibyte do
rule :string do
- "\xFF"
+ "ä"
end
rule :regexp do
- /\xFF/
+ /(ä)+/
end
rule :character_class do
- /[\xFF]/
+ /[ä]+/
end
end
def test_multibyte_string
- m = Multibyte.parse("\xFF", :root => :string)
+ m = Multibyte.parse("ä", :root => :string)
assert(m)
end
def test_multibyte_regexp
- m = Multibyte.parse("\xFF", :root => :regexp)
+ m = Multibyte.parse("äää", :root => :regexp)
assert(m)
end
def test_multibyte_character_class
- m = Multibyte.parse("\xFF", :root => :character_class)
+ m = Multibyte.parse("äää", :root => :character_class)
+ assert(m)
+ end
+
+ Citrus.eval(<<-CITRUS)
+ grammar Multibyte2
+ rule string
+ "ä"
+ end
+
+ rule regexp
+ /(ä)+/
+ end
+
+ rule character_class
+ [ä]+
+ end
+ end
+ CITRUS
+
+ def test_multibyte2_string
+ m = Multibyte2.parse("ä", :root => :string)
+ assert(m)
+ end
+
+ def test_multibyte2_regexp
+ m = Multibyte2.parse("äää", :root => :regexp)
+ assert(m)
+ end
+
+ def test_multibyte2_character_class
+ m = Multibyte2.parse("äää", :root => :character_class)
assert(m)
end
end
View test/parse_error_test.rb
@@ -1,3 +1,5 @@
+# encoding: UTF-8
+
require File.expand_path('../helper', __FILE__)
class ParseErrorTest < Test::Unit::TestCase
@@ -32,10 +34,10 @@ def test_basic
def test_single_line
begin
- Sentence.parse('Once upon 4 time.')
+ Sentence.parse('Once upon ä time.')
rescue ParseError => e
assert_equal(10, e.offset)
- assert_equal('Once upon 4 time.', e.line)
+ assert_equal('Once upon ä time.', e.line)
assert_equal(1, e.line_number)
assert_equal(10, e.line_offset)
end

0 comments on commit 51cc887

Please sign in to comment.