Skip to content

Commit

Permalink
Improved support for multibyte characters
Browse files: browse the repository at this point in the history
  • Loading branch information
mjackson committed Jan 13, 2011
1 parent c5f67b6 commit 51cc887
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 17 deletions.
15 changes: 8 additions & 7 deletions lib/citrus.rb
@@ -1,3 +1,5 @@
# encoding: UTF-8

require 'strscan' require 'strscan'
require 'pathname' require 'pathname'
require 'citrus/version' require 'citrus/version'
Expand Down Expand Up @@ -243,12 +245,11 @@ def exec(rule, events=[])
index = events.size index = events.size


if apply_rule(rule, position, events).size > index if apply_rule(rule, position, events).size > index
position += events[-1] @max_offset = pos if pos > @max_offset
@max_offset = position if position > @max_offset else
self.pos = position
end end


self.pos = position

events events
end end


Expand Down Expand Up @@ -837,12 +838,12 @@ def initialize(regexp=/^/)


# Returns an array of events for this rule on the given +input+. # Returns an array of events for this rule on the given +input+.
def exec(input, events=[]) def exec(input, events=[])
length = input.scan_full(@regexp, false, false) match = input.scan(@regexp)


if length if match
events << self events << self
events << CLOSE events << CLOSE
events << length events << match.length
end end


events events
Expand Down
4 changes: 3 additions & 1 deletion lib/citrus/file.rb
@@ -1,3 +1,5 @@
# encoding: UTF-8

require 'citrus' require 'citrus'


module Citrus module Citrus
Expand Down Expand Up @@ -227,7 +229,7 @@ def flags


rule :character_class do rule :character_class do
all(/\[(?:\\?.)*?\]/, :space) { all(/\[(?:\\?.)*?\]/, :space) {
Regexp.new(first.to_s, nil, 'n') eval("/#{first.to_s}/")
} }
end end


Expand Down
2 changes: 1 addition & 1 deletion test/memoized_input_test.rb
Expand Up @@ -26,7 +26,7 @@ def test_memoized?
def test_cache_hits1 def test_cache_hits1
input = MemoizedInput.new('a') input = MemoizedInput.new('a')
input.exec(LetterA.rule(:top)) input.exec(LetterA.rule(:top))
assert_equal(3, input.cache_hits) assert_equal(2, input.cache_hits)
end end


def test_cache_hits2 def test_cache_hits2
Expand Down
45 changes: 39 additions & 6 deletions test/multibyte_test.rb
@@ -1,32 +1,65 @@
# encoding: UTF-8

require File.expand_path('../helper', __FILE__) require File.expand_path('../helper', __FILE__)


class MultibyteTest < Test::Unit::TestCase class MultibyteTest < Test::Unit::TestCase
grammar :Multibyte do grammar :Multibyte do
rule :string do rule :string do
"\xFF" "ä"
end end


rule :regexp do rule :regexp do
/\xFF/ /(ä)+/
end end


rule :character_class do rule :character_class do
/[\xFF]/ /[ä]+/
end end
end end


def test_multibyte_string def test_multibyte_string
m = Multibyte.parse("\xFF", :root => :string) m = Multibyte.parse("ä", :root => :string)
assert(m) assert(m)
end end


def test_multibyte_regexp def test_multibyte_regexp
m = Multibyte.parse("\xFF", :root => :regexp) m = Multibyte.parse("äää", :root => :regexp)
assert(m) assert(m)
end end


def test_multibyte_character_class def test_multibyte_character_class
m = Multibyte.parse("\xFF", :root => :character_class) m = Multibyte.parse("äää", :root => :character_class)
assert(m)
end

Citrus.eval(<<-CITRUS)
grammar Multibyte2
rule string
"ä"
end
rule regexp
/(ä)+/
end
rule character_class
[ä]+
end
end
CITRUS

def test_multibyte2_string
m = Multibyte2.parse("ä", :root => :string)
assert(m)
end

def test_multibyte2_regexp
m = Multibyte2.parse("äää", :root => :regexp)
assert(m)
end

def test_multibyte2_character_class
m = Multibyte2.parse("äää", :root => :character_class)
assert(m) assert(m)
end end
end end
6 changes: 4 additions & 2 deletions test/parse_error_test.rb
@@ -1,3 +1,5 @@
# encoding: UTF-8

require File.expand_path('../helper', __FILE__) require File.expand_path('../helper', __FILE__)


class ParseErrorTest < Test::Unit::TestCase class ParseErrorTest < Test::Unit::TestCase
Expand Down Expand Up @@ -32,10 +34,10 @@ def test_basic


def test_single_line def test_single_line
begin begin
Sentence.parse('Once upon 4 time.') Sentence.parse('Once upon ä time.')
rescue ParseError => e rescue ParseError => e
assert_equal(10, e.offset) assert_equal(10, e.offset)
assert_equal('Once upon 4 time.', e.line) assert_equal('Once upon ä time.', e.line)
assert_equal(1, e.line_number) assert_equal(1, e.line_number)
assert_equal(10, e.line_offset) assert_equal(10, e.line_offset)
end end
Expand Down

0 comments on commit 51cc887

Please sign in to comment.