Skip to content

Commit

Permalink
Improved support for multibyte characters
Browse files (browse the repository at this point in the history)
  • Loading branch information
mjackson committed Jan 13, 2011
1 parent c5f67b6 commit 51cc887
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 17 deletions.
15 changes: 8 additions & 7 deletions lib/citrus.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# encoding: UTF-8

require 'strscan'
require 'pathname'
require 'citrus/version'
Expand Down Expand Up @@ -243,12 +245,11 @@ def exec(rule, events=[])
index = events.size

if apply_rule(rule, position, events).size > index
position += events[-1]
@max_offset = position if position > @max_offset
@max_offset = pos if pos > @max_offset
else
self.pos = position
end

self.pos = position

events
end

Expand Down Expand Up @@ -837,12 +838,12 @@ def initialize(regexp=/^/)

# Returns an array of events for this rule on the given +input+.
def exec(input, events=[])
length = input.scan_full(@regexp, false, false)
match = input.scan(@regexp)

if length
if match
events << self
events << CLOSE
events << length
events << match.length
end

events
Expand Down
4 changes: 3 additions & 1 deletion lib/citrus/file.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# encoding: UTF-8

require 'citrus'

module Citrus
Expand Down Expand Up @@ -227,7 +229,7 @@ def flags

rule :character_class do
all(/\[(?:\\?.)*?\]/, :space) {
Regexp.new(first.to_s, nil, 'n')
eval("/#{first.to_s}/")
}
end

Expand Down
2 changes: 1 addition & 1 deletion test/memoized_input_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_memoized?
def test_cache_hits1
input = MemoizedInput.new('a')
input.exec(LetterA.rule(:top))
assert_equal(3, input.cache_hits)
assert_equal(2, input.cache_hits)
end

def test_cache_hits2
Expand Down
45 changes: 39 additions & 6 deletions test/multibyte_test.rb
Original file line number Diff line number Diff line change
@@ -1,32 +1,65 @@
# encoding: UTF-8

require File.expand_path('../helper', __FILE__)

class MultibyteTest < Test::Unit::TestCase
grammar :Multibyte do
rule :string do
"\xFF"
"ä"
end

rule :regexp do
/\xFF/
/(ä)+/
end

rule :character_class do
/[\xFF]/
/[ä]+/
end
end

def test_multibyte_string
m = Multibyte.parse("\xFF", :root => :string)
m = Multibyte.parse("ä", :root => :string)
assert(m)
end

def test_multibyte_regexp
m = Multibyte.parse("\xFF", :root => :regexp)
m = Multibyte.parse("äää", :root => :regexp)
assert(m)
end

def test_multibyte_character_class
m = Multibyte.parse("\xFF", :root => :character_class)
m = Multibyte.parse("äää", :root => :character_class)
assert(m)
end

Citrus.eval(<<-CITRUS)
grammar Multibyte2
rule string
"ä"
end
rule regexp
/(ä)+/
end
rule character_class
[ä]+
end
end
CITRUS

def test_multibyte2_string
m = Multibyte2.parse("ä", :root => :string)
assert(m)
end

def test_multibyte2_regexp
m = Multibyte2.parse("äää", :root => :regexp)
assert(m)
end

def test_multibyte2_character_class
m = Multibyte2.parse("äää", :root => :character_class)
assert(m)
end
end
6 changes: 4 additions & 2 deletions test/parse_error_test.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# encoding: UTF-8

require File.expand_path('../helper', __FILE__)

class ParseErrorTest < Test::Unit::TestCase
Expand Down Expand Up @@ -32,10 +34,10 @@ def test_basic

def test_single_line
begin
Sentence.parse('Once upon 4 time.')
Sentence.parse('Once upon ä time.')
rescue ParseError => e
assert_equal(10, e.offset)
assert_equal('Once upon 4 time.', e.line)
assert_equal('Once upon ä time.', e.line)
assert_equal(1, e.line_number)
assert_equal(10, e.line_offset)
end
Expand Down

0 comments on commit 51cc887

Please sign in to comment.