Permalink
Browse files

- Added support for UTF-8 characters. The patch was originally proposed by hagabaka on GitHub: http://github.com/hagabaka/treetop/commit/aff5a7e08200fbe7d1e047bb101256be377bab92

- All specs still pass.
- Merged with lazy terminal nodes by Clifford Heath
  • Loading branch information...
1 parent c3a825f commit 2c66175675e667cab9701c6803384f6f49a33ff1 Nicolas Fouché committed with cjheath Aug 17, 2009
@@ -4,9 +4,10 @@ class AnythingSymbol < AtomicExpression
def compile(address, builder, parent_expression = nil)
super
builder.if__ "index < input_length" do
- assign_result "instantiate_node(#{node_class_name},input, index...(index + 1))"
+ builder << 'next_character = index + input[index..-1].match(/\A(.)/um).end(1)'
+ assign_result "instantiate_node(#{node_class_name},input, index...next_character)"
extend_result_with_inline_module
- builder << "@index += 1"
+ builder << "@index = next_character"
end
builder.else_ do
builder << 'terminal_parse_failure("any character")'
@@ -6,12 +6,13 @@ def compile(address, builder, parent_expression = nil)
builder.if__ "has_terminal?(#{grounded_regexp(text_value)}, true, index)" do
if address == 0 || decorated?
- assign_result "instantiate_node(#{node_class_name},input, index...(index + 1))"
+ builder << 'next_character = index + input[index..-1].match(/\A(.)/um).end(1)'
+ assign_result "instantiate_node(#{node_class_name}, input, index...next_character)"
extend_result_with_inline_module
else
assign_lazily_instantiated_node
end
- builder << "@index += 1"
+ builder << "@index = next_character"
end
builder.else_ do
# "terminal_parse_failure(#{single_quote(characters)})"
@@ -88,7 +88,7 @@ def instantiate_node(node_type,*args)
def has_terminal?(terminal, regex, index)
if regex
- rx = @regexps[terminal] ||= Regexp.new(terminal)
+ rx = @regexps[terminal] ||= Regexp.new(terminal, nil, 'u')
input.index(rx, index) == index
else
input[index, terminal.size] == terminal
@@ -21,6 +21,13 @@ class Foo < Treetop::Runtime::SyntaxNode
it "fails to parse epsilon" do
parse('').should be_nil
end
+
+ describe "an anything symbol" do
+ testing_expression '.'
+ it "matches an UTF-8 character" do
+ parse("ø").should_not be_nil
+ end
+ end
end
module ModFoo
@@ -261,4 +261,10 @@ module ModFoo
end
end
+ describe "A character containing UTF-8 characters" do
+ testing_expression "[æøå]"
+ it "recognizes the UTF-8 characters" do
+ parse("ø").should_not be_nil
+ end
+ end
end

0 comments on commit 2c66175

Please sign in to comment.