Lex identifiers that contain code points above U+FFFF.

IDENT_RE previously matched only \u0080-\uFFFF when Encoding is defined, so
an identifier such as U+1D592 was not matched by it.  The character class now
runs up to U+10FFFF, the last Unicode code point.  The literal is built with
eval because Ruby 1.8 cannot parse the \u{} escape.  A tokenizer test for a
supplementary-plane identifier is added; the test file is marked as UTF-8
because it now contains a non-ASCII literal.

diff --git a/lib/rdoc/ruby_lex.rb b/lib/rdoc/ruby_lex.rb
index 845569b0bc..313e69ea47 100644
--- a/lib/rdoc/ruby_lex.rb
+++ b/lib/rdoc/ruby_lex.rb
@@ -857,7 +857,7 @@ def identify_gvar
   end
 
   IDENT_RE = if defined? Encoding then
-               /[\w\u0080-\uFFFF]/u
+               eval '/[\w\u{0080}-\u{10FFFF}]/u' # 1.8 can't parse \u{}
              else
                /[\w\x80-\xFF]/
              end
diff --git a/test/test_rdoc_ruby_lex.rb b/test/test_rdoc_ruby_lex.rb
index 1dc11e95a3..dfa350e018 100644
--- a/test/test_rdoc_ruby_lex.rb
+++ b/test/test_rdoc_ruby_lex.rb
@@ -1,3 +1,5 @@
+# coding: UTF-8
+
 require 'rdoc/test_case'
 
 class TestRDocRubyLex < RDoc::TestCase
@@ -133,6 +135,14 @@ def test_class_tokenize_heredoc_percent_N
     assert_equal expected, tokens
   end
 
+  def test_class_tokenize_identifier_high_unicode
+    tokens = RDoc::RubyLex.tokenize '𝖒', nil
+
+    expected = @TK::TkIDENTIFIER.new(0, 1, 0, '𝖒')
+
+    assert_equal expected, tokens.first
+  end
+
   def test_class_tokenize_percent_1
     tokens = RDoc::RubyLex.tokenize 'v%10==10', nil
 