diff --git a/lib/rouge/lexers/crystal.rb b/lib/rouge/lexers/crystal.rb index 603b4ecca2..6bd6a7c8bd 100644 --- a/lib/rouge/lexers/crystal.rb +++ b/lib/rouge/lexers/crystal.rb @@ -21,7 +21,7 @@ def self.detect?(text) rule %r( : # initial : @{0,2} # optional ivar, for :@foo and :@@foo - [a-z_]\w*[!?]? # the symbol + [\p{Ll}_]\p{Word}*[!?]? # the symbol )xi, Str::Symbol # special symbols @@ -35,7 +35,7 @@ def self.detect?(text) # %-sigiled strings # %(abc), %[abc], %, %.abc., %r.abc., etc delimiter_map = { '{' => '}', '[' => ']', '(' => ')', '<' => '>' } - rule %r/%([rqswQWxiI])?([^\w\s}])/ do |m| + rule %r/%([rqswQWxiI])?([^\p{Word}\s}])/ do |m| open = Regexp.escape(m[2]) close = Regexp.escape(delimiter_map[m[2]] || m[2]) interp = /[rQWxI]/ === m[1] @@ -77,7 +77,7 @@ def self.detect?(text) state :strings do mixin :symbols - rule %r/\b[a-z_]\w*?[?!]?:\s+/, Str::Symbol, :expr_start + rule %r/\b[\p{Ll}_]\p{Word}*?[?!]?:\s+/, Str::Symbol, :expr_start rule %r/"/, Str::Double, :simple_string rule %r/(?_*\$?:"]), Name::Variable::Global rule %r/\$-[0adFiIlpvw]/, Name::Variable::Global rule %r/::/, Operator @@ -181,7 +181,7 @@ def self.detect?(text) rule %r( (module) (\s+) - ([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*) + ([\p{L}_][\p{L}0-9_]*(::[\p{L}_][\p{L}0-9_]*)*) )x do groups Keyword, Text, Name::Namespace end @@ -207,14 +207,14 @@ def self.detect?(text) # Otherwise, they will be parsed as :method_call rule %r/\.{2,3}/, Operator, :expr_start - rule %r/[A-Z][a-zA-Z0-9_]*/, Name::Constant, :method_call - rule %r/(\.|::)(\s*)([a-z_]\w*[!?]?|[*%&^`~+-\/\[<>=])/ do + rule %r/[\p{Lu}][\p{L}0-9_]*/, Name::Constant, :method_call + rule %r/(\.|::)(\s*)([\p{Ll}_]\p{Word}*[!?]?|[*%&^`~+-\/\[<>=])/ do groups Punctuation, Text, Name::Function push :method_call end - rule %r/[a-zA-Z_]\w*[?!]/, Name, :expr_start - rule %r/[a-zA-Z_]\w*/, Name, :method_call + rule %r/[\p{L}_]\p{Word}*[?!]/, Name, :expr_start + rule %r/[\p{L}_]\p{Word}*/, Name, :method_call rule %r/\*\*|\/\/|>=|<=|<=>|<>?|=~|={3}|!~|&&?|\|\||\./, Operator, :expr_start rule %r/{%|%}/, Punctuation @@ -225,7 +225,7 @@ def self.detect?(text) end state :has_heredocs do - rule %r/(?>? | <=>? | >= | ===? ) @@ -311,7 +311,7 @@ def self.detect?(text) goto :expr_start end - rule %r/[A-Z_]\w*/, Name::Class, :pop! + rule %r/[\p{Lu}_]\p{Word}*/, Name::Class, :pop! rule(//) { pop! } end @@ -343,7 +343,7 @@ def self.detect?(text) state :string_intp do rule %r/[#][{]/, Str::Interpol, :in_interp - rule %r/#(@@?|\$)[a-z_]\w*/i, Str::Interpol + rule %r/#(@@?|\$)[\p{Ll}_]\p{Word}*/i, Str::Interpol end state :string_intp_escaped do @@ -399,7 +399,7 @@ def self.detect?(text) rule %r( [?](\\[MC]-)* # modifiers (\\([\\abefnrstv\#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S) - (?!\w) + (?!\p{Word}) )x, Str::Char, :pop! # special case for using a single space. Ruby demands that diff --git a/lib/rouge/lexers/ruby.rb b/lib/rouge/lexers/ruby.rb index fa68b28aed..2f6be1a5a5 100644 --- a/lib/rouge/lexers/ruby.rb +++ b/lib/rouge/lexers/ruby.rb @@ -24,7 +24,7 @@ def self.detect?(text) rule %r( : # initial : @{0,2} # optional ivar, for :@foo and :@@foo - [a-z_]\w*[!?]? # the symbol + [\p{Ll}_]\p{Word}*[!?]? # the symbol )xi, Str::Symbol # special symbols @@ -39,7 +39,7 @@ def self.detect?(text) # %-sigiled strings # %(abc), %[abc], %, %.abc., %r.abc., etc delimiter_map = { '{' => '}', '[' => ']', '(' => ')', '<' => '>' } - rule %r/%([rqswQWxiI])?([^\w\s])/ do |m| + rule %r/%([rqswQWxiI])?([^\p{Word}\s])/ do |m| open = Regexp.escape(m[2]) close = Regexp.escape(delimiter_map[m[2]] || m[2]) interp = /[rQWxI]/ === m[1] || !m[1] @@ -83,7 +83,7 @@ def self.detect?(text) state :strings do mixin :symbols - rule %r/\b[a-z_]\w*?[?!]?:\s+/, Str::Symbol, :expr_start + rule %r/\b[\p{Ll}_]\p{Word}*?[?!]?:\s+/, Str::Symbol, :expr_start rule %r/'(\\\\|\\'|[^'])*'/, Str::Single rule %r/"/, Str::Double, :simple_string rule %r/(?_*\$?:"]), Name::Variable::Global rule %r/\$-[0adFiIlpvw]/, Name::Variable::Global rule %r/::/, Operator @@ -193,7 +193,7 @@ def self.detect?(text) rule %r( (module) (\s+) - ([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*) + ([\p{L}_][\p{L}0-9_]*(::[\p{L}_][\p{L}0-9_]*)*) )x do groups Keyword, Text, Name::Namespace end @@ -219,14 +219,14 @@ def self.detect?(text) # Otherwise, they will be parsed as :method_call rule %r/\.{2,3}/, Operator, :expr_start - rule %r/[A-Z][a-zA-Z0-9_]*/, Name::Constant, :method_call - rule %r/(\.|::)(\s*)([a-z_]\w*[!?]?|[*%&^`~+-\/\[<>=])/ do + rule %r/[\p{Lu}][\p{L}0-9_]*/, Name::Constant, :method_call + rule %r/(\.|::)(\s*)([\p{Ll}_]\p{Word}*[!?]?|[*%&^`~+-\/\[<>=])/ do groups Punctuation, Text, Name::Function push :method_call end - rule %r/[a-zA-Z_]\w*[?!]/, Name, :expr_start - rule %r/[a-zA-Z_]\w*/, Name, :method_call + rule %r/[\p{L}_]\p{Word}*[?!]/, Name, :expr_start + rule %r/[\p{L}_]\p{Word}*/, Name, :method_call rule %r/\*\*|<>?|>=|<=|<=>|=~|={3}|!~|&&?|\|\||\./, Operator, :expr_start rule %r/[-+\/*%=<>&!^|~]=?/, Operator, :expr_start @@ -236,7 +236,7 @@ def self.detect?(text) end state :has_heredocs do - rule %r/(?>? | <=>? | >= | ===? ) @@ -310,7 +310,7 @@ def self.detect?(text) state :classname do rule %r/\s+/, Text - rule %r/\w+(::\w+)+/, Name::Class + rule %r/\p{Word}+(::\p{Word}+)+/, Name::Class rule %r/\(/ do token Punctuation @@ -324,7 +324,7 @@ def self.detect?(text) goto :expr_start end - rule %r/[A-Z_]\w*/, Name::Class, :pop! + rule %r/[\p{Lu}_]\p{Word}*/, Name::Class, :pop! rule(//) { pop! } end @@ -364,7 +364,7 @@ def self.detect?(text) state :string_intp do rule %r/[#][{]/, Str::Interpol, :in_interp - rule %r/#(@@?|\$)[a-z_]\w*/i, Str::Interpol + rule %r/#(@@?|\$)[\p{Ll}_]\p{Word}*/i, Str::Interpol end state :string_intp_escaped do @@ -419,7 +419,7 @@ def self.detect?(text) rule %r( [?](\\[MC]-)* # modifiers (\\([\\abefnrstv\#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S) - (?!\w) + (?!\p{Word}) )x, Str::Char, :pop! # special case for using a single space. Ruby demands that