Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix non-ascii characters in names in Crystal and Ruby #1894

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions lib/rouge/lexers/crystal.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def self.detect?(text)
rule %r(
: # initial :
@{0,2} # optional ivar, for :@foo and :@@foo
[a-z_]\w*[!?]? # the symbol
[\p{Ll}_]\p{Word}*[!?]? # the symbol
)xi, Str::Symbol

# special symbols
Expand All @@ -35,7 +35,7 @@ def self.detect?(text)
# %-sigiled strings
# %(abc), %[abc], %<abc>, %.abc., %r.abc., etc
delimiter_map = { '{' => '}', '[' => ']', '(' => ')', '<' => '>' }
rule %r/%([rqswQWxiI])?([^\w\s}])/ do |m|
rule %r/%([rqswQWxiI])?([^\p{Word}\s}])/ do |m|
open = Regexp.escape(m[2])
close = Regexp.escape(delimiter_map[m[2]] || m[2])
interp = /[rQWxI]/ === m[1]
Expand Down Expand Up @@ -77,7 +77,7 @@ def self.detect?(text)

state :strings do
mixin :symbols
rule %r/\b[a-z_]\w*?[?!]?:\s+/, Str::Symbol, :expr_start
rule %r/\b[\p{Ll}_]\p{Word}*?[?!]?:\s+/, Str::Symbol, :expr_start
rule %r/"/, Str::Double, :simple_string
rule %r/(?<!\.)`/, Str::Backtick, :simple_backtick
rule %r/(')(\\u[a-fA-F0-9]{4}|\\u\{[a-fA-F0-9]{1,6}\}|\\[abefnrtv])?(\\\\|\\'|[^'])*(')/ do
Expand Down Expand Up @@ -166,9 +166,9 @@ def self.detect?(text)
rule %r/@\[([^\]]+)\]/, Name::Decorator

# names
rule %r/@@[a-z_]\w*/i, Name::Variable::Class
rule %r/@[a-z_]\w*/i, Name::Variable::Instance
rule %r/\$\w+/, Name::Variable::Global
rule %r/@@[\p{Ll}_]\p{Word}*/i, Name::Variable::Class
rule %r/@[\p{Ll}_]\p{Word}*/i, Name::Variable::Instance
rule %r/\$\p{Word}+/, Name::Variable::Global
rule %r(\$[!@&`'+~=/\\,;.<>_*\$?:"]), Name::Variable::Global
rule %r/\$-[0adFiIlpvw]/, Name::Variable::Global
rule %r/::/, Operator
Expand All @@ -181,7 +181,7 @@ def self.detect?(text)
rule %r(
(module)
(\s+)
([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)
([\p{L}_][\p{L}0-9_]*(::[\p{L}_][\p{L}0-9_]*)*)
)x do
groups Keyword, Text, Name::Namespace
end
Expand All @@ -207,14 +207,14 @@ def self.detect?(text)
# Otherwise, they will be parsed as :method_call
rule %r/\.{2,3}/, Operator, :expr_start

rule %r/[A-Z][a-zA-Z0-9_]*/, Name::Constant, :method_call
rule %r/(\.|::)(\s*)([a-z_]\w*[!?]?|[*%&^`~+-\/\[<>=])/ do
rule %r/[\p{Lu}][\p{L}0-9_]*/, Name::Constant, :method_call
rule %r/(\.|::)(\s*)([\p{Ll}_]\p{Word}*[!?]?|[*%&^`~+-\/\[<>=])/ do
groups Punctuation, Text, Name::Function
push :method_call
end

rule %r/[a-zA-Z_]\w*[?!]/, Name, :expr_start
rule %r/[a-zA-Z_]\w*/, Name, :method_call
rule %r/[\p{L}_]\p{Word}*[?!]/, Name, :expr_start
rule %r/[\p{L}_]\p{Word}*/, Name, :method_call
rule %r/\*\*|\/\/|>=|<=|<=>|<<?|>>?|=~|={3}|!~|&&?|\|\||\./,
Operator, :expr_start
rule %r/{%|%}/, Punctuation
Expand All @@ -225,7 +225,7 @@ def self.detect?(text)
end

state :has_heredocs do
rule %r/(?<!\w)(<<[-~]?)(["`']?)([a-zA-Z_]\w*)(\2)/ do |m|
rule %r/(?<!\p{Word})(<<[-~]?)(["`']?)([\p{L}_]\p{Word}*)(\2)/ do |m|
token Operator, m[1]
token Name::Constant, "#{m[2]}#{m[3]}#{m[4]}"
@heredoc_queue << [['<<-', '<<~'].include?(m[1]), m[3]]
Expand Down Expand Up @@ -282,9 +282,9 @@ def self.detect?(text)
rule %r/\s+/, Text
rule %r/\(/, Punctuation, :defexpr
rule %r(
(?:([a-zA-Z_]\w*)(\.))?
(?:([\p{L}_]\p{Word}*)(\.))?
(
[a-zA-Z_]\w*[!?]? |
[\p{L}_]\p{Word}*[!?]? |
\*\*? | [-+]@? | [/%&\|^`~] | \[\]=? |
<<? | >>? | <=>? | >= | ===?
)
Expand All @@ -311,7 +311,7 @@ def self.detect?(text)
goto :expr_start
end

rule %r/[A-Z_]\w*/, Name::Class, :pop!
rule %r/[\p{Lu}_]\p{Word}*/, Name::Class, :pop!

rule(//) { pop! }
end
Expand Down Expand Up @@ -343,7 +343,7 @@ def self.detect?(text)

state :string_intp do
rule %r/[#][{]/, Str::Interpol, :in_interp
rule %r/#(@@?|\$)[a-z_]\w*/i, Str::Interpol
rule %r/#(@@?|\$)[\p{Ll}_]\p{Word}*/i, Str::Interpol
end

state :string_intp_escaped do
Expand Down Expand Up @@ -399,7 +399,7 @@ def self.detect?(text)
rule %r(
[?](\\[MC]-)* # modifiers
(\\([\\abefnrstv\#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)
(?!\w)
(?!\p{Word})
)x, Str::Char, :pop!

# special case for using a single space. Ruby demands that
Expand Down
36 changes: 18 additions & 18 deletions lib/rouge/lexers/ruby.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def self.detect?(text)
rule %r(
: # initial :
@{0,2} # optional ivar, for :@foo and :@@foo
[a-z_]\w*[!?]? # the symbol
[\p{Ll}_]\p{Word}*[!?]? # the symbol
)xi, Str::Symbol

# special symbols
Expand All @@ -39,7 +39,7 @@ def self.detect?(text)
# %-sigiled strings
# %(abc), %[abc], %<abc>, %.abc., %r.abc., etc
delimiter_map = { '{' => '}', '[' => ']', '(' => ')', '<' => '>' }
rule %r/%([rqswQWxiI])?([^\w\s])/ do |m|
rule %r/%([rqswQWxiI])?([^\p{Word}\s])/ do |m|
open = Regexp.escape(m[2])
close = Regexp.escape(delimiter_map[m[2]] || m[2])
interp = /[rQWxI]/ === m[1]
Expand Down Expand Up @@ -83,7 +83,7 @@ def self.detect?(text)

state :strings do
mixin :symbols
rule %r/\b[a-z_]\w*?[?!]?:\s+/, Str::Symbol, :expr_start
rule %r/\b[\p{Ll}_]\p{Word}*?[?!]?:\s+/, Str::Symbol, :expr_start
rule %r/'(\\\\|\\'|[^'])*'/, Str::Single
rule %r/"/, Str::Double, :simple_string
rule %r/(?<!\.)`/, Str::Backtick, :simple_backtick
Expand Down Expand Up @@ -177,9 +177,9 @@ def self.detect?(text)
rule decimal, Num::Integer

# names
rule %r/@@[a-z_]\w*/i, Name::Variable::Class
rule %r/@[a-z_]\w*/i, Name::Variable::Instance
rule %r/\$\w+/, Name::Variable::Global
rule %r/@@[\p{Ll}_]\p{Word}*/i, Name::Variable::Class
rule %r/@[\p{Ll}_]\p{Word}*/i, Name::Variable::Instance
rule %r/\$\p{Word}+/, Name::Variable::Global
rule %r(\$[!@&`'+~=/\\,;.<>_*\$?:"]), Name::Variable::Global
rule %r/\$-[0adFiIlpvw]/, Name::Variable::Global
rule %r/::/, Operator
Expand All @@ -192,7 +192,7 @@ def self.detect?(text)
rule %r(
(module)
(\s+)
([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)
([\p{L}_][\p{L}0-9_]*(::[\p{L}_][\p{L}0-9_]*)*)
)x do
groups Keyword, Text, Name::Namespace
end
Expand All @@ -218,14 +218,14 @@ def self.detect?(text)
# Otherwise, they will be parsed as :method_call
rule %r/\.{2,3}/, Operator, :expr_start

rule %r/[A-Z][a-zA-Z0-9_]*/, Name::Constant, :method_call
rule %r/(\.|::)(\s*)([a-z_]\w*[!?]?|[*%&^`~+-\/\[<>=])/ do
rule %r/[\p{Lu}][\p{L}0-9_]*/, Name::Constant, :method_call
rule %r/(\.|::)(\s*)([\p{Ll}_]\p{Word}*[!?]?|[*%&^`~+-\/\[<>=])/ do
groups Punctuation, Text, Name::Function
push :method_call
end

rule %r/[a-zA-Z_]\w*[?!]/, Name, :expr_start
rule %r/[a-zA-Z_]\w*/, Name, :method_call
rule %r/[\p{L}_]\p{Word}*[?!]/, Name, :expr_start
rule %r/[\p{L}_]\p{Word}*/, Name, :method_call
rule %r/\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|!~|&&?|\|\||\./,
Operator, :expr_start
rule %r/[-+\/*%=<>&!^|~]=?/, Operator, :expr_start
Expand All @@ -235,7 +235,7 @@ def self.detect?(text)
end

state :has_heredocs do
rule %r/(?<!\w)(<<[-~]?)(["`']?)([a-zA-Z_]\w*)(\2)/ do |m|
rule %r/(?<!\p{Word})(<<[-~]?)(["`']?)([\p{L}_]\p{Word}*)(\2)/ do |m|
token Operator, m[1]
token Name::Constant, "#{m[2]}#{m[3]}#{m[4]}"
@heredoc_queue << [['<<-', '<<~'].include?(m[1]), m[3]]
Expand Down Expand Up @@ -292,9 +292,9 @@ def self.detect?(text)
rule %r/\s+/, Text
rule %r/\(/, Punctuation, :defexpr
rule %r(
(?:([a-zA-Z_]\w*)(\.))?
(?:([\p{L}_]\p{Word}*)(\.))?
(
[a-zA-Z_]\w*[!?]? |
[\p{L}_]\p{Word}*[!?]? |
\*\*? | [-+]@? | [/%&\|^`~] | \[\]=? |
<<? | >>? | <=>? | >= | ===?
)
Expand All @@ -309,7 +309,7 @@ def self.detect?(text)

state :classname do
rule %r/\s+/, Text
rule %r/\w+(::\w+)+/, Name::Class
rule %r/\p{Word}+(::\p{Word}+)+/, Name::Class

rule %r/\(/ do
token Punctuation
Expand All @@ -323,7 +323,7 @@ def self.detect?(text)
goto :expr_start
end

rule %r/[A-Z_]\w*/, Name::Class, :pop!
rule %r/[\p{Lu}_]\p{Word}*/, Name::Class, :pop!

rule(//) { pop! }
end
Expand Down Expand Up @@ -363,7 +363,7 @@ def self.detect?(text)

state :string_intp do
rule %r/[#][{]/, Str::Interpol, :in_interp
rule %r/#(@@?|\$)[a-z_]\w*/i, Str::Interpol
rule %r/#(@@?|\$)[\p{Ll}_]\p{Word}*/i, Str::Interpol
end

state :string_intp_escaped do
Expand Down Expand Up @@ -418,7 +418,7 @@ def self.detect?(text)
rule %r(
[?](\\[MC]-)* # modifiers
(\\([\\abefnrstv\#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)
(?!\w)
(?!\p{Word})
)x, Str::Char, :pop!

# special case for using a single space. Ruby demands that
Expand Down