Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unicode update for Julia, XML, HTML, YAML, CSS and Javascript lexers. #1537

Merged
merged 5 commits into from
Jul 4, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/rouge/lexers/css.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ class CSS < RegexLexer
filenames '*.css'
mimetypes 'text/css'

# ASCII-only identifier pattern. It is reassigned a few lines below, so this
# value is overwritten before the end of this span.
# NOTE(review): in this diff view this looks like the pre-change line — confirm.
identifier = /[a-zA-Z0-9_-]+/
# Documentation: https://www.w3.org/TR/CSS21/syndata.html#characters

# Unicode-aware identifier: the first character is a letter, `_` or `-`;
# later characters may also be letter-numbers (Nl), marks (Mc, Mn), decimal
# digits (Nd), connector punctuation (Pc) or format characters (Cf).
# Unlike the ASCII pattern above, a leading digit is not accepted.
identifier = /[\p{L}_-][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}\p{Cf}_-]*/
# Optionally signed ASCII number: digits with an optional fraction, or a
# fraction that starts with a bare `.`.
number = /-?(?:[0-9]+(\.[0-9]+)?|\.[0-9]+)/

def self.attributes
Expand Down
12 changes: 6 additions & 6 deletions lib/rouge/lexers/html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@ def self.detect?(text)
rule %r(</), Name::Tag, :tag_end
rule %r/</, Name::Tag, :tag_start

rule %r(<\s*[a-zA-Z0-9:-]+), Name::Tag, :tag # opening tags
rule %r(<\s*/\s*[a-zA-Z0-9:-]+\s*>), Name::Tag # closing tags
rule %r(<\s*[\p{L}:_-][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}\p{Cf}:_.·-]*), Name::Tag, :tag # opening tags
rule %r(<\s*/\s*[\p{L}:_-][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}\p{Cf}:_.·-]*\s*>), Name::Tag # closing tags
end

state :tag_end do
mixin :tag_end_end
rule %r/[a-zA-Z0-9:-]+/ do
rule %r/[\p{L}:_-][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}\p{Cf}:_.·-]*/ do
token Name::Tag
goto :tag_end_end
end
Expand All @@ -67,7 +67,7 @@ def self.detect?(text)
state :tag_start do
rule %r/\s+/, Text

rule %r/[a-zA-Z0-9:-]+/ do
rule %r/[\p{L}:_-][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}\p{Cf}:_.·-]*/ do
token Name::Tag
goto :tag
end
Expand All @@ -83,8 +83,8 @@ def self.detect?(text)

# Rules for the :tag state: whitespace, attribute names, and the `>` / `/>`
# that ends the tag. Rules are listed in order; presumably earlier rules win
# when several match — confirm against RegexLexer.
state :tag do
rule %r/\s+/m, Text
# ASCII-only attribute names (with and without a trailing `=`).
# NOTE(review): in this diff view these two look like the pre-change lines
# that the two Unicode-aware rules below replace — confirm.
rule %r/[a-zA-Z0-9_:\[\]()*.-]+\s*=\s*/m, Name::Attribute, :attr
rule %r/[a-zA-Z0-9_:#*-]+/, Name::Attribute
# Unicode-aware attribute name followed by `=`; names the :attr state as the
# third argument (presumably the state entered to read the value).
# The first character may be a letter or one of `: _ [ ] ( ) * . -`; decimal
# digits (Nd) are only accepted after the first character.
rule %r/[\p{L}:_\[\]()*.-][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}\p{Cf}:_.·\[\]()*.-]*\s*=\s*/m, Name::Attribute, :attr
# Unicode-aware attribute name with no `=` (a bare attribute); also allows
# `*` and `#` in the name.
rule %r/[\p{L}:_*#-][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}\p{Cf}:_.·*#-]*/, Name::Attribute
# Optional `/`, optional whitespace, then `>`; `:pop!` presumably leaves this
# state — confirm against RegexLexer.
rule %r(/?\s*>)m, Name::Tag, :pop!
end

Expand Down
4 changes: 3 additions & 1 deletion lib/rouge/lexers/javascript.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class Javascript < RegexLexer
mimetypes 'application/javascript', 'application/x-javascript',
'text/javascript', 'text/x-javascript'

# Pseudo-documentation: https://stackoverflow.com/questions/1661197/what-characters-are-valid-for-javascript-variable-names

def self.detect?(text)
return 1 if text.shebang?('node')
return 1 if text.shebang?('jsc')
Expand Down Expand Up @@ -138,7 +140,7 @@ def self.builtins
end

# Returns the regular expression used for identifiers.
#
# The method body holds two regex literals. Ruby returns the value of the
# last expression, so only the second (Unicode-aware) literal is returned;
# the first is evaluated and discarded.
# NOTE(review): in this diff view the first literal looks like the
# pre-change line — confirm.
def self.id_regex
# ASCII-only form: `$`, a letter or `_`, then letters, digits or `_`
# (case-insensitive).
/[$a-z_][a-z0-9_]*/io
# Unicode-aware form: starts with a letter, letter-number (Nl), `$` or `_`;
# continues with letters, letter-numbers, marks (Mc, Mn), decimal digits (Nd),
# connector punctuation (Pc) or `_`.
/[\p{L}\p{Nl}$_][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}_]*/io
end

id = self.id_regex
Expand Down
6 changes: 4 additions & 2 deletions lib/rouge/lexers/julia.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class Julia < RegexLexer
filenames '*.jl'
mimetypes 'text/x-julia', 'application/x-julia'

# Documentation: https://docs.julialang.org/en/v1/manual/variables/#Allowed-Variable-Names-1

# Content-based detection hook: asks +text+ whether it carries a `julia`
# shebang (via +text.shebang?+).
#
# @param text [#shebang?] the source text being probed
# @return [true, nil] +true+ when +text.shebang?('julia')+ is truthy,
#   otherwise +nil+
def self.detect?(text)
  true if text.shebang?('julia')
end
Expand Down Expand Up @@ -252,13 +254,13 @@ def self.detect?(text)


# Rules for the :funcname state: one name or operator is tokenized and every
# rule carries `:pop!` (presumably leaving this state — confirm against
# RegexLexer).
state :funcname do
# ASCII-start identifier (`\w` for the rest).
# NOTE(review): in this diff view this looks like the pre-change line that
# the Unicode-aware rule below replaces — confirm.
rule %r/[a-zA-Z_]\w*/, Name::Function, :pop!
# Unicode-aware name: starts with a letter, letter-number (Nl), currency,
# other or math symbol (Sc, So, Sm) or `_`; continues with word characters,
# any symbol (S) or `!`.
rule %r/[\p{L}\p{Nl}\p{Sc}\p{So}\p{Sm}_][\p{Word}\p{S}!]*/, Name::Function, :pop!
# One or two characters that are not whitespace, word characters or `{`,
# wrapped in parentheses; tokenized as an operator.
rule %r/\([^\s\w{]{1,2}\)/, Operator, :pop!
# The same one or two characters without the parentheses.
rule %r/[^\s\w{]{1,2}/, Operator, :pop!
end

# Rules for the :typename state: a single name is tokenized as Name::Class
# and each rule carries `:pop!` (presumably leaving this state — confirm
# against RegexLexer).
state :typename do
# ASCII-start identifier (`\w` for the rest).
# NOTE(review): in this diff view this looks like the pre-change line that
# the Unicode-aware rule below replaces — confirm.
rule %r/[a-zA-Z_]\w*/, Name::Class, :pop!
# Unicode-aware name: starts with a letter, letter-number (Nl), currency,
# other or math symbol (Sc, So, Sm) or `_`; continues with word characters,
# any symbol (S) or `!`.
rule %r/[\p{L}\p{Nl}\p{Sc}\p{So}\p{Sm}_][\p{Word}\p{S}!]*/, Name::Class, :pop!
end

state :stringescape do
Expand Down
8 changes: 5 additions & 3 deletions lib/rouge/lexers/xml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ class XML < RegexLexer
mimetypes 'text/xml', 'application/xml', 'image/svg+xml',
'application/rss+xml', 'application/atom+xml'

# Documentation: https://www.w3.org/TR/xml11/#charsets and https://www.w3.org/TR/xml11/#sec-suggested-names

def self.detect?(text)
return false if text.doctype?(/html/)
return true if text =~ /\A<\?xml\b/
Expand All @@ -27,10 +29,10 @@ def self.detect?(text)
rule %r/<![^>]*>/, Comment::Preproc

# open tags
rule %r(<\s*[\w:.-]+)m, Name::Tag, :tag
rule %r(<\s*[\p{L}:_][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}\p{Cf}:_.·-]*)m, Name::Tag, :tag

# closing tags
rule %r(<\s*/\s*[\w:.-]+\s*>)m, Name::Tag
rule %r(<\s*/\s*[\p{L}:_][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}\p{Cf}:_.·-]*\s*>)m, Name::Tag
end

state :comment do
Expand All @@ -41,7 +43,7 @@ def self.detect?(text)

# Rules for the :tag state: whitespace, attribute names followed by `=`, and
# the `>` / `/>` that ends the tag.
state :tag do
rule %r/\s+/m, Text
# Attribute name built from `\w`, `.`, `:` and `-`, followed by `=`.
# NOTE(review): in this diff view this looks like the pre-change line that
# the Unicode-aware rule below replaces — confirm.
rule %r/[\w.:-]+\s*=/m, Name::Attribute, :attr
# Unicode-aware attribute name followed by `=`: starts with a letter, `:` or
# `_`; continues with letters, letter-numbers (Nl), marks (Mc, Mn), decimal
# digits (Nd), connector punctuation (Pc), format characters (Cf) or one of
# `: _ . · -`. Names the :attr state as the third argument (presumably the
# state entered to read the value).
rule %r/[\p{L}:_][\p{L}\p{Nl}\p{Mc}\p{Mn}\p{Nd}\p{Pc}\p{Cf}:_.·-]*\s*=/m, Name::Attribute, :attr
# Optional `/`, optional whitespace, then `>`; `:pop!` presumably leaves this
# state — confirm against RegexLexer.
rule %r(/?\s*>), Name::Tag, :pop!
end

Expand Down
8 changes: 5 additions & 3 deletions lib/rouge/lexers/yaml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class YAML < RegexLexer
aliases 'yml'
filenames '*.yaml', '*.yml'

# Documentation: https://yaml.org/spec/1.2/spec.html

def self.detect?(text)
# look for the %YAML directive
return true if text =~ /\A\s*%YAML/m
Expand Down Expand Up @@ -165,15 +167,15 @@ def set_indent(match, opts={})
)x, Keyword::Type

# an anchor
rule %r/&[\w-]+/, Name::Label
rule %r/&[\p{L}\p{Nl}\p{Nd}_-]+/, Name::Label

# an alias
rule %r/\*[\w-]+/, Name::Variable
rule %r/\*[\p{L}\p{Nl}\p{Nd}_-]+/, Name::Variable
end

state :block_nodes do
# implicit key
rule %r/((?:\w[\w -]*)?)(:)(?=\s|$)/ do |m|
rule %r/((?:[\p{L}\p{Nl}\p{Nd}_][\p{L}\p{Nl}\p{Nd}\p{Blank}_-]*)?)(:)(?=\s|$)/ do |m|
groups Name::Attribute, Punctuation::Indicator
set_indent m[0], :implicit => true
end
Expand Down