Skip to content

Commit

Permalink
all: revert changes of [a-zA-Z0-9_] to \w
Browse files: browse the repository at this point in the history
... which is not equivalent in Unicode mode: there, \w also matches non-ASCII letters and digits, so it accepts more than [a-zA-Z0-9_]
  • Loading branch information
birkenfeld committed Sep 7, 2020
1 parent d464bf5 commit 080bbeb
Show file tree
Hide file tree
Showing 13 changed files with 52 additions and 51 deletions.
2 changes: 1 addition & 1 deletion pygments/lexers/arrow.py
Expand Up @@ -16,7 +16,7 @@
__all__ = ['ArrowLexer']

TYPES = r'\b(int|bool|char)((?:\[\])*)(?=\s+)'
IDENT = r'([a-zA-Z_]\w*)'
IDENT = r'([a-zA-Z_][a-zA-Z0-9_]*)'
DECL = TYPES + r'(\s+)' + IDENT


Expand Down
20 changes: 10 additions & 10 deletions pygments/lexers/asm.py
Expand Up @@ -472,19 +472,19 @@ class LlvmMirBodyLexer(RegexLexer):
# Attributes on basic blocks
(words(('liveins', 'successors'), suffix=':'), Keyword),
# Basic Block Labels
(r'bb\.[0-9]+(\.[\w.-]+)?( \(address-taken\))?:', Name.Label),
(r'bb\.[0-9]+ \(%[\w.-]+\)( \(address-taken\))?:', Name.Label),
(r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:', Name.Label),
(r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:', Name.Label),
(r'%bb\.[0-9]+(\.\w+)?', Name.Label),
# Stack references
(r'%stack\.[0-9]+(\.\w+\.addr)?', Name),
# Subreg indices
(r'%subreg\.\w+', Name),
# Virtual registers
(r'%\w+ *', Name.Variable, 'vreg'),
(r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),
# Reference to LLVM-IR global
include('global'),
# Reference to Intrinsic
(r'intrinsic\(\@[\w.]+\)', Name.Variable.Global),
(r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),
# Comparison predicates
(words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',
'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin),
Expand Down Expand Up @@ -537,7 +537,7 @@ class LlvmMirBodyLexer(RegexLexer):
# MIR Comments
(r';.*', Comment),
# If we get here, assume it's a target instruction
(r'\w+', Name),
(r'[a-zA-Z0-9_]+', Name),
# Everything else that isn't highlighted
(r'[(), \n]+', Text),
],
Expand All @@ -561,7 +561,7 @@ class LlvmMirBodyLexer(RegexLexer):
'vreg_bank_or_class': [
# The unassigned bank/class
(r' *_', Name.Variable.Magic),
(r' *\w+', Name.Variable),
(r' *[a-zA-Z0-9_]+', Name.Variable),
# The LLT if there is one
(r' *\(', Text, 'vreg_type'),
(r'(?=.)', Text, '#pop'),
Expand All @@ -580,8 +580,8 @@ class LlvmMirBodyLexer(RegexLexer):
'acquire', 'release', 'acq_rel', 'seq_cst')),
Keyword),
# IR references
(r'%ir\.[\w.-]+', Name),
(r'%ir-block\.[\w.-]+', Name),
(r'%ir\.[a-zA-Z0-9_.-]+', Name),
(r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
(r'[-+]', Operator),
include('integer'),
include('global'),
Expand All @@ -591,7 +591,7 @@ class LlvmMirBodyLexer(RegexLexer):
],
'integer': [(r'-?[0-9]+', Number.Integer),],
'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],
'global': [(r'\@[\w.]+', Name.Variable.Global)],
'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)],
}


Expand Down Expand Up @@ -935,7 +935,7 @@ class Dasm16Lexer(RegexLexer):
]

# Regexes yo
char = r'[\w$@.]'
char = r'[a-zA-Z0-9_$@.]'
identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
binary_number = r'0b[01_]+'
Expand Down
14 changes: 7 additions & 7 deletions pygments/lexers/basic.py
Expand Up @@ -523,15 +523,15 @@ class VBScriptLexer(RegexLexer):
(r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
(r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Float variant 2, for example: .1, .1e2
(r'[0-9]+e[+-]?[0-9]+', Number.Float), # Float variant 3, for example: 123e45
(r'\d+', Number.Integer),
(r'[0-9]+', Number.Integer),
('#.+#', String), # date or time value
(r'(dim)(\s+)([a-z_]\w*)',
(r'(dim)(\s+)([a-z_][a-z0-9_]*)',
bygroups(Keyword.Declaration, Whitespace, Name.Variable), 'dim_more'),
(r'(function|sub)(\s+)([a-z_]\w*)',
(r'(function|sub)(\s+)([a-z_][a-z0-9_]*)',
bygroups(Keyword.Declaration, Whitespace, Name.Function)),
(r'(class)(\s+)([a-z_]\w*)',
(r'(class)(\s+)([a-z_][a-z0-9_]*)',
bygroups(Keyword.Declaration, Whitespace, Name.Class)),
(r'(const)(\s+)([a-z_]\w*)',
(r'(const)(\s+)([a-z_][a-z0-9_]*)',
bygroups(Keyword.Declaration, Whitespace, Name.Constant)),
(r'(end)(\s+)(class|function|if|property|sub|with)',
bygroups(Keyword, Whitespace, Keyword)),
Expand All @@ -540,7 +540,7 @@ class VBScriptLexer(RegexLexer):
(r'(on)(\s+)(error)(\s+)(resume)(\s+)(next)',
bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Keyword)),
(r'(option)(\s+)(explicit)', bygroups(Keyword, Whitespace, Keyword)),
(r'(property)(\s+)(get|let|set)(\s+)([a-z_]\w*)',
(r'(property)(\s+)(get|let|set)(\s+)([a-z_][a-z0-9_]*)',
bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration, Whitespace, Name.Property)),
(r'rem\s.*[^\n]*', Comment.Single),
(words(_vbscript_builtins.KEYWORDS, suffix=r'\b'), Keyword),
Expand All @@ -549,7 +549,7 @@ class VBScriptLexer(RegexLexer):
(words(_vbscript_builtins.BUILTIN_CONSTANTS, suffix=r'\b'), Name.Constant),
(words(_vbscript_builtins.BUILTIN_FUNCTIONS, suffix=r'\b'), Name.Builtin),
(words(_vbscript_builtins.BUILTIN_VARIABLES, suffix=r'\b'), Name.Builtin),
(r'[a-z_]\w*', Name),
(r'[a-z_][a-z0-9_]*', Name),
(r'\b_\n', Operator),
(words(r'(),.:'), Punctuation),
(r'.+(\n)?', Error)
Expand Down
2 changes: 1 addition & 1 deletion pygments/lexers/clean.py
Expand Up @@ -40,7 +40,7 @@ class CleanLexer(ExtendedRegexLexer):
funnyId = r'[~@#$%\^?!+\-*<>\\/|&=:]+'
scoreUpperId = r'_' + upperId
scoreLowerId = r'_' + lowerId
moduleId = r'[a-zA-Z_][\w.`]+'
moduleId = r'[a-zA-Z_][a-zA-Z0-9_.`]+'
classId = '|'.join([lowerId, upperId, funnyId])

tokens = {
Expand Down
6 changes: 3 additions & 3 deletions pygments/lexers/elm.py
Expand Up @@ -27,7 +27,7 @@ class ElmLexer(RegexLexer):
filenames = ['*.elm']
mimetypes = ['text/x-elm']

validName = r'[a-z_][\w\']*'
validName = r'[a-z_][a-zA-Z0-9_\']*'

specialName = r'^main '

Expand All @@ -40,7 +40,7 @@ class ElmLexer(RegexLexer):
reservedWords = words((
'alias', 'as', 'case', 'else', 'if', 'import', 'in',
'let', 'module', 'of', 'port', 'then', 'type', 'where',
), suffix=r'\b')
), suffix=r'\b')

tokens = {
'root': [
Expand Down Expand Up @@ -68,7 +68,7 @@ class ElmLexer(RegexLexer):
(reservedWords, Keyword.Reserved),

# Types
(r'[A-Z]\w*', Keyword.Type),
(r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),

# Main
(specialName, Keyword.Reserved),
Expand Down
4 changes: 2 additions & 2 deletions pygments/lexers/praat.py
Expand Up @@ -215,7 +215,7 @@ class PraatLexer(RegexLexer):
],
'object_reference': [
include('string_interpolated'),
(r'([a-z]\w*|\d+)', Name.Builtin),
(r'([a-z][a-zA-Z0-9_]*|\d+)', Name.Builtin),

(words(object_attributes, prefix=r'\.'), Name.Builtin, '#pop'),

Expand All @@ -228,7 +228,7 @@ class PraatLexer(RegexLexer):

(words(variables_string, suffix=r'\$'), Name.Variable.Global),
(words(variables_numeric,
suffix=r'(?=[^\w."\'$#\[:(]|\s|^|$)'),
suffix=r'(?=[^a-zA-Z0-9_."\'$#\[:(]|\s|^|$)'),
Name.Variable.Global),

(words(objects, prefix=r'\b', suffix=r"(_)"),
Expand Down
12 changes: 6 additions & 6 deletions pygments/lexers/prolog.py
Expand Up @@ -113,7 +113,7 @@ class LogtalkLexer(RegexLexer):
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'\d+\.?\d*((e|E)(\+|-)?\d+)?', Number),
# Variables
(r'([A-Z_]\w*)', Name.Variable),
(r'([A-Z_][a-zA-Z0-9_]*)', Name.Variable),
# Event handlers
(r'(after|before)(?=[(])', Keyword),
# Message forwarding handler
Expand Down Expand Up @@ -231,7 +231,7 @@ class LogtalkLexer(RegexLexer):
# Punctuation
(r'[()\[\],.|]', Text),
# Atoms
(r"[a-z]\w*", Text),
(r"[a-z][a-zA-Z0-9_]*", Text),
(r"'", String, 'quoted_atom'),
],

Expand Down Expand Up @@ -259,8 +259,8 @@ class LogtalkLexer(RegexLexer):
(r'(alias|d(ynamic|iscontiguous)|m(eta_(non_terminal|predicate)|ode|ultifile)|s(et_(logtalk|prolog)_flag|ynchronized))(?=[(])', Keyword, 'root'),
(r'op(?=[(])', Keyword, 'root'),
(r'(c(alls|oinductive)|module|reexport|use(s|_module))(?=[(])', Keyword, 'root'),
(r'[a-z]\w*(?=[(])', Text, 'root'),
(r'[a-z]\w*(?=[.])', Text, 'root'),
(r'[a-z][a-zA-Z0-9_]*(?=[(])', Text, 'root'),
(r'[a-z][a-zA-Z0-9_]*(?=[.])', Text, 'root'),
],

'entityrelations': [
Expand All @@ -272,9 +272,9 @@ class LogtalkLexer(RegexLexer):
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'\d+\.?\d*((e|E)(\+|-)?\d+)?', Number),
# Variables
(r'([A-Z_]\w*)', Name.Variable),
(r'([A-Z_][a-zA-Z0-9_]*)', Name.Variable),
# Atoms
(r"[a-z]\w*", Text),
(r"[a-z][a-zA-Z0-9_]*", Text),
(r"'", String, 'quoted_atom'),
# Strings
(r'"(\\\\|\\"|[^"])*"', String),
Expand Down
4 changes: 2 additions & 2 deletions pygments/lexers/promql.py
Expand Up @@ -153,7 +153,7 @@ class PromQLLexer(RegexLexer):
(r"==|!=|>=|<=|<|>", Operator),
(r"and|or|unless", Operator.Word),
# Metrics
(r"[_a-zA-Z]\w+", Name.Variable),
(r"[_a-zA-Z][a-zA-Z0-9_]+", Name.Variable),
# Params
(r'(["\'])(.*?)(["\'])', bygroups(Punctuation, String, Punctuation)),
# Other states
Expand All @@ -167,7 +167,7 @@ class PromQLLexer(RegexLexer):
(r"\n", Whitespace),
(r"\s+", Whitespace),
(r",", Punctuation),
(r'([_a-zA-Z]\w*?)(\s*?)(=~|!=|=|~!)(\s*?)(")(.*?)(")',
(r'([_a-zA-Z][a-zA-Z0-9_]*?)(\s*?)(=~|!=|=|~!)(\s*?)(")(.*?)(")',
bygroups(Name.Label, Whitespace, Operator, Whitespace,
Punctuation, String, Punctuation)),
],
Expand Down
2 changes: 1 addition & 1 deletion pygments/lexers/ride.py
Expand Up @@ -28,7 +28,7 @@ class RideLexer(RegexLexer):
filenames = ['*.ride']
mimetypes = ['text/x-ride']

validName = r'[a-zA-Z_][\w\']*'
validName = r'[a-zA-Z_][a-zA-Z0-9_\']*'

builtinOps = (
'||', '|', '>=', '>', '==', '!',
Expand Down
21 changes: 10 additions & 11 deletions pygments/lexers/solidity.py
Expand Up @@ -13,7 +13,7 @@

from pygments.lexer import RegexLexer, bygroups, include, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation
Number, Punctuation, Whitespace

__all__ = ['SolidityLexer']

Expand All @@ -33,7 +33,7 @@ class SolidityLexer(RegexLexer):
flags = re.MULTILINE | re.UNICODE

datatype = (
r'\b(address|bool|((bytes|hash|int|string|uint)(8|16|24|32|40|48|56|64'
r'\b(address|bool|(?:(?:bytes|hash|int|string|uint)(?:8|16|24|32|40|48|56|64'
r'|72|80|88|96|104|112|120|128|136|144|152|160|168|176|184|192|200|208'
r'|216|224|232|240|248|256)?))\b'
)
Expand All @@ -44,14 +44,13 @@ class SolidityLexer(RegexLexer):
include('comments'),
(r'\bpragma\s+solidity\b', Keyword, 'pragma'),
(r'\b(contract)(\s+)([a-zA-Z_]\w*)',
bygroups(Keyword, Text.WhiteSpace, Name.Entity)),
(datatype + r'(\s+)((external|public|internal|private)\s+)?' +
bygroups(Keyword, Whitespace, Name.Entity)),
(datatype + r'(\s+)((?:external|public|internal|private)\s+)?' +
r'([a-zA-Z_]\w*)',
bygroups(Keyword.Type, None, None, None, Text.WhiteSpace, Keyword,
None, Name.Variable)),
bygroups(Keyword.Type, Whitespace, Keyword, Name.Variable)),
(r'\b(enum|event|function|struct)(\s+)([a-zA-Z_]\w*)',
bygroups(Keyword.Type, Text.WhiteSpace, Name.Variable)),
(r'\b(msg|block|tx)\.([A-Za-z_]\w*)\b', Keyword),
bygroups(Keyword.Type, Whitespace, Name.Variable)),
(r'\b(msg|block|tx)\.([A-Za-z_][a-zA-Z0-9_]*)\b', Keyword),
(words((
'block', 'break', 'constant', 'constructor', 'continue',
'contract', 'do', 'else', 'external', 'false', 'for',
Expand Down Expand Up @@ -83,11 +82,11 @@ class SolidityLexer(RegexLexer):
include('whitespace'),
include('comments'),
(r'(\^|>=|<)(\s*)(\d+\.\d+\.\d+)',
bygroups(Operator, Text.WhiteSpace, Keyword)),
bygroups(Operator, Whitespace, Keyword)),
(r';', Punctuation, '#pop')
],
'whitespace': [
(r'\s+', Text.WhiteSpace),
(r'\n', Text.WhiteSpace)
(r'\s+', Whitespace),
(r'\n', Whitespace)
]
}
8 changes: 5 additions & 3 deletions pygments/lexers/sql.py
Expand Up @@ -623,10 +623,12 @@ class MySqlLexer(RegexLexer):
(r'[0-9]+', Number.Integer),

# Date literals
(r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}", Literal.Date),
(r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
Literal.Date),

# Time literals
(r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}", Literal.Date),
(r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
Literal.Date),

# Timestamp literals
(
Expand All @@ -644,7 +646,7 @@ class MySqlLexer(RegexLexer):

# Variables
(r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
(r'@[\w$.]+', Name.Variable),
(r'@[a-z0-9_$.]+', Name.Variable),
(r"@'", Name.Variable, 'single-quoted-variable'),
(r'@"', Name.Variable, 'double-quoted-variable'),
(r"@`", Name.Variable, 'backtick-quoted-variable'),
Expand Down
6 changes: 3 additions & 3 deletions pygments/lexers/teraterm.py
Expand Up @@ -52,7 +52,7 @@ class TeraTermLexer(RegexLexer):
(r'[*/]', Comment.Multiline)
],
'labels': [
(r'^(\s*)(:\w+)', bygroups(Text, Name.Label)),
(r'(?i)^(\s*)(:[a-z0-9_]+)', bygroups(Text, Name.Label)),
],
'commands': [
(
Expand Down Expand Up @@ -259,7 +259,7 @@ class TeraTermLexer(RegexLexer):
r')\b',
Keyword,
),
(r'(?i)(call|goto)([ \t]+)(\w+)',
(r'(?i)(call|goto)([ \t]+)([a-z0-9_]+)',
bygroups(Keyword, Text, Name.Label)),
],
'builtin-variables': [
Expand Down Expand Up @@ -295,7 +295,7 @@ class TeraTermLexer(RegexLexer):
),
],
'user-variables': [
(r'(?i)[A-Z_]\w*', Name.Variable),
(r'(?i)[a-z_][a-z0-9_]*', Name.Variable),
],
'numeric-literals': [
(r'(-?)([0-9]+)', bygroups(Operator, Number.Integer)),
Expand Down
2 changes: 1 addition & 1 deletion pygments/lexers/webidl.py
Expand Up @@ -32,7 +32,7 @@
# other
'any', 'void', 'object', 'RegExp',
)
_identifier = r'_?[A-Za-z][\w-]*'
_identifier = r'_?[A-Za-z][a-zA-Z0-9_-]*'
_keyword_suffix = r'(?![\w-])'
_string = r'"[^"]*"'

Expand Down

0 comments on commit 080bbeb

Please sign in to comment.