-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[MLIR][Pygments] Refine the pygments MLIR lexer #166406
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
94821d0
72d7512
11793af
49a5f16
0534a17
47fac06
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,37 +2,132 @@ | |
| # See https://llvm.org/LICENSE.txt for license information. | ||
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| from pygments.lexer import RegexLexer | ||
| from pygments.lexer import RegexLexer, bygroups, include, using | ||
| from pygments.token import * | ||
| import re | ||
|
|
||
|
|
||
| class MlirLexer(RegexLexer): | ||
| """Pygments lexer for MLIR. | ||
| This lexer focuses on accurate tokenization of common MLIR constructs: | ||
| - SSA values (%%... / %...) | ||
| - attribute and type aliases (#name =, !name =) | ||
| - types (builtin and dialect types, parametric types) | ||
| - attribute dictionaries and nested containers to a reasonable depth | ||
| - numbers (ints, floats with exponents, hex) | ||
| - strings with common escapes | ||
| - line comments (// ...) | ||
| - block labels (^foo) and operations | ||
| """ | ||
|
|
||
| name = "MLIR" | ||
| aliases = ["mlir"] | ||
| filenames = ["*.mlir"] | ||
|
|
||
| flags = re.MULTILINE | ||
|
|
||
| class VariableList(RegexLexer): | ||
| """Lexer for lists of SSA variables separated by commas.""" | ||
|
|
||
| tokens = { | ||
| "root": [ | ||
| (r"\s+", Text), | ||
| (r",", Punctuation), | ||
| (r"%[_A-Za-z0-9\.\$\-:#]+", Name.Variable), | ||
| ] | ||
| } | ||
|
|
||
| tokens = { | ||
| "root": [ | ||
| (r"%[a-zA-Z0-9_]+", Name.Variable), | ||
| (r"@[a-zA-Z_][a-zA-Z0-9_]+", Name.Function), | ||
| (r"\^[a-zA-Z0-9_]+", Name.Label), | ||
| (r"#[a-zA-Z0-9_]+", Name.Constant), | ||
| (r"![a-zA-Z0-9_]+", Keyword.Type), | ||
| (r"[a-zA-Z_][a-zA-Z0-9_]*\.", Name.Entity), | ||
| (r"memref[^.]", Keyword.Type), | ||
| (r"index", Keyword.Type), | ||
| (r"i[0-9]+", Keyword.Type), | ||
| (r"f[0-9]+", Keyword.Type), | ||
| # Comments | ||
| (r"//.*?$", Comment.Single), | ||
| # operation name with assignment: %... = op.name | ||
| ( | ||
| r"^(\s*)(%[\%_A-Za-z0-9\:#\,\s]+)(=)(\s*)([A-Za-z0-9_\.\$\-]+)\b", | ||
| bygroups(Text, using(VariableList), Operator, Text, Name.Builtin), | ||
| ), | ||
| # operation name without result | ||
| (r"^(\s*)([A-Za-z0-9_\.\$\-]+)\b(?=[^<:])", bygroups(Text, Name.Builtin)), | ||
| # Attribute alias definition: #name = | ||
| ( | ||
| r"^(\s*)(#[_A-Za-z0-9\$\-\.]+)(\b)(\s*=)", | ||
| bygroups(Text, Name.Constant, Text, Operator), | ||
| ), | ||
| # Type alias definition: !name = | ||
| ( | ||
| r"^(\s*)(![_A-Za-z0-9\$\-\.]+)(\b)(\s*=)", | ||
| bygroups(Text, Keyword.Type, Text, Operator), | ||
| ), | ||
| # SSA values (uses) | ||
| (r"%[_A-Za-z0-9\.\$\-:#]+", Name.Variable), | ||
| # attribute refs, constants and named attributes | ||
| (r"#[_A-Za-z0-9\$\-\.]+\b", Name.Constant), | ||
| # symbol refs / function-like names | ||
| (r"@[_A-Za-z][_A-Za-z0-9\$\-\.]*\b", Name.Function), | ||
| # blocks | ||
| (r"\^[A-Za-z0-9_\$\.\-]+", Name.Label), | ||
| # types by exclamation or builtin names | ||
| (r"![_A-Za-z0-9\$\-\.]+\b", Keyword.Type), | ||
| # NOTE: please sync changes to corresponding builtin type rule in "angled-type" | ||
| (r"\b(bf16|f16|f32|f64|f80|f128|index|none|(u|s)?i[0-9]+)\b", Keyword.Type), | ||
| # container-like dialect types (tensor<...>, memref<...>, vector<...>) | ||
| ( | ||
| r"\b(complex|memref|tensor|tuple|vector)\s*(<)", | ||
| bygroups(Keyword.Type, Punctuation), | ||
| "angled-type", | ||
| ), | ||
| # affine constructs | ||
| (r"\b(affine_map|affine_set)\b", Keyword.Reserved), | ||
| # common builtin operators / functions inside affine_map | ||
| (r"\b(ceildiv|floordiv|mod|symbol)\b", Name.Other), | ||
| # identifiers / bare words | ||
| (r"\b[_A-Za-z][_A-Za-z0-9\.-]*\b", Name.Other), | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't know how exactly leading `\b` works here.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
For example, if we add a rule
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I know it is a word boundary. I don't know what precisely it means here. Will it still match in
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| # numbers: hex, float (with exponent), integer | ||
| (r"\b0x[0-9A-Fa-f]+\b", Number.Hex), | ||
| (r"\b([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?\b", Number.Float), | ||
| (r"\b[0-9]+\b", Number.Integer), | ||
| # strings | ||
| (r'"', String.Double, "string"), | ||
| # punctuation and arrow-like tokens | ||
| (r"->|>=|<=|\>=|\<=|\->|\=>", Operator), | ||
| (r"[()\[\]{}<>,.:=]", Punctuation), | ||
| # operators | ||
| (r"[-+*/%]", Operator), | ||
| ], | ||
| # string state with common escapes | ||
| "string": [ | ||
| (r'\\[ntr"\\]', String.Escape), | ||
| (r'[^"\\]+', String.Double), | ||
| (r'"', String.Double, "#pop"), | ||
| ], | ||
| # angled-type content | ||
| "angled-type": [ | ||
| # match nested '<' and '>' | ||
| (r"<", Punctuation, "#push"), | ||
| (r">", Punctuation, "#pop"), | ||
| # dimensions like 3x or 3x3x... and standalone numbers: | ||
| # - match numbers that are followed by an 'x' (dimension separator) | ||
| (r"([0-9]+)(?=(?:x))", Number.Integer), | ||
| # - match bare numbers (sizes) | ||
| (r"[0-9]+", Number.Integer), | ||
| (r"[0-9]*\.[0-9]*", Number.Float), | ||
| (r'"[^"]*"', String.Double), | ||
| (r"affine_map", Keyword.Reserved), | ||
| # TODO: this should be within affine maps only | ||
| (r"\+-\*\/", Operator), | ||
| (r"floordiv", Operator.Word), | ||
| (r"ceildiv", Operator.Word), | ||
| (r"mod", Operator.Word), | ||
| (r"()\[\]<>,{}", Punctuation), | ||
| (r"\/\/.*\n", Comment.Single), | ||
| ] | ||
| # dynamic dimension '?' | ||
| (r"\?", Name.Integer), | ||
| # the 'x' dimension separator (treat as punctuation) | ||
| (r"x", Punctuation), | ||
| # element / builtin types inside angle brackets (no word-boundary) | ||
| # NOTE: please sync changes to corresponding builtin type rule in "root" | ||
| ( | ||
| r"(?:bf16|f16|f32|f64|f80|f128|index|none|(?:[us]?i[0-9]+))", | ||
PragmaTwice marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Keyword.Type, | ||
| ), | ||
| # also allow nested container-like types to be recognized | ||
| ( | ||
| r"\b(complex|memref|tensor|tuple|vector)\s*(<)", | ||
| bygroups(Keyword.Type, Punctuation), | ||
| "angled-type", | ||
| ), | ||
| # fall back to root rules for anything else | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this is possible, we shouldn't need the special logic above, I think.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yup, but the parsing logic inside and outside the angle brackets is quite different. One of the reasons is stated here: #166406 (comment). |
||
| include("root"), | ||
| ], | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.