- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[utils][UpdateTestChecks] Extract MIR functionality into separate mir.py module #165535
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
….py module This commit extracts some MIR-related code from common.py and update_mir_test_checks.py into a dedicated mir.py module to improve code organization. All code is intentionally moved verbatim with minimal necessary adaptations (log() calls are converted to print(..., file=sys.stderr) because log() is not available in the new module).
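| @llvm/pr-subscribers-testing-tools Author: Valery Pykhtin (vpykhtin) Changes: This commit extracts some MIR-related code from `common.py` and `update_mir_test_checks.py` into a dedicated `mir.py` module to improve code organization. All code is intentionally moved verbatim with minimal necessary adaptations: `log()` calls are converted to `print(..., file=sys.stderr)` (at `mir.py` lines 62 and 64) because `log()` is not available in `mir.py`.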
 Going to fix formatting with a subsequent commit in this PR. Patch is 26.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165535.diff 4 Files Affected: 
 diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index 8cd200c93a482..b6b80ea117672 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -2396,244 +2396,6 @@ def add_analyze_checks(
     )
 
 
-IR_FUNC_NAME_RE = re.compile(
-    r"^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\("
-)
-IR_PREFIX_DATA_RE = re.compile(r"^ *(;|$)")
-MIR_FUNC_NAME_RE = re.compile(r" *name: *(?P<func>[A-Za-z0-9_.-]+)")
-MIR_BODY_BEGIN_RE = re.compile(r" *body: *\|")
-MIR_BASIC_BLOCK_RE = re.compile(r" *bb\.[0-9]+.*:$")
-MIR_PREFIX_DATA_RE = re.compile(r"^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)")
-
-
-def find_mir_functions_with_one_bb(lines, verbose=False):
-    result = []
-    cur_func = None
-    bbs = 0
-    for line in lines:
-        m = MIR_FUNC_NAME_RE.match(line)
-        if m:
-            if bbs == 1:
-                result.append(cur_func)
-            cur_func = m.group("func")
-            bbs = 0
-        m = MIR_BASIC_BLOCK_RE.match(line)
-        if m:
-            bbs += 1
-    if bbs == 1:
-        result.append(cur_func)
-    return result
-
-
-def add_mir_checks_for_function(
-    test,
-    output_lines,
-    run_list,
-    func_dict,
-    func_name,
-    single_bb,
-    print_fixed_stack,
-    first_check_is_next,
-    at_the_function_name,
-):
-    printed_prefixes = set()
-    for run in run_list:
-        for prefix in run[0]:
-            if prefix in printed_prefixes:
-                break
-            if not func_dict[prefix][func_name]:
-                continue
-            if printed_prefixes:
-                # Add some space between different check prefixes.
-                indent = len(output_lines[-1]) - len(output_lines[-1].lstrip(" "))
-                output_lines.append(" " * indent + ";")
-            printed_prefixes.add(prefix)
-            add_mir_check_lines(
-                test,
-                output_lines,
-                prefix,
-                ("@" if at_the_function_name else "") + func_name,
-                single_bb,
-                func_dict[prefix][func_name],
-                print_fixed_stack,
-                first_check_is_next,
-            )
-            break
-        else:
-            warn(
-                "Found conflicting asm for function: {}".format(func_name),
-                test_file=test,
-            )
-    return output_lines
-
-
-def add_mir_check_lines(
-    test,
-    output_lines,
-    prefix,
-    func_name,
-    single_bb,
-    func_info,
-    print_fixed_stack,
-    first_check_is_next,
-):
-    func_body = str(func_info).splitlines()
-    if single_bb:
-        # Don't bother checking the basic block label for a single BB
-        func_body.pop(0)
-
-    if not func_body:
-        warn(
-            "Function has no instructions to check: {}".format(func_name),
-            test_file=test,
-        )
-        return
-
-    first_line = func_body[0]
-    indent = len(first_line) - len(first_line.lstrip(" "))
-    # A check comment, indented the appropriate amount
-    check = "{:>{}}; {}".format("", indent, prefix)
-
-    output_lines.append("{}-LABEL: name: {}".format(check, func_name))
-
-    if print_fixed_stack:
-        output_lines.append("{}: fixedStack:".format(check))
-        for stack_line in func_info.extrascrub.splitlines():
-            filecheck_directive = check + "-NEXT"
-            output_lines.append("{}: {}".format(filecheck_directive, stack_line))
-
-    first_check = not first_check_is_next
-    for func_line in func_body:
-        if not func_line.strip():
-            # The mir printer prints leading whitespace so we can't use CHECK-EMPTY:
-            output_lines.append(check + "-NEXT: {{" + func_line + "$}}")
-            continue
-        filecheck_directive = check if first_check else check + "-NEXT"
-        first_check = False
-        check_line = "{}: {}".format(filecheck_directive, func_line[indent:]).rstrip()
-        output_lines.append(check_line)
-
-
-def should_add_mir_line_to_output(input_line, prefix_set):
-    # Skip any check lines that we're handling as well as comments
-    m = CHECK_RE.match(input_line)
-    if (m and m.group(1) in prefix_set) or input_line.strip() == ";":
-        return False
-    return True
-
-
-def add_mir_checks(
-    input_lines,
-    prefix_set,
-    autogenerated_note,
-    test,
-    run_list,
-    func_dict,
-    print_fixed_stack,
-    first_check_is_next,
-    at_the_function_name,
-):
-    simple_functions = find_mir_functions_with_one_bb(input_lines)
-
-    output_lines = []
-    output_lines.append(autogenerated_note)
-
-    func_name = None
-    state = "toplevel"
-    for input_line in input_lines:
-        if input_line == autogenerated_note:
-            continue
-
-        if state == "toplevel":
-            m = IR_FUNC_NAME_RE.match(input_line)
-            if m:
-                state = "ir function prefix"
-                func_name = m.group("func")
-            if input_line.rstrip("| \r\n") == "---":
-                state = "document"
-            output_lines.append(input_line)
-        elif state == "document":
-            m = MIR_FUNC_NAME_RE.match(input_line)
-            if m:
-                state = "mir function metadata"
-                func_name = m.group("func")
-            if input_line.strip() == "...":
-                state = "toplevel"
-                func_name = None
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-        elif state == "mir function metadata":
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-            m = MIR_BODY_BEGIN_RE.match(input_line)
-            if m:
-                if func_name in simple_functions:
-                    # If there's only one block, put the checks inside it
-                    state = "mir function prefix"
-                    continue
-                state = "mir function body"
-                add_mir_checks_for_function(
-                    test,
-                    output_lines,
-                    run_list,
-                    func_dict,
-                    func_name,
-                    single_bb=False,
-                    print_fixed_stack=print_fixed_stack,
-                    first_check_is_next=first_check_is_next,
-                    at_the_function_name=at_the_function_name,
-                )
-        elif state == "mir function prefix":
-            m = MIR_PREFIX_DATA_RE.match(input_line)
-            if not m:
-                state = "mir function body"
-                add_mir_checks_for_function(
-                    test,
-                    output_lines,
-                    run_list,
-                    func_dict,
-                    func_name,
-                    single_bb=True,
-                    print_fixed_stack=print_fixed_stack,
-                    first_check_is_next=first_check_is_next,
-                    at_the_function_name=at_the_function_name,
-                )
-
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-        elif state == "mir function body":
-            if input_line.strip() == "...":
-                state = "toplevel"
-                func_name = None
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-        elif state == "ir function prefix":
-            m = IR_PREFIX_DATA_RE.match(input_line)
-            if not m:
-                state = "ir function body"
-                add_mir_checks_for_function(
-                    test,
-                    output_lines,
-                    run_list,
-                    func_dict,
-                    func_name,
-                    single_bb=False,
-                    print_fixed_stack=print_fixed_stack,
-                    first_check_is_next=first_check_is_next,
-                    at_the_function_name=at_the_function_name,
-                )
-
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-        elif state == "ir function body":
-            if input_line.strip() == "}":
-                state = "toplevel"
-                func_name = None
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-    return output_lines
-
-
 def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
     for nameless_value in ginfo.get_nameless_values():
         if nameless_value.global_ir_rhs_regexp is None:
diff --git a/llvm/utils/UpdateTestChecks/mir.py b/llvm/utils/UpdateTestChecks/mir.py
new file mode 100644
index 0000000000000..24bb8b341d335
--- /dev/null
+++ b/llvm/utils/UpdateTestChecks/mir.py
@@ -0,0 +1,362 @@
+"""MIR test utility functions for UpdateTestChecks scripts."""
+
+import re
+import sys
+from UpdateTestChecks import common
+from UpdateTestChecks.common import (
+    CHECK_RE,
+    warn,
+)
+
+IR_FUNC_NAME_RE = re.compile(
+    r"^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\("
+)
+IR_PREFIX_DATA_RE = re.compile(r"^ *(;|$)")
+MIR_FUNC_NAME_RE = re.compile(r" *name: *(?P<func>[A-Za-z0-9_.-]+)")
+MIR_BODY_BEGIN_RE = re.compile(r" *body: *\|")
+MIR_BASIC_BLOCK_RE = re.compile(r" *bb\.[0-9]+.*:$")
+MIR_PREFIX_DATA_RE = re.compile(r"^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)")
+
+VREG_RE = re.compile(r"(%[0-9]+)(?:\.[a-z0-9_]+)?(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?")
+MI_FLAGS_STR = (
+    r"(frame-setup |frame-destroy |nnan |ninf |nsz |arcp |contract |afn "
+    r"|reassoc |nuw |nsw |exact |nofpexcept |nomerge |unpredictable "
+    r"|noconvergent |nneg |disjoint |nusw |samesign |inbounds )*"
+)
+VREG_DEF_FLAGS_STR = r"(?:dead |undef )*"
+
+# Pattern to match the defined vregs and the opcode of an instruction that
+# defines vregs. Opcodes starting with a lower-case 't' are allowed to match
+# ARM's thumb instructions, like tADDi8 and t2ADDri.
+VREG_DEF_RE = re.compile(
+    r"^ *(?P<vregs>{2}{0}(?:, {2}{0})*) = "
+    r"{1}(?P<opcode>[A-Zt][A-Za-z0-9_]+)".format(
+        VREG_RE.pattern, MI_FLAGS_STR, VREG_DEF_FLAGS_STR
+    )
+)
+
+MIR_FUNC_RE = re.compile(
+    r"^---$"
+    r"\n"
+    r"^ *name: *(?P<func>[A-Za-z0-9_.-]+)$"
+    r".*?"
+    r"(?:^ *fixedStack: *(\[\])? *\n"
+    r"(?P<fixedStack>.*?)\n?"
+    r"^ *stack:"
+    r".*?)?"
+    r"^ *body: *\|\n"
+    r"(?P<body>.*?)\n"
+    r"^\.\.\.$",
+    flags=(re.M | re.S),
+)
+
+
+def build_function_info_dictionary(
+    test, raw_tool_output, triple, prefixes, func_dict, verbose
+):
+    for m in MIR_FUNC_RE.finditer(raw_tool_output):
+        func = m.group("func")
+        fixedStack = m.group("fixedStack")
+        body = m.group("body")
+        if verbose:
+            print("Processing function: {}".format(func), file=sys.stderr)
+            for l in body.splitlines():
+                print("  {}".format(l), file=sys.stderr)
+
+        # Vreg mangling
+        mangled = []
+        vreg_map = {}
+        for func_line in body.splitlines(keepends=True):
+            m = VREG_DEF_RE.match(func_line)
+            if m:
+                for vreg in VREG_RE.finditer(m.group("vregs")):
+                    if vreg.group(1) in vreg_map:
+                        name = vreg_map[vreg.group(1)]
+                    else:
+                        name = mangle_vreg(m.group("opcode"), vreg_map.values())
+                        vreg_map[vreg.group(1)] = name
+                    func_line = func_line.replace(
+                        vreg.group(1), "[[{}:%[0-9]+]]".format(name), 1
+                    )
+            for number, name in vreg_map.items():
+                func_line = re.sub(
+                    r"{}\b".format(number), "[[{}]]".format(name), func_line
+                )
+            mangled.append(func_line)
+        body = "".join(mangled)
+
+        for prefix in prefixes:
+            info = common.function_body(
+                body, fixedStack, None, None, None, None, ginfo=None
+            )
+            if func in func_dict[prefix]:
+                if (
+                    not func_dict[prefix][func]
+                    or func_dict[prefix][func].scrub != info.scrub
+                    or func_dict[prefix][func].extrascrub != info.extrascrub
+                ):
+                    func_dict[prefix][func] = None
+            else:
+                func_dict[prefix][func] = info
+
+
+def mangle_vreg(opcode, current_names):
+    base = opcode
+    # Simplify some common prefixes and suffixes
+    if opcode.startswith("G_"):
+        base = base[len("G_") :]
+    if opcode.endswith("_PSEUDO"):
+        base = base[: len("_PSEUDO")]
+    # Shorten some common opcodes with long-ish names
+    base = dict(
+        IMPLICIT_DEF="DEF",
+        GLOBAL_VALUE="GV",
+        CONSTANT="C",
+        FCONSTANT="C",
+        MERGE_VALUES="MV",
+        UNMERGE_VALUES="UV",
+        INTRINSIC="INT",
+        INTRINSIC_W_SIDE_EFFECTS="INT",
+        INSERT_VECTOR_ELT="IVEC",
+        EXTRACT_VECTOR_ELT="EVEC",
+        SHUFFLE_VECTOR="SHUF",
+    ).get(base, base)
+    # Avoid ambiguity when opcodes end in numbers
+    if len(base.rstrip("0123456789")) < len(base):
+        base += "_"
+
+    i = 0
+    for name in current_names:
+        if name.rstrip("0123456789") == base:
+            i += 1
+    if i:
+        return "{}{}".format(base, i)
+    return base
+
+
+def find_mir_functions_with_one_bb(lines, verbose=False):
+    result = []
+    cur_func = None
+    bbs = 0
+    for line in lines:
+        m = MIR_FUNC_NAME_RE.match(line)
+        if m:
+            if bbs == 1:
+                result.append(cur_func)
+            cur_func = m.group("func")
+            bbs = 0
+        m = MIR_BASIC_BLOCK_RE.match(line)
+        if m:
+            bbs += 1
+    if bbs == 1:
+        result.append(cur_func)
+    return result
+
+
+def add_mir_checks_for_function(
+    test,
+    output_lines,
+    run_list,
+    func_dict,
+    func_name,
+    single_bb,
+    print_fixed_stack,
+    first_check_is_next,
+    at_the_function_name,
+):
+    printed_prefixes = set()
+    for run in run_list:
+        for prefix in run[0]:
+            if prefix in printed_prefixes:
+                break
+            if not func_dict[prefix][func_name]:
+                continue
+            if printed_prefixes:
+                # Add some space between different check prefixes.
+                indent = len(output_lines[-1]) - len(output_lines[-1].lstrip(" "))
+                output_lines.append(" " * indent + ";")
+            printed_prefixes.add(prefix)
+            add_mir_check_lines(
+                test,
+                output_lines,
+                prefix,
+                ("@" if at_the_function_name else "") + func_name,
+                single_bb,
+                func_dict[prefix][func_name],
+                print_fixed_stack,
+                first_check_is_next,
+            )
+            break
+        else:
+            warn(
+                "Found conflicting asm for function: {}".format(func_name),
+                test_file=test,
+            )
+    return output_lines
+
+
+def add_mir_check_lines(
+    test,
+    output_lines,
+    prefix,
+    func_name,
+    single_bb,
+    func_info,
+    print_fixed_stack,
+    first_check_is_next,
+):
+    func_body = str(func_info).splitlines()
+    if single_bb:
+        # Don't bother checking the basic block label for a single BB
+        func_body.pop(0)
+
+    if not func_body:
+        warn(
+            "Function has no instructions to check: {}".format(func_name),
+            test_file=test,
+        )
+        return
+
+    first_line = func_body[0]
+    indent = len(first_line) - len(first_line.lstrip(" "))
+    # A check comment, indented the appropriate amount
+    check = "{:>{}}; {}".format("", indent, prefix)
+
+    output_lines.append("{}-LABEL: name: {}".format(check, func_name))
+
+    if print_fixed_stack:
+        output_lines.append("{}: fixedStack:".format(check))
+        for stack_line in func_info.extrascrub.splitlines():
+            filecheck_directive = check + "-NEXT"
+            output_lines.append("{}: {}".format(filecheck_directive, stack_line))
+
+    first_check = not first_check_is_next
+    for func_line in func_body:
+        if not func_line.strip():
+            # The mir printer prints leading whitespace so we can't use CHECK-EMPTY:
+            output_lines.append(check + "-NEXT: {{" + func_line + "$}}")
+            continue
+        filecheck_directive = check if first_check else check + "-NEXT"
+        first_check = False
+        check_line = "{}: {}".format(filecheck_directive, func_line[indent:]).rstrip()
+        output_lines.append(check_line)
+
+
+def should_add_mir_line_to_output(input_line, prefix_set):
+    # Skip any check lines that we're handling as well as comments
+    m = CHECK_RE.match(input_line)
+    if (m and m.group(1) in prefix_set) or input_line.strip() == ";":
+        return False
+    return True
+
+
+def add_mir_checks(
+    input_lines,
+    prefix_set,
+    autogenerated_note,
+    test,
+    run_list,
+    func_dict,
+    print_fixed_stack,
+    first_check_is_next,
+    at_the_function_name,
+):
+    simple_functions = find_mir_functions_with_one_bb(input_lines)
+
+    output_lines = []
+    output_lines.append(autogenerated_note)
+
+    func_name = None
+    state = "toplevel"
+    for input_line in input_lines:
+        if input_line == autogenerated_note:
+            continue
+
+        if state == "toplevel":
+            m = IR_FUNC_NAME_RE.match(input_line)
+            if m:
+                state = "ir function prefix"
+                func_name = m.group("func")
+            if input_line.rstrip("| \r\n") == "---":
+                state = "document"
+            output_lines.append(input_line)
+        elif state == "document":
+            m = MIR_FUNC_NAME_RE.match(input_line)
+            if m:
+                state = "mir function metadata"
+                func_name = m.group("func")
+            if input_line.strip() == "...":
+                state = "toplevel"
+                func_name = None
+            if should_add_mir_line_to_output(input_line, prefix_set):
+                output_lines.append(input_line)
+        elif state == "mir function metadata":
+            if should_add_mir_line_to_output(input_line, prefix_set):
+                output_lines.append(input_line)
+            m = MIR_BODY_BEGIN_RE.match(input_line)
+            if m:
+                if func_name in simple_functions:
+                    # If there's only one block, put the checks inside it
+                    state = "mir function prefix"
+                    continue
+                state = "mir function body"
+                add_mir_checks_for_function(
+                    test,
+                    output_lines,
+                    run_list,
+                    func_dict,
+                    func_name,
+                    single_bb=False,
+                    print_fixed_stack=print_fixed_stack,
+                    first_check_is_next=first_check_is_next,
+                    at_the_function_name=at_the_function_name,
+                )
+        elif state == "mir function prefix":
+            m = MIR_PREFIX_DATA_RE.match(input_line)
+            if not m:
+                state = "mir function body"
+                add_mir_checks_for_function(
+                    test,
+                    output_lines,
+                    run_list,
+                    func_dict,
+                    func_name,
+                    single_bb=True,
+                    print_fixed_stack=print_fixed_stack,
+                    first_check_is_next=first_check_is_next,
+                    at_the_function_name=at_the_function_name,
+                )
+
+            if should_add_mir_line_to_output(input_line, prefix_set):
+                output_lines.append(input_line)
+        elif state == "mir function body":
+            if input_line.strip() == "...":
+                state = "toplevel"
+                func_name = None
+            if should_add_mir_line_to_output(input_line, prefix_set):
+                output_lines.append(input_line)
+        elif state == "ir function prefix":
+           ...
[truncated]
 | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did not review this in detail but it looks like this is just a straightforward move.
Overall this makes a lot of sense!
….py module (llvm#165535) This commit extracts some MIR-related code from `common.py` and `update_mir_test_checks.py` into a dedicated `mir.py` module to improve code organization. This is a preparation step for llvm#164965 and also moves some pieces already moved by llvm#140296. All code is intentionally moved verbatim with minimal necessary adaptations: * `log()` calls are converted to `print(..., file=sys.stderr)` at `mir.py` lines 62 and 64 because `log()` is not available in `mir.py`.
This commit extracts some MIR-related code from `common.py` and `update_mir_test_checks.py` into a dedicated `mir.py` module to improve code organization. This is a preparation step for #164965 and also moves some pieces already moved by #140296.
All code is intentionally moved verbatim with minimal necessary adaptations:
`log()` calls are converted to `print(..., file=sys.stderr)` at `mir.py` lines 62 and 64 because `log()` is not available in `mir.py`.
Going to fix formatting with a subsequent commit in this PR.