-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[clang][utils] Add auto mode to reduction script #163282
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,12 +18,22 @@ | |
import subprocess | ||
import shlex | ||
import tempfile | ||
import shutil | ||
import multiprocessing | ||
from enum import StrEnum, auto | ||
from typing import List, Optional | ||
|
||
verbose = False | ||
creduce_cmd = None | ||
clang_cmd = None | ||
opt_cmd = None | ||
llc_cmd = None | ||
|
||
|
||
class FailureType(StrEnum): | ||
FrontEnd = auto() | ||
MiddleEnd = auto() | ||
BackEnd = auto() | ||
Unknown = auto() | ||
|
||
|
||
def verbose_print(*args, **kwargs): | ||
|
@@ -70,6 +80,44 @@ def write_to_script(text, filename): | |
os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC) | ||
|
||
|
||
def extract_opt_level(args_list: List[str]) -> Optional[str]: | ||
""" | ||
Finds the last optimization flag (-O...) from a list of arguments. | ||
|
||
Args: | ||
args_list: A list of string arguments passed to the compiler. | ||
|
||
Returns: | ||
The last matching optimization flag string if found, otherwise None. | ||
""" | ||
valid_opt_flags = {"-O0", "-O1", "-O2", "-O3", "-Os", "-Oz", "-Og", "-Ofast"} | ||
|
||
# Iterate in reverse to find the last occurrence | ||
for arg in reversed(args_list): | ||
if arg in valid_opt_flags: | ||
return arg | ||
return None | ||
|
||
|
||
def remove_first_line(file_path): | ||
""" | ||
Removes the first line from a specified file. | ||
""" | ||
try: | ||
with open(file_path, "r") as f: | ||
lines = f.readlines() | ||
|
||
# If the file is not empty, write all lines except the first one back. | ||
if lines: | ||
with open(file_path, "w") as f: | ||
f.writelines(lines[1:]) | ||
|
||
except FileNotFoundError: | ||
print(f"Error: File '{file_path}' not found.") | ||
except Exception as e: | ||
print(f"An error occurred: {e}") | ||
|
||
|
||
class Reduce(object): | ||
def __init__(self, crash_script, file_to_reduce, creduce_flags): | ||
crash_script_name, crash_script_ext = os.path.splitext(crash_script) | ||
|
@@ -85,6 +133,9 @@ def __init__(self, crash_script, file_to_reduce, creduce_flags): | |
self.expected_output = [] | ||
self.needs_stack_trace = False | ||
self.creduce_flags = ["--tidy"] + creduce_flags | ||
self.opt = opt_cmd | ||
self.llc = llc_cmd | ||
self.ir_file = "crash.ll" | ||
|
||
self.read_clang_args(crash_script, file_to_reduce) | ||
self.read_expected_output() | ||
|
@@ -186,22 +237,30 @@ def skip_function(func_name): | |
|
||
self.expected_output = result | ||
|
||
def check_expected_output(self, args=None, filename=None): | ||
def check_expected_output(self, cmd=None, args=None, filename=None): | ||
if not cmd: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. having all these default arguments makes it very hard to reason, it would be nice to clean up the callers to explicitly pass things in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ack. I'll look at doing that. |
||
cmd = self.clang | ||
if not args: | ||
args = self.clang_args | ||
if not filename: | ||
filename = self.file_to_reduce | ||
|
||
p = subprocess.Popen( | ||
self.get_crash_cmd(args=args, filename=filename), | ||
self.get_crash_cmd(cmd=cmd, args=args, filename=filename), | ||
stdout=subprocess.PIPE, | ||
stderr=subprocess.STDOUT, | ||
) | ||
crash_output, _ = p.communicate() | ||
return all(msg in crash_output.decode("utf-8") for msg in self.expected_output) | ||
|
||
def write_interestingness_test(self): | ||
def write_interestingness_test(self, cmd=None, use_llvm_reduce=False): | ||
print("\nCreating the interestingness test...") | ||
if not cmd: | ||
cmd = self.get_crash_cmd() | ||
|
||
# llvm-reduce interestingness tests take the file as the first argument. | ||
# NOTE: this cannot be escaped by quote_cmd(), since it needs expansion. | ||
filename = '< "$1"' if use_llvm_reduce else "" | ||
|
||
# Disable symbolization if it's not required to avoid slow symbolization. | ||
disable_symbolization = "" | ||
|
@@ -210,32 +269,39 @@ def write_interestingness_test(self): | |
|
||
output = """#!/bin/bash | ||
%s | ||
if %s >& t.log ; then | ||
if %s %s >& t.log ; then | ||
exit 1 | ||
fi | ||
""" % ( | ||
disable_symbolization, | ||
quote_cmd(self.get_crash_cmd()), | ||
quote_cmd(cmd), | ||
filename, | ||
) | ||
|
||
for msg in self.expected_output: | ||
output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg) | ||
|
||
write_to_script(output, self.testfile) | ||
self.check_interestingness() | ||
self.check_interestingness(cmd, use_llvm_reduce=use_llvm_reduce) | ||
|
||
def check_interestingness(self): | ||
testfile = os.path.abspath(self.testfile) | ||
def check_interestingness(self, cmd, use_llvm_reduce=False): | ||
test_cmd = [os.path.abspath(self.testfile)] | ||
|
||
# llvm-reduce interestingness tests take the file as the first arg. | ||
if use_llvm_reduce: | ||
test_cmd += [self.ir_file] | ||
# Check that the test considers the original file interesting | ||
returncode = subprocess.call(testfile, stdout=subprocess.DEVNULL) | ||
if returncode: | ||
result = subprocess.run(args=test_cmd, stdout=subprocess.DEVNULL) | ||
if result.returncode: | ||
sys.exit("The interestingness test does not pass for the original file.") | ||
|
||
# Check that an empty file is not interesting | ||
# Instead of modifying the filename in the test file, just run the command | ||
with tempfile.NamedTemporaryFile() as empty_file: | ||
is_interesting = self.check_expected_output(filename=empty_file.name) | ||
new_args = cmd[1:] if use_llvm_reduce else cmd[1:-1] | ||
is_interesting = self.check_expected_output( | ||
cmd=cmd[0], args=new_args, filename=empty_file.name | ||
) | ||
if is_interesting: | ||
sys.exit("The interestingness test passes for an empty file.") | ||
|
||
|
@@ -424,11 +490,129 @@ def run_creduce(self): | |
print("\n\nctrl-c detected, killed reduction tool") | ||
p.kill() | ||
|
||
def run_llvm_reduce(self): | ||
full_llvm_reduce_cmd = [ | ||
llvm_reduce_cmd, | ||
f"--test={self.testfile}", | ||
self.ir_file, | ||
] | ||
print("\nRunning llvm-reduce tool...") | ||
verbose_print(quote_cmd(full_llvm_reduce_cmd)) | ||
try: | ||
p = subprocess.Popen(full_llvm_reduce_cmd) | ||
p.communicate() | ||
except KeyboardInterrupt: | ||
print("\n\nctrl-c detected, killed reduction tool") | ||
aeubanks marked this conversation as resolved.
Show resolved
Hide resolved
|
||
p.kill() | ||
|
||
def classify_crash(self) -> FailureType: | ||
print("Classifying crash ...") | ||
if self.check_expected_output(args=self.clang_args + ["-fsyntax-only"]): | ||
print("Found Frontend Crash") | ||
return FailureType.FrontEnd | ||
|
||
print("Found Middle/Backend failure") | ||
|
||
self.opt_level = extract_opt_level(self.clang_args) or "-O2" | ||
backend_result = self.check_backend() | ||
if backend_result == FailureType.BackEnd: | ||
return backend_result | ||
|
||
# Try running w/ -emit-llvm to generate an IR file, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd also try changing the opt-level and/or disabling LLVM passes when creating the IR file. In https://github.com/CTSRD-CHERI/llvm-project/blob/238a953f5c7ca3de3cff67bf2433a0f59c9677ee/clang/utils/creduce_crash_testcase.py#L899C75-L899C96 I try the normal command first, then A lot of the time we also need many of the llc command flags to actually reproduce the crash so I tried mapping some of the clang args to llc args, but it's not very reliable. Luckily you can still use llvm-reduce with clang in the reproducer script. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1, at some point if llc doesn't repro the backend crash we should also attempt to run this through There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I think the strategy is still WIP. Likely this bit will get the most refactoring/has most room for improvement. |
||
# if it succeeds we have a backend failure, and can use llc. | ||
if not self.check_expected_output( | ||
args=self.clang_args + ["-emit-llvm", "-o", self.ir_file] | ||
): | ||
print("Checking llc for failure") | ||
if self.check_expected_output( | ||
cmd=self.llc, | ||
args=[self.opt_level, "-filetype=obj"], | ||
filename=self.ir_file, | ||
): | ||
print("Found BackEnd Crash") | ||
return FailureType.BackEnd | ||
elif os.path.exists(self.ir_file): | ||
# clean up the IR file if we generated it, but won't use it. | ||
print(f"clean up {self.ir_file}, since we can't repro w/ llc") | ||
os.remove(self.ir_file) | ||
|
||
# The IR file will be in 'self.ir_file' | ||
self.extract_crashing_ir() | ||
return self.check_middle_end() | ||
|
||
def check_llc_failure(self) -> bool: | ||
return self.check_expected_output( | ||
cmd=self.llc, args=[self.opt_level, "-filetype=obj"], filename=self.ir_file | ||
) | ||
|
||
def extract_backend_ir(self) -> bool: | ||
return not self.check_expected_output( | ||
args=self.clang_args + ["-emit-llvm", "-o", self.ir_file] | ||
) | ||
|
||
def check_backend(self) -> Optional[FailureType]: | ||
# Try running w/ -emit-llvm to generate an IR file, | ||
# if it succeeds we have a backend failure, and can use llc. | ||
if self.extract_backend_ir(): | ||
print("Checking llc for failure") | ||
if self.check_llc_failure(): | ||
print("Found BackEnd Crash") | ||
return FailureType.BackEnd | ||
elif os.path.exists(self.ir_file): | ||
# clean up the IR file if we generated it, but won't use it. | ||
print(f"clean up {self.ir_file}, since we can't repro w/ llc") | ||
os.remove(self.ir_file) | ||
|
||
def extract_crashing_ir(self): | ||
""" | ||
Extract IR just before the crash using --print-on-crash | ||
""" | ||
args = self.clang_args + [ | ||
"-mllvm", | ||
"--print-on-crash", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its nice in that its very simple to use this mechanism, vs. something like getting the pass number and then dumping that IR w/ -print-before. It certainly made scripting this simpler. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I factored this logic into some methods, that way if we want to switch the mechanism it should be slightly easier. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. at some point we should also incorporate There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, that'd be a nice improvement. |
||
"-mllvm", | ||
f"--print-on-crash-path={self.ir_file}", | ||
"-mllvm", | ||
"--print-module-scope", | ||
] | ||
|
||
if not self.check_expected_output(args=args): | ||
sys.exit("The interestingness test does not pass with '--print-on-crash'.") | ||
|
||
# The output from --print-on-crash has an invalid first line (pass name). | ||
remove_first_line(self.ir_file) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should change the extra line to start with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, yeah, that'd be way simpler. |
||
|
||
def check_middle_end(self) -> FailureType: | ||
# TODO: parse the exact pass from the backtrace and set the pass | ||
# pipeline directly via -passes="...". | ||
print("Checking opt for failure") | ||
if self.check_expected_output( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. running the entire pipeline on the IR at the point of the crash isn't precisely matching what the crashing pass would see since we're running all the passes up to that point twice ideally we'd go through a couple of steps:
and similarly for llc reduction it should run on the output post-optimization ( if you want to push this off to another PR, please add a TODO There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I'd like to have something more sophisticated. I have a TODO around line 640, but maybe that's better here. The backtrace seems to contain the exact pass spelling that things crash on, so I think it will be not that hard to adapt, but I wanted something we could iterate on w/o worrying about every detail. But, I agree there's lots of room for improvement here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. iiuc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let me double check, but I think opt definitly still crashes here (at least in my test case). it certainly was in my earlier testing (and I'm pretty sure I confirmed it after I refactored this yesterday). but I'd actually like to get the precise pass from the backtrace and use that to just craft the
This is the backtrace I'm seeing locally. I'm not 100% on if any of this is gated on a specific cmake flag we set in a typical build of our toolchain, but having the pass string printed seems like a really easy way to get it to reproduce if we get the IR from -print-on-crash.
|
||
cmd=self.opt, | ||
args=[self.opt_level, "-disable-output"], | ||
filename=self.ir_file, | ||
): | ||
print("Found MiddleEnd Crash") | ||
return FailureType.MiddleEnd | ||
print( | ||
"Could not automatically detect crash type, falling back to creduce/cvise." | ||
) | ||
return FailureType.Unknown | ||
|
||
def reduce_ir_crash(self, new_cmd: List[str]): | ||
print("Writing interestingness test...") | ||
self.write_interestingness_test(cmd=new_cmd, use_llvm_reduce=True) | ||
print("Starting llvm-reduce with llc test case") | ||
self.run_llvm_reduce() | ||
print("Done Reducing IR file.") | ||
|
||
|
||
def main(): | ||
global verbose | ||
global creduce_cmd | ||
global llvm_reduce_cmd | ||
global clang_cmd | ||
global opt_cmd | ||
global llc_cmd | ||
|
||
parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter) | ||
parser.add_argument( | ||
|
@@ -450,20 +634,50 @@ def main(): | |
help="The path to the `clang` executable. " | ||
"By default uses the llvm-bin directory.", | ||
) | ||
parser.add_argument( | ||
"--opt", | ||
dest="opt", | ||
type=str, | ||
help="The path to the `opt` executable. " | ||
"By default uses the llvm-bin directory.", | ||
) | ||
parser.add_argument( | ||
"--llc", | ||
dest="llc", | ||
type=str, | ||
help="The path to the `llc` executable. " | ||
"By default uses the llvm-bin directory.", | ||
) | ||
parser.add_argument( | ||
"--creduce", | ||
dest="creduce", | ||
type=str, | ||
help="The path to the `creduce` or `cvise` executable. " | ||
"Required if neither `creduce` nor `cvise` are on PATH.", | ||
) | ||
parser.add_argument( | ||
"--llvm-reduce", | ||
dest="llvm_reduce", | ||
type=str, | ||
help="The path to the `llvm-reduce` executable. " | ||
"By default uses the llvm-bin directory.", | ||
) | ||
parser.add_argument( | ||
"--auto", | ||
action="store_true", | ||
help="Use auto reduction mode, that uses `creduce`/`cvise`" | ||
"for frontend crashes and llvm-reduce for middle/backend crashes.", | ||
) | ||
parser.add_argument("-v", "--verbose", action="store_true") | ||
args, creduce_flags = parser.parse_known_args() | ||
verbose = args.verbose | ||
llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None | ||
creduce_cmd = check_cmd("creduce", None, args.creduce) | ||
creduce_cmd = check_cmd("cvise", None, args.creduce) | ||
llvm_reduce_cmd = check_cmd("llvm-reduce", llvm_bin, args.llvm_reduce) | ||
clang_cmd = check_cmd("clang", llvm_bin, args.clang) | ||
opt_cmd = check_cmd("opt", llvm_bin, args.opt) | ||
llc_cmd = check_cmd("llc", llvm_bin, args.llc) | ||
|
||
crash_script = check_file(args.crash_script[0]) | ||
file_to_reduce = check_file(args.file_to_reduce[0]) | ||
|
@@ -472,6 +686,20 @@ def main(): | |
creduce_flags += ["--n", str(max(4, multiprocessing.cpu_count() // 2))] | ||
|
||
r = Reduce(crash_script, file_to_reduce, creduce_flags) | ||
if args.auto: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe we only try to reduce in opt/llc if those binaries are provided to us, rather than on the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to gate everything behind a flag to start and be sure the default flow wasn't changed. Happy to modify the mechanism if we're by and large happy w/ the direction of this patch. |
||
crash_type = r.classify_crash() | ||
match crash_type: | ||
case FailureType.FrontEnd | FailureType.Unknown: | ||
print("Starting reduction with creduce/cvise") | ||
pass | ||
case FailureType.MiddleEnd: | ||
new_cmd = [opt_cmd, "-disable-output", r.opt_level] | ||
r.reduce_ir_crash(new_cmd) | ||
return | ||
case FailureType.BackEnd: | ||
new_cmd = [llc_cmd, "-filetype=obj", r.opt_level] | ||
r.reduce_ir_crash(new_cmd) | ||
return | ||
|
||
r.simplify_clang_args() | ||
r.write_interestingness_test() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
llc/opt won't recognize -Og/-Ofast
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
crap, so much for simple.