Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
252 changes: 240 additions & 12 deletions clang/utils/reduce-clang-crash.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,22 @@
import subprocess
import shlex
import tempfile
import shutil
import multiprocessing
from enum import StrEnum, auto
from typing import List, Optional

verbose = False
creduce_cmd = None
clang_cmd = None
opt_cmd = None
llc_cmd = None


class FailureType(StrEnum):
FrontEnd = auto()
MiddleEnd = auto()
BackEnd = auto()
Unknown = auto()


def verbose_print(*args, **kwargs):
Expand Down Expand Up @@ -70,6 +80,44 @@ def write_to_script(text, filename):
os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)


def extract_opt_level(args_list: List[str]) -> Optional[str]:
"""
Finds the last optimization flag (-O...) from a list of arguments.

Args:
args_list: A list of string arguments passed to the compiler.

Returns:
The last matching optimization flag string if found, otherwise None.
"""
valid_opt_flags = {"-O0", "-O1", "-O2", "-O3", "-Os", "-Oz", "-Og", "-Ofast"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

llc/opt won't recognize -Og/-Ofast

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

crap, so much for simple.


# Iterate in reverse to find the last occurrence
for arg in reversed(args_list):
if arg in valid_opt_flags:
return arg
return None


def remove_first_line(file_path):
"""
Removes the first line from a specified file.
"""
try:
with open(file_path, "r") as f:
lines = f.readlines()

# If the file is not empty, write all lines except the first one back.
if lines:
with open(file_path, "w") as f:
f.writelines(lines[1:])

except FileNotFoundError:
print(f"Error: File '{file_path}' not found.")
except Exception as e:
print(f"An error occurred: {e}")


class Reduce(object):
def __init__(self, crash_script, file_to_reduce, creduce_flags):
crash_script_name, crash_script_ext = os.path.splitext(crash_script)
Expand All @@ -85,6 +133,9 @@ def __init__(self, crash_script, file_to_reduce, creduce_flags):
self.expected_output = []
self.needs_stack_trace = False
self.creduce_flags = ["--tidy"] + creduce_flags
self.opt = opt_cmd
self.llc = llc_cmd
self.ir_file = "crash.ll"

self.read_clang_args(crash_script, file_to_reduce)
self.read_expected_output()
Expand Down Expand Up @@ -186,22 +237,30 @@ def skip_function(func_name):

self.expected_output = result

def check_expected_output(self, args=None, filename=None):
def check_expected_output(self, cmd=None, args=None, filename=None):
if not cmd:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

having all these default arguments makes it very hard to reason, it would be nice to clean up the callers to explicitly pass things in

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack. I'll look at doing that.

cmd = self.clang
if not args:
args = self.clang_args
if not filename:
filename = self.file_to_reduce

p = subprocess.Popen(
self.get_crash_cmd(args=args, filename=filename),
self.get_crash_cmd(cmd=cmd, args=args, filename=filename),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
crash_output, _ = p.communicate()
return all(msg in crash_output.decode("utf-8") for msg in self.expected_output)

def write_interestingness_test(self):
def write_interestingness_test(self, cmd=None, use_llvm_reduce=False):
print("\nCreating the interestingness test...")
if not cmd:
cmd = self.get_crash_cmd()

# llvm-reduce interestingness tests take the file as the first argument.
# NOTE: this cannot be escaped by quote_cmd(), since it needs expansion.
filename = '< "$1"' if use_llvm_reduce else ""

# Disable symbolization if it's not required to avoid slow symbolization.
disable_symbolization = ""
Expand All @@ -210,32 +269,39 @@ def write_interestingness_test(self):

output = """#!/bin/bash
%s
if %s >& t.log ; then
if %s %s >& t.log ; then
exit 1
fi
""" % (
disable_symbolization,
quote_cmd(self.get_crash_cmd()),
quote_cmd(cmd),
filename,
)

for msg in self.expected_output:
output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg)

write_to_script(output, self.testfile)
self.check_interestingness()
self.check_interestingness(cmd, use_llvm_reduce=use_llvm_reduce)

def check_interestingness(self):
testfile = os.path.abspath(self.testfile)
def check_interestingness(self, cmd, use_llvm_reduce=False):
test_cmd = [os.path.abspath(self.testfile)]

# llvm-reduce interestingness tests take the file as the first arg.
if use_llvm_reduce:
test_cmd += [self.ir_file]
# Check that the test considers the original file interesting
returncode = subprocess.call(testfile, stdout=subprocess.DEVNULL)
if returncode:
result = subprocess.run(args=test_cmd, stdout=subprocess.DEVNULL)
if result.returncode:
sys.exit("The interestingness test does not pass for the original file.")

# Check that an empty file is not interesting
# Instead of modifying the filename in the test file, just run the command
with tempfile.NamedTemporaryFile() as empty_file:
is_interesting = self.check_expected_output(filename=empty_file.name)
new_args = cmd[1:] if use_llvm_reduce else cmd[1:-1]
is_interesting = self.check_expected_output(
cmd=cmd[0], args=new_args, filename=empty_file.name
)
if is_interesting:
sys.exit("The interestingness test passes for an empty file.")

Expand Down Expand Up @@ -424,11 +490,129 @@ def run_creduce(self):
print("\n\nctrl-c detected, killed reduction tool")
p.kill()

def run_llvm_reduce(self):
full_llvm_reduce_cmd = [
llvm_reduce_cmd,
f"--test={self.testfile}",
self.ir_file,
]
print("\nRunning llvm-reduce tool...")
verbose_print(quote_cmd(full_llvm_reduce_cmd))
try:
p = subprocess.Popen(full_llvm_reduce_cmd)
p.communicate()
except KeyboardInterrupt:
print("\n\nctrl-c detected, killed reduction tool")
p.kill()

def classify_crash(self) -> FailureType:
print("Classifying crash ...")
if self.check_expected_output(args=self.clang_args + ["-fsyntax-only"]):
print("Found Frontend Crash")
return FailureType.FrontEnd

print("Found Middle/Backend failure")

self.opt_level = extract_opt_level(self.clang_args) or "-O2"
backend_result = self.check_backend()
if backend_result == FailureType.BackEnd:
return backend_result

# Try running w/ -emit-llvm to generate an IR file,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd also try changing the opt-level and/or disabling LLVM passes when creating the IR file. In https://github.com/CTSRD-CHERI/llvm-project/blob/238a953f5c7ca3de3cff67bf2433a0f59c9677ee/clang/utils/creduce_crash_testcase.py#L899C75-L899C96 I try the normal command first, then -O1 and then -O0 -disable-O0-optnone.

A lot of the time we also need many of the llc command flags to actually reproduce the crash so I tried mapping some of the clang args to llc args, but it's not very reliable. Luckily you can still use llvm-reduce with clang in the reproducer script.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1, at some point if llc doesn't repro the backend crash we should also attempt to run this through clang -Xclang -disable-llvm-passes -c -o /dev/null which skips the opt pipeline and only runs the codegen pipeline

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, I think the strategy is still WIP. Likely this bit will get the most refactoring/has most room for improvement.

# if it succeeds we have a backend failure, and can use llc.
if not self.check_expected_output(
args=self.clang_args + ["-emit-llvm", "-o", self.ir_file]
):
print("Checking llc for failure")
if self.check_expected_output(
cmd=self.llc,
args=[self.opt_level, "-filetype=obj"],
filename=self.ir_file,
):
print("Found BackEnd Crash")
return FailureType.BackEnd
elif os.path.exists(self.ir_file):
# clean up the IR file if we generated it, but won't use it.
print(f"clean up {self.ir_file}, since we can't repro w/ llc")
os.remove(self.ir_file)

# The IR file will be in 'self.ir_file'
self.extract_crashing_ir()
return self.check_middle_end()

def check_llc_failure(self) -> bool:
return self.check_expected_output(
cmd=self.llc, args=[self.opt_level, "-filetype=obj"], filename=self.ir_file
)

def extract_backend_ir(self) -> bool:
return not self.check_expected_output(
args=self.clang_args + ["-emit-llvm", "-o", self.ir_file]
)

def check_backend(self) -> Optional[FailureType]:
# Try running w/ -emit-llvm to generate an IR file,
# if it succeeds we have a backend failure, and can use llc.
if self.extract_backend_ir():
print("Checking llc for failure")
if self.check_llc_failure():
print("Found BackEnd Crash")
return FailureType.BackEnd
elif os.path.exists(self.ir_file):
# clean up the IR file if we generated it, but won't use it.
print(f"clean up {self.ir_file}, since we can't repro w/ llc")
os.remove(self.ir_file)

def extract_crashing_ir(self):
"""
Extract IR just before the crash using --print-on-crash
"""
args = self.clang_args + [
"-mllvm",
"--print-on-crash",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

--print-on-crash can be very slow since it's saving the entire module's IR after every pass, but we can reevaluate this in the future if this turns out to actually be slow in practice

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its nice in that its very simple to use this mechanism, vs. something like getting the pass number and then dumping that IR w/ -print-before. It certainly made scripting this simpler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I factored this logic into some methods, that way if we want to switch the mechanism it should be slightly easier.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at some point we should also incorporate llvm/utils/reduce_pipeline.py, but that's for the future

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, that'd be a nice improvement.

"-mllvm",
f"--print-on-crash-path={self.ir_file}",
"-mllvm",
"--print-module-scope",
]

if not self.check_expected_output(args=args):
sys.exit("The interestingness test does not pass with '--print-on-crash'.")

# The output from --print-on-crash has an invalid first line (pass name).
remove_first_line(self.ir_file)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should change the extra line to start with ; to avoid having to do this (I've also been annoyed with this in the past). lemme have someone make this change to get into the LLVM development workflow

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, yeah, that'd be way simpler.


def check_middle_end(self) -> FailureType:
# TODO: parse the exact pass from the backtrace and set the pass
# pipeline directly via -passes="...".
print("Checking opt for failure")
if self.check_expected_output(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

running the entire pipeline on the IR at the point of the crash isn't precisely matching what the crashing pass would see since we're running all the passes up to that point twice

ideally we'd go through a couple of steps:

  1. run the specific pass that crashed (maybe extractable in the crash output?) on the --print-on-crash output rather than the entire pipeline
  2. if that doesn't work, run the entire pipeline on the output of the frontend clang -Xclang -disable-llvm-passes -S -emit-llvm

and similarly for llc reduction it should run on the output post-optimization (clang -S -emit-llvm) rather than the IR on crash/IR output from frontend

if you want to push this off to another PR, please add a TODO

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I'd like to have something more sophisticated. I have a TODO around line 640, but maybe that's better here.

The backtrace seems to contain the exact pass spelling that things crash on, so I think it will be not that hard to adapt, but I wanted something we could iterate on w/o worrying about every detail. But, I agree there's lots of room for improvement here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iiuc opt won't crash here because the IR we're passing it has already been through the optimization pipeline. we need to generate the IR with -Xclang -disable-llvm-passes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let me double check, but I think opt definitly still crashes here (at least in my test case). it certainly was in my earlier testing (and I'm pretty sure I confirmed it after I refactored this yesterday).

but I'd actually like to get the precise pass from the backtrace and use that to just craft the

opt -passes="function<eager-inv>(drop-unnecessary-assumes,float2int,lower-constant-intrinsics,loop(loop-rotate<header-duplication;no-prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,slp-vectorizer,vector-combine,instcombine<max-iterations=1;no-verify-fixpoint>,loop-unroll<O2>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>)" < crash.ll

This is the backtrace I'm seeing locally. I'm not 100% on if any of this is gated on a specific cmake flag we set in a typical build of our toolchain, but having the pass string printed seems like a really easy way to get it to reproduce if we get the IR from -print-on-crash.

 ../bin/opt < reduced.ll -O2 -disable-output
opt: /usr/local/google/home/paulkirth/llvm-fork/llvm/include/llvm/Support/Casting.h:109: static bool llvm::isa_impl_cl<llvm::VPWidenCastRecipe, const llvm::VPRecipeBase *>::doit(const From *) [To = llvm::VPWidenCastRecipe, From = const llvm::VPRecipeBase *]: Assertion `Val && "isa<> used on a null pointer"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug.
Stack dump:
0.      Program arguments: ../bin/opt -O2 -disable-output
1.      Running pass "function<eager-inv>(drop-unnecessary-assumes,float2int,lower-constant-intrinsics,loop(loop-rotate<header-duplication;no-prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,slp-vectorizer,vector-combine,instcombine<max-iterations=1;no-verify-fixpoint>,loop-unroll<O2>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>)" on module "<stdin>"
2.      Running pass "loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>" on function "_Z1db"
#0 0x0000555739c1b686 ___interceptor_backtrace ../../../../../../../../../llvm-llvm-project/compiler-rt/lib/asan/../sanitizer_common/sanitizer_common_interceptors.inc:4556:13
#1 0x0000555739daf2e4 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /usr/local/google/home/paulkirth/llvm-fork/llvm/lib/Support/Unix/Signals.inc:838:8
[1]    3448508 IOT instruction  ../bin/opt -O2 -disable-output < reduced.ll

cmd=self.opt,
args=[self.opt_level, "-disable-output"],
filename=self.ir_file,
):
print("Found MiddleEnd Crash")
return FailureType.MiddleEnd
print(
"Could not automatically detect crash type, falling back to creduce/cvise."
)
return FailureType.Unknown

def reduce_ir_crash(self, new_cmd: List[str]):
print("Writing interestingness test...")
self.write_interestingness_test(cmd=new_cmd, use_llvm_reduce=True)
print("Starting llvm-reduce with llc test case")
self.run_llvm_reduce()
print("Done Reducing IR file.")


def main():
global verbose
global creduce_cmd
global llvm_reduce_cmd
global clang_cmd
global opt_cmd
global llc_cmd

parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
parser.add_argument(
Expand All @@ -450,20 +634,50 @@ def main():
help="The path to the `clang` executable. "
"By default uses the llvm-bin directory.",
)
parser.add_argument(
"--opt",
dest="opt",
type=str,
help="The path to the `opt` executable. "
"By default uses the llvm-bin directory.",
)
parser.add_argument(
"--llc",
dest="llc",
type=str,
help="The path to the `llc` executable. "
"By default uses the llvm-bin directory.",
)
parser.add_argument(
"--creduce",
dest="creduce",
type=str,
help="The path to the `creduce` or `cvise` executable. "
"Required if neither `creduce` nor `cvise` are on PATH.",
)
parser.add_argument(
"--llvm-reduce",
dest="llvm_reduce",
type=str,
help="The path to the `llvm-reduce` executable. "
"By default uses the llvm-bin directory.",
)
parser.add_argument(
"--auto",
action="store_true",
help="Use auto reduction mode, that uses `creduce`/`cvise`"
"for frontend crashes and llvm-reduce for middle/backend crashes.",
)
parser.add_argument("-v", "--verbose", action="store_true")
args, creduce_flags = parser.parse_known_args()
verbose = args.verbose
llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
creduce_cmd = check_cmd("creduce", None, args.creduce)
creduce_cmd = check_cmd("cvise", None, args.creduce)
llvm_reduce_cmd = check_cmd("llvm-reduce", llvm_bin, args.llvm_reduce)
clang_cmd = check_cmd("clang", llvm_bin, args.clang)
opt_cmd = check_cmd("opt", llvm_bin, args.opt)
llc_cmd = check_cmd("llc", llvm_bin, args.llc)

crash_script = check_file(args.crash_script[0])
file_to_reduce = check_file(args.file_to_reduce[0])
Expand All @@ -472,6 +686,20 @@ def main():
creduce_flags += ["--n", str(max(4, multiprocessing.cpu_count() // 2))]

r = Reduce(crash_script, file_to_reduce, creduce_flags)
if args.auto:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe we only try to reduce in opt/llc if those binaries are provided to us, rather than on the --auto flag?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to gate everything behind a flag to start and be sure the default flow wasn't changed. Happy to modify the mechanism if we're by and large happy w/ the direction of this patch.

crash_type = r.classify_crash()
match crash_type:
case FailureType.FrontEnd | FailureType.Unknown:
print("Starting reduction with creduce/cvise")
pass
case FailureType.MiddleEnd:
new_cmd = [opt_cmd, "-disable-output", r.opt_level]
r.reduce_ir_crash(new_cmd)
return
case FailureType.BackEnd:
new_cmd = [llc_cmd, "-filetype=obj", r.opt_level]
r.reduce_ir_crash(new_cmd)
return

r.simplify_clang_args()
r.write_interestingness_test()
Expand Down