Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8274986: max code printed in hs-err logs should be configurable #5875

Closed
wants to merge 6 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -653,7 +653,12 @@ void CodeBlob::dump_for_addr(address addr, outputStream* st, bool verbose) const
nm->method()->print_value_on(st);
}
st->cr();
nm->print_nmethod(verbose);
if (verbose && st == tty) {
// verbose is only ever true when called from findpc in debug.cpp
nm->print_nmethod(true);
} else {
nm->print(st);
}
Copy link
Contributor

@tkrodriguez tkrodriguez Oct 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be something like this:

     ResourceMark rm;
     st->print(INTPTR_FORMAT " is at entry_point+%d in (nmethod*)" INTPTR_FORMAT,
               p2i(addr), (int)(addr - nm->entry_point()), p2i(nm));
-    if (verbose) {
-      st->print(" for ");
-      nm->method()->print_value_on(st);
-    }
     st->cr();
-    nm->print_nmethod(verbose);
+    if (verbose && st == tty) {
+      nm->print_nmethod(verbose);
+    } else {
+      nm->print_on(st);
+    }
     return;
   }
   st->print_cr(INTPTR_FORMAT " is at code_begin+%d in ", p2i(addr), (int)(addr - code_begin()));

Copy link
Member Author

@dougxc dougxc Oct 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. As far as I can see, the intent is to have a single line of output for an nmethod followed by an optional " for ..." suffix when verbose == true.
The nm->print_nmethod(verbose) call then prints extra multi-line detail for the nmethod with the number of lines printed governed by verbose.
This code seems like it went from being hard coded to always go to tty and was then parameterized to go to an arbitrary stream but the evolution accidentally overlooked some code that still goes to tty.
I don't want to make extensive changes here as there really should be a single effort to rationalize all dumping to ensure it's parameterized.

Copy link
Contributor

@tkrodriguez tkrodriguez Oct 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree you might not want to bite this off in this PR, but this piece of code is reason you commonly see random nmethods appearing on the tty just before a crash. verbose is only ever true when called from findpc in debug.cpp. All the other non-verbose work print_nmethod does is useless, like writing to the xtty, and otherwise boils down to nm->print_on(tty). But all these printing paths could use a rework so I'm fine if you skip it.

Copy link
Member Author

@dougxc dougxc Oct 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you convinced me in a side conversation, your suggestion has the advantage that we now at least get this output in a hs_err file instead of loosing it altogether.

return;
}
st->print_cr(INTPTR_FORMAT " is at code_begin+%d in ", p2i(addr), (int)(addr - code_begin()));
@@ -34,6 +34,7 @@
#include "gc/shared/referenceProcessor.hpp"
#include "oops/markWord.hpp"
#include "runtime/task.hpp"
#include "utilities/vmError.hpp"
Copy link
Member

@dholmes-ora dholmes-ora Oct 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed?

Copy link
Member Author

@dougxc dougxc Oct 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the definition of VMError::max_error_log_print_code to be available just like task.hpp is included to make PeriodicTask::min_interval (used in defining the range of PerfDataSamplingInterval) available.

Copy link
Member

@dholmes-ora dholmes-ora Oct 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing there is some macro weirdness involved when defining the flag in globals.hpp.


//----------------------------------------------------------------------
// Build flagLimitTable[]
@@ -1336,6 +1336,10 @@ const intx ObjectAlignmentInBytes = 8;
develop(intx, StackPrintLimit, 100, \
"number of stack frames to print in VM-level stack dump") \
\
product(int, ErrorLogPrintCodeLimit, 3, DIAGNOSTIC, \
"max number of compiled code units to print in error log") \
range(0, VMError::max_error_log_print_code) \
\
notproduct(intx, MaxElementPrintSize, 256, \
"maximum number of elements to print") \
\
@@ -245,7 +245,8 @@ void VMError::print_stack_trace(outputStream* st, JavaThread* jt,
/**
* Adds `value` to `list` iff it's not already present and there is sufficient
* capacity (i.e. length(list) < `list_capacity`). The length of the list
* is the index of the first nullptr entry.
* is the index of the first nullptr entry or `list_capacity` if there are
* no nullptr entries.
*
* @ return true if the value was added, false otherwise
*/
@@ -910,25 +911,44 @@ void VMError::report(outputStream* st, bool _verbose) {
STEP("printing code blobs if possible")

if (_verbose && _context) {
if (!_print_native_stack_used) {
// Only try print code of the crashing frame since
// we cannot walk the native stack using next_frame.
const int printed_capacity = 1;
address printed_singleton = nullptr;
address* printed = &printed_singleton;
print_code(st, _thread, _pc, true, printed, 1);
} else {
// Print up to the first 5 unique code units on the stack
const int printed_capacity = 5;
address printed[printed_capacity];
printed[0] = nullptr; // length(list) == index of first nullptr

frame fr = os::fetch_frame_from_context(_context);
for (int count = 0; count < printed_capacity && fr.pc() != nullptr; ) {
if (print_code(st, _thread, fr.pc(), fr.pc() == _pc, printed, printed_capacity)) {
count++;
const int printed_capacity = max_error_log_print_code;
address printed[printed_capacity];
printed[0] = nullptr;
int printed_len = 0;
// Even though ErrorLogPrintCodeLimit is ranged checked
// during argument parsing, there's no way to prevent it
// subsequently (i.e., after parsing) being set to a
// value outside the range.
Copy link
Member

@dholmes-ora dholmes-ora Oct 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is overly defensive IMO. Flags should never be touched after initialization is complete and we assume we can trust ourselves.

Copy link
Member Author

@dougxc dougxc Oct 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general I agree but in error reporting code I get extra defensive plus the defensive code is small and not on a hot path.
That said, I won't object to it being undone in a subsequent PR.

Copy link
Member

@tstuefe tstuefe Oct 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the point of MIN2 here is to handle ErrorLogPrintCodeLimit < max. IMHO ErrorLogPrintCodeLimit > max would be just an assert-worthy error, since as David wrote flag values should not be changed after initialization.

Copy link
Member Author

@dougxc dougxc Oct 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point is to ensure that we don't run off the end of the stack allocated printed array in the (granted, unlikely) case that ErrorLogPrintCodeLimit is (accidentally) updated after arg parsing .
I'm not sure an assert is the best thing as it would cause error reporting to recurse.
Maybe I was being too defensive but I figured the overhead is negligible so why not be ultra-safe.

Copy link
Member

@tstuefe tstuefe Oct 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Recursive asserts would be caught by secondary error handling and show up as "Error occurred during error reporting" printout. Not ideal, but at least won't endanger the rest of the printing.

Copy link
Member Author

@dougxc dougxc Oct 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes but in a production scenario (which is where robust error reporting is critical), the assert is ignored and we end up with potential buffer overflow.

Copy link
Member

@tstuefe tstuefe Oct 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be a guarantee then.

int limit = MIN2(ErrorLogPrintCodeLimit, printed_capacity);
if (limit > 0) {
// Scan the native stack
if (!_print_native_stack_used) {
// Only try to print code of the crashing frame since
// the native stack cannot be walked with next_frame.
if (print_code(st, _thread, _pc, true, printed, printed_capacity)) {
printed_len++;
}
} else {
frame fr = os::fetch_frame_from_context(_context);
while (printed_len < limit && fr.pc() != nullptr) {
if (print_code(st, _thread, fr.pc(), fr.pc() == _pc, printed, printed_capacity)) {
printed_len++;
}
fr = next_frame(fr, _thread);
}
}

// Scan the Java stack
if (_thread != nullptr && _thread->is_Java_thread()) {
JavaThread* jt = JavaThread::cast(_thread);
if (jt->has_last_Java_frame()) {
for (StackFrameStream sfs(jt, true /* update */, true /* process_frames */); printed_len < limit && !sfs.is_done(); sfs.next()) {
address pc = sfs.current()->pc();
if (print_code(st, _thread, pc, pc == _pc, printed, printed_capacity)) {
printed_len++;
}
}
}
fr = next_frame(fr, _thread);
}
}
}
@@ -185,6 +185,9 @@ class VMError : public AllStatic {
// which is not NULL and contains bits in every word.
static const intptr_t segfault_address = LP64_ONLY(0xABC0000000000ABCULL) NOT_LP64(0x00000ABC);

// Max value for the ErrorLogPrintCodeLimit flag.
static const int max_error_log_print_code = 10;

// Needed when printing signal handlers.
NOT_WINDOWS(static const void* crash_handler_address;)

@@ -34,13 +34,17 @@
* @run driver MachCodeFramesInErrorFile
*/

import java.lang.annotation.Annotation;
import java.lang.reflect.Method;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.ArrayList;
import java.util.stream.Stream;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

@@ -51,45 +55,73 @@
import jdk.internal.misc.Unsafe;

public class MachCodeFramesInErrorFile {

private static class Crasher {
// Need to make unsafe a compile-time constant so that
// C2 intrinsifies the call to Unsafe.getLong in method3.
// Make Crasher.unsafe a compile-time constant so that
// C2 intrinsifies calls to Unsafe intrinsics.
private static final Unsafe unsafe = Unsafe.getUnsafe();
public static void main(String[] args) {
method1(10);

public static void main(String[] args) throws Exception {
if (args[0].equals("crashInJava")) {
// This test relies on Unsafe.putLong(Object, long, long) being intrinsified
if (!Stream.of(Unsafe.class.getDeclaredMethod("putLong", Object.class, long.class, long.class).getAnnotations()).
anyMatch(a -> a.annotationType().getName().equals("jdk.internal.vm.annotation.IntrinsicCandidate"))) {
throw new RuntimeException("Unsafe.putLong(Object, long, long) is not an intrinsic");
}
crashInJava1(10);
} else {
assert args[0].equals("crashInVM");
crashInNative1(10);
}
}

static void method1(long address) {
System.out.println("in method1");
method2(address);
static void crashInJava1(long address) {
System.out.println("in crashInJava1");
crashInJava2(address);
}
static void method2(long address) {
System.out.println("in method2");
method3(address);
static void crashInJava2(long address) {
System.out.println("in crashInJava2");
crashInJava3(address);
}
static void method3(long address) {
System.out.println("in method3");
// Keep chasing pointers until we crash
while (true) {
address = unsafe.getLong(address);
}
static void crashInJava3(long address) {
unsafe.putLong(null, address, 42);
System.out.println("wrote value to 0x" + Long.toHexString(address));
}

static void crashInNative1(long address) {
System.out.println("in crashInNative1");
crashInNative2(address);
}
static void crashInNative2(long address) {
System.out.println("in crashInNative2");
crashInNative3(address);
}
static void crashInNative3(long address) {
System.out.println("read value " + unsafe.getLong(address) + " from 0x" + Long.toHexString(address));
}
}

public static void main(String[] args) throws Exception {
run(true);
run(false);
}

/**
* Runs Crasher and tries to force compile the methods in Crasher. The inner
* Runs Crasher in Xcomp mode. The inner
* most method crashes the VM with Unsafe. The resulting hs-err log is
* expected to have a min number of MachCode sections.
*/
public static void main(String[] args) throws Exception {
private static void run(boolean crashInJava) throws Exception {
ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
"-Xmx64m", "--add-exports=java.base/jdk.internal.misc=ALL-UNNAMED",
"-XX:-CreateCoredumpOnCrash",
"-Xcomp",
"-XX:-TieredCompilation", // ensure C2 compiles Crasher.method3
"-XX:CompileCommand=compileonly,MachCodeFramesInErrorFile$Crasher.m*",
"-XX:CompileCommand=dontinline,MachCodeFramesInErrorFile$Crasher.m*",
Crasher.class.getName());
"-XX:-TieredCompilation",
"-XX:CompileCommand=compileonly,MachCodeFramesInErrorFile$Crasher.crashIn*",
"-XX:CompileCommand=dontinline,MachCodeFramesInErrorFile$Crasher.crashIn*",
"-XX:CompileCommand=dontinline,*/Unsafe.getLong", // ensures VM call when crashInJava == false
Crasher.class.getName(),
crashInJava ? "crashInJava" : "crashInVM");
OutputAnalyzer output = new OutputAnalyzer(pb.start());

// Extract hs_err_pid file.
@@ -104,11 +136,17 @@ public static void main(String[] args) throws Exception {
throw new RuntimeException("hs_err_pid file missing at " + hsErrPath + ".\n");
}
String hsErr = Files.readString(hsErrPath);
if (!crashedInCrasherMethod(hsErr)) {
return;
if (System.getenv("DEBUG") != null) {
System.err.println(hsErr);
}
Set<String> frames = new HashSet<>();
extractFrames(hsErr, frames, true);
if (!crashInJava) {
// A crash in native will have Java frames in the hs-err log
// as there is a Java frame anchor on the stack.
extractFrames(hsErr, frames, false);
}
List<String> nativeFrames = extractNativeFrames(hsErr);
int compiledJavaFrames = (int) nativeFrames.stream().filter(f -> f.startsWith("J ")).count();
int compiledJavaFrames = (int) frames.stream().filter(f -> f.startsWith("J ")).count();

Matcher matcherDisasm = Pattern.compile("\\[Disassembly\\].*\\[/Disassembly\\]", Pattern.DOTALL).matcher(hsErr);
if (matcherDisasm.find()) {
@@ -118,63 +156,30 @@ public static void main(String[] args) throws Exception {

Matcher matcher = Pattern.compile("\\[MachCode\\]\\s*\\[Verified Entry Point\\]\\s* # \\{method\\} \\{[^}]*\\} '([^']+)' '([^']+)' in '([^']+)'", Pattern.DOTALL).matcher(hsErr);
List<String> machCodeHeaders = matcher.results().map(mr -> String.format("'%s' '%s' in '%s'", mr.group(1), mr.group(2), mr.group(3))).collect(Collectors.toList());
String message = "Mach code headers: " + machCodeHeaders +
"\n\nExtracted MachCode:\n" + extractMachCode(hsErr) +
"\n\nExtracted native frames:\n" + String.join("\n", nativeFrames);
int minExpectedMachCodeSections = Math.max(1, compiledJavaFrames);
Asserts.assertTrue(machCodeHeaders.size() >= minExpectedMachCodeSections, message);
}

/**
* Checks whether the crashing frame is in {@code Crasher.method3}.
*/
private static boolean crashedInCrasherMethod(String hsErr) {
boolean checkProblematicFrame = false;
for (String line : hsErr.split(System.lineSeparator())) {
if (line.startsWith("# Problematic frame:")) {
checkProblematicFrame = true;
} else if (checkProblematicFrame) {
String crasherMethod = Crasher.class.getSimpleName() + ".method3";
if (!line.contains(crasherMethod)) {
// There's any number of things that can subvert the attempt
// to crash in the expected method.
System.out.println("Crashed in method other than " + crasherMethod + "\n\n" + line + "\n\nSkipping rest of test.");
return false;
}
return true;
}
if (machCodeHeaders.size() < minExpectedMachCodeSections) {
Asserts.fail(machCodeHeaders.size() + " < " + minExpectedMachCodeSections);
}
throw new RuntimeException("\"# Problematic frame:\" line missing in hs_err_pid file:\n" + hsErr);
}

/**
* Gets the lines in {@code hsErr} below the line starting with "Native frames:" up to the next blank line.
* Extracts the lines in {@code hsErr} below the line starting with
* "Native frames:" or "Java frames:" up to the next blank line
* and adds them to {@code frames}.
*/
private static List<String> extractNativeFrames(String hsErr) {
List<String> res = new ArrayList<>();
boolean inNativeFrames = false;
private static void extractFrames(String hsErr, Set<String> frames, boolean nativeStack) {
String marker = (nativeStack ? "Native" : "Java") + " frames: ";
boolean seenMarker = false;
for (String line : hsErr.split(System.lineSeparator())) {
if (line.startsWith("Native frames: ")) {
inNativeFrames = true;
} else if (inNativeFrames) {
if (line.startsWith(marker)) {
seenMarker = true;
} else if (seenMarker) {
if (line.trim().isEmpty()) {
return res;
return;
}
res.add(line);
}
}
throw new RuntimeException("\"Native frames:\" line missing in hs_err_pid file:\n" + hsErr);
}

private static String extractMachCode(String hsErr) {
int start = hsErr.indexOf("[MachCode]");
if (start != -1) {
int end = hsErr.lastIndexOf("[/MachCode]");
if (end != -1) {
return hsErr.substring(start, end + "[/MachCode]".length());
frames.add(line);
}
return hsErr.substring(start);
}
return null;
throw new RuntimeException("\"" + marker + "\" line missing in hs_err_pid file:\n" + hsErr);
}
}