diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp index 7f9da69ab5e..1dfcae87bad 100644 --- a/src/hotspot/share/runtime/thread.cpp +++ b/src/hotspot/share/runtime/thread.cpp @@ -1491,8 +1491,8 @@ void WatcherThread::run() { os::die(); } - // Wait a second, then recheck for timeout. - os::naked_short_sleep(999); + // Wait a bit, then recheck for timeout. + os::naked_short_sleep(250); } } diff --git a/src/hotspot/share/utilities/vmError.cpp b/src/hotspot/share/utilities/vmError.cpp index 4c0b285044f..350498181d2 100644 --- a/src/hotspot/share/utilities/vmError.cpp +++ b/src/hotspot/share/utilities/vmError.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, Red Hat, Inc. and/or its affiliates. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1689,42 +1690,71 @@ void VMError::show_message_box(char *buf, int buflen) { } while (yes); } -// Timeout handling: check if a timeout happened (either a single step did -// timeout or the whole of error reporting hit ErrorLogTimeout). Interrupt -// the reporting thread if that is the case. +// Fatal error handling is subject to several timeouts: +// - a global timeout (controlled via ErrorLogTimeout) +// - local error reporting step timeouts. +// +// The latter aims to "give the JVM a kick" if it gets stuck in one particular place during +// error reporting. This prevents one error reporting step from hogging all the time allotted +// to error reporting under ErrorLogTimeout. +// +// VMError::check_timeout() is called from the watcher thread and checks for either global +// or step timeout. If a timeout happened, we interrupt the reporting thread and set either +// _reporting_did_timeout or _step_did_timeout to signal which timeout fired. Function returns +// true if the *global* timeout fired, which will cause WatcherThread to shut down the JVM +// immediately. bool VMError::check_timeout() { + // This function is supposed to be called from watcher thread during fatal error handling only. + assert(VMError::is_error_reported(), "Only call during error handling"); + assert(Thread::current()->is_Watcher_thread(), "Only call from watcher thread"); + if (ErrorLogTimeout == 0) { return false; } - // Do not check for timeouts if we still have a message box to show to the - // user or if there are OnError handlers to be run. - if (ShowMessageBoxOnError - || (OnError != NULL && OnError[0] != '\0') - || Arguments::abort_hook() != NULL) { - return false; - } + // There are three situations where we suppress the *global* error timeout: + // - if the JVM is embedded and the launcher has its abort hook installed. + // That must be allowed to run. + // - if the user specified one or more OnError commands to run, and these + // did not yet run. These must have finished. + // - if the user (typically developer) specified ShowMessageBoxOnError, + // and the error box has not yet been shown + const bool ignore_global_timeout = + (ShowMessageBoxOnError + || (OnError != NULL && OnError[0] != '\0') + || Arguments::abort_hook() != NULL); - const jlong reporting_start_time_l = get_reporting_start_time(); const jlong now = get_current_timestamp(); - // Timestamp is stored in nanos. - if (reporting_start_time_l > 0) { - const jlong end = reporting_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR; - if (end <= now) { - _reporting_did_timeout = true; - interrupt_reporting_thread(); - return true; // global timeout + + // Global timeout hit? + if (!ignore_global_timeout) { + const jlong reporting_start_time = get_reporting_start_time(); + // Timestamp is stored in nanos. + if (reporting_start_time > 0) { + const jlong end = reporting_start_time + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR; + if (end <= now && !_reporting_did_timeout) { + // We hit ErrorLogTimeout and we haven't interrupted the reporting + // thread yet. + _reporting_did_timeout = true; + interrupt_reporting_thread(); + return true; // global timeout + } } } - const jlong step_start_time_l = get_step_start_time(); - if (step_start_time_l > 0) { + // Reporting step timeout? + const jlong step_start_time = get_step_start_time(); + if (step_start_time > 0) { // A step times out after a quarter of the total timeout. Steps are mostly fast unless they // hang for some reason, so this simple rule allows for three hanging step and still // hopefully leaves time enough for the rest of the steps to finish. - const jlong end = step_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4; - if (end <= now) { + const int max_step_timeout_secs = 5; + const jlong timeout_duration = MAX2((jlong)max_step_timeout_secs, (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4); + const jlong end = step_start_time + timeout_duration; + if (end <= now && !_step_did_timeout) { + // The step timed out and we haven't interrupted the reporting + // thread yet. _step_did_timeout = true; interrupt_reporting_thread(); return false; // (Not a global timeout) diff --git a/test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java b/test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java index 51ae88f31c5..dd2c782e035 100644 --- a/test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java +++ b/test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java @@ -1,5 +1,6 @@ /* - * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, Red Hat, Inc. and/or its affiliates. + * Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +26,9 @@ import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.regex.Pattern; import jdk.test.lib.process.OutputAnalyzer; @@ -32,7 +36,7 @@ import jdk.test.lib.process.ProcessTools; /* - * @test + * @test id=default * @bug 8166944 * @summary Hanging Error Reporting steps may lead to torn error logs * @modules java.base/jdk.internal.misc @@ -41,6 +45,16 @@ * @author Thomas Stuefe (SAP) */ +/* + * @test id=with-on-error + * @bug 8303861 + * @summary Error handling step timeouts should never be blocked by OnError etc. + * @modules java.base/jdk.internal.misc + * @library /test/lib + * @requires (vm.debug == true) & (os.family != "windows") + * @run driver TimeoutInErrorHandlingTest with-on-error + */ + public class TimeoutInErrorHandlingTest { @@ -67,14 +81,28 @@ public static void main(String[] args) throws Exception { * little timeout messages to see that repeated timeout handling is basically working. */ - ProcessBuilder pb = ProcessTools.createJavaProcessBuilder( - "-XX:+UnlockDiagnosticVMOptions", - "-Xmx100M", - "-XX:ErrorHandlerTest=14", - "-XX:+TestUnresponsiveErrorHandler", - "-XX:ErrorLogTimeout=16", // 16 seconds big timeout = 4 seconds per little timeout - "-XX:-CreateCoredumpOnCrash", - "-version"); + boolean withOnError = false; + + if (args.length > 0) { + switch (args[0]) { + case "with-on-error": withOnError = true; break; + default: throw new RuntimeException("Invalid argument " + args[1]); + } + } + + List arguments = new ArrayList<>(); + Collections.addAll(arguments, + "-XX:+UnlockDiagnosticVMOptions", + "-Xmx100M", + "-XX:ErrorHandlerTest=14", + "-XX:+TestUnresponsiveErrorHandler", + "-XX:ErrorLogTimeout=" + ERROR_LOG_TIMEOUT, + "-XX:-CreateCoredumpOnCrash"); + if (withOnError) { + arguments.add("-XX:OnError=echo hi"); + } + arguments.add("-version"); + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(arguments); OutputAnalyzer output_detail = new OutputAnalyzer(pb.start()); @@ -82,8 +110,10 @@ public static void main(String[] args) throws Exception { output_detail.shouldMatch("# A fatal error has been detected by the Java Runtime Environment:.*"); output_detail.shouldMatch("# +(?:SIGSEGV|SIGBUS|EXCEPTION_ACCESS_VIOLATION).*"); - // VM should have been aborted by WatcherThread - output_detail.shouldMatch(".*timer expired, abort.*"); + // Unless we specified OnError, VM should have been aborted by WatcherThread + if (!withOnError) { + output_detail.shouldMatch(".*timer expired, abort.*"); + } // extract hs-err file String hs_err_file = output_detail.firstMatch("# *(\\S*hs_err_pid\\d+\\.log)", 1);