From 24a16f9b1bb4d2ba042706f757858d9702c44b1a Mon Sep 17 00:00:00 2001 From: Thomas Schatzl Date: Wed, 8 Sep 2021 14:40:32 +0200 Subject: [PATCH 1/5] Move evac failure handling to young gc --- src/hotspot/share/gc/g1/g1CollectedHeap.cpp | 12 +--- src/hotspot/share/gc/g1/g1CollectedHeap.hpp | 8 +-- .../share/gc/g1/g1CollectedHeap.inline.hpp | 4 -- .../share/gc/g1/g1EvacFailureRegions.cpp | 25 ++++---- .../share/gc/g1/g1EvacFailureRegions.hpp | 10 ++- src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp | 10 +-- src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp | 4 +- src/hotspot/share/gc/g1/g1Policy.cpp | 21 +++---- src/hotspot/share/gc/g1/g1Policy.hpp | 15 +++-- src/hotspot/share/gc/g1/g1YoungCollector.cpp | 62 +++++++++++++------ src/hotspot/share/gc/g1/g1YoungCollector.hpp | 14 +++-- .../gc/g1/g1YoungGCPostEvacuateTasks.cpp | 22 ++----- .../gc/g1/g1YoungGCPostEvacuateTasks.hpp | 10 +-- 13 files changed, 113 insertions(+), 104 deletions(-) diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp index eca8f551e6d90..bffd51d5e4ea5 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp @@ -1754,8 +1754,6 @@ jint G1CollectedHeap::initialize() { _collection_set.initialize(max_reserved_regions()); - _evac_failure_regions.initialize(max_reserved_regions()); - evac_failure_injector()->reset(); G1InitLogger::print(); @@ -2775,10 +2773,6 @@ void G1CollectedHeap::verify_after_young_collection(G1HeapVerifier::G1VerifyType return; } Ticks start = Ticks::now(); - // Inject evacuation failure tag into type if needed. - if (evacuation_failed()) { - type = (G1HeapVerifier::G1VerifyType)(type | G1HeapVerifier::G1VerifyYoungEvacFail); - } if (VerifyRememberedSets) { log_info(gc, verify)("[Verifying RemSets after GC]"); VerifyRegionRemSetClosure v_cl; @@ -2887,7 +2881,7 @@ void G1CollectedHeap::do_collection_pause_at_safepoint_helper(double target_paus bool should_start_concurrent_mark_operation = collector_state()->in_concurrent_start_gc(); // Perform the collection. - G1YoungCollector collector(gc_cause(), target_pause_time_ms, &_evac_failure_regions); + G1YoungCollector collector(gc_cause(), target_pause_time_ms); collector.collect(); // It should now be safe to tell the concurrent mark thread to start @@ -3394,8 +3388,8 @@ void G1CollectedHeap::unregister_nmethod(nmethod* nm) { nm->oops_do(®_cl, true); } -void G1CollectedHeap::update_used_after_gc() { - if (evacuation_failed()) { +void G1CollectedHeap::update_used_after_gc(bool evacuation_failed) { + if (evacuation_failed) { // Reset the G1EvacuationFailureALot counters and flags evac_failure_injector()->reset(); diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp index c3dfb31d013f3..61c8d3c1229dc 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp @@ -34,7 +34,6 @@ #include "gc/g1/g1ConcurrentMark.hpp" #include "gc/g1/g1EdenRegions.hpp" #include "gc/g1/g1EvacStats.hpp" -#include "gc/g1/g1EvacFailureRegions.hpp" #include "gc/g1/g1GCPauseType.hpp" #include "gc/g1/g1HeapTransition.hpp" #include "gc/g1/g1HeapVerifier.hpp" @@ -831,8 +830,6 @@ class G1CollectedHeap : public CollectedHeap { // The parallel task queues G1ScannerTasksQueueSet *_task_queues; - G1EvacFailureRegions _evac_failure_regions; - // ("Weak") Reference processing support. // // G1 has 2 instances of the reference processor class. @@ -1050,9 +1047,6 @@ class G1CollectedHeap : public CollectedHeap { void start_concurrent_gc_for_metadata_allocation(GCCause::Cause gc_cause); - // True iff an evacuation has failed in the most-recent collection. - inline bool evacuation_failed() const; - void remove_from_old_gen_sets(const uint old_regions_removed, const uint archive_regions_removed, const uint humongous_regions_removed); @@ -1306,7 +1300,7 @@ class G1CollectedHeap : public CollectedHeap { // Recalculate amount of used memory after GC. Must be called after all allocation // has finished. - void update_used_after_gc(); + void update_used_after_gc(bool evacuation_failed); // Reset and re-enable the hot card cache. // Note the counts for the cards in the regions in the // collection set are reset when the collection set is freed. diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp index cb7b3d9034ebd..5a46df4e03a95 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp @@ -196,10 +196,6 @@ void G1CollectedHeap::register_optional_region_with_region_attr(HeapRegion* r) { _region_attr.set_optional(r->hrm_index(), r->rem_set()->is_tracked()); } -bool G1CollectedHeap::evacuation_failed() const { - return _evac_failure_regions.num_regions_failed_evacuation() > 0; -} - inline bool G1CollectedHeap::is_in_young(const oop obj) { if (obj == NULL) { return false; diff --git a/src/hotspot/share/gc/g1/g1EvacFailureRegions.cpp b/src/hotspot/share/gc/g1/g1EvacFailureRegions.cpp index a6b7a16ce2816..a8424ece626fb 100644 --- a/src/hotspot/share/gc/g1/g1EvacFailureRegions.cpp +++ b/src/hotspot/share/gc/g1/g1EvacFailureRegions.cpp @@ -25,27 +25,35 @@ #include "precompiled.hpp" #include "gc/g1/g1CollectedHeap.hpp" -#include "gc/g1/g1EvacFailureRegions.hpp" +#include "gc/g1/g1EvacFailureRegions.inline.hpp" #include "gc/g1/heapRegion.hpp" #include "memory/allocation.hpp" #include "runtime/atomic.hpp" - G1EvacFailureRegions::G1EvacFailureRegions() : - _regions_failed_evacuation(mtGC) { -} + _regions_failed_evacuation(mtGC), + _evac_failure_regions(nullptr), + _evac_failure_regions_cur_length(0), + _max_regions(0) { } G1EvacFailureRegions::~G1EvacFailureRegions() { - FREE_C_HEAP_ARRAY(uint, _evac_failure_regions); + assert(_evac_failure_regions == nullptr, "not cleaned up"); } -void G1EvacFailureRegions::initialize(uint max_regions) { +void G1EvacFailureRegions::pre_collection(uint max_regions) { Atomic::store(&_evac_failure_regions_cur_length, 0u); _max_regions = max_regions; _regions_failed_evacuation.resize(_max_regions); _evac_failure_regions = NEW_C_HEAP_ARRAY(uint, _max_regions, mtGC); } +void G1EvacFailureRegions::post_collection() { + _regions_failed_evacuation.resize(0); + FREE_C_HEAP_ARRAY(uint, _evac_failure_regions); + _evac_failure_regions = nullptr; + _max_regions = 0; // To have any record() attempt fail in the future. +} + void G1EvacFailureRegions::par_iterate(HeapRegionClosure* closure, HeapRegionClaimer* _hrclaimer, uint worker_id) { @@ -57,11 +65,6 @@ void G1EvacFailureRegions::par_iterate(HeapRegionClosure* closure, worker_id); } -void G1EvacFailureRegions::reset() { - Atomic::store(&_evac_failure_regions_cur_length, 0u); - _regions_failed_evacuation.clear(); -} - bool G1EvacFailureRegions::contains(uint region_idx) const { assert(region_idx < _max_regions, "must be"); return _regions_failed_evacuation.par_at(region_idx, memory_order_relaxed); diff --git a/src/hotspot/share/gc/g1/g1EvacFailureRegions.hpp b/src/hotspot/share/gc/g1/g1EvacFailureRegions.hpp index c47dcfefa2593..d4aa8346199ba 100644 --- a/src/hotspot/share/gc/g1/g1EvacFailureRegions.hpp +++ b/src/hotspot/share/gc/g1/g1EvacFailureRegions.hpp @@ -47,9 +47,11 @@ class G1EvacFailureRegions { public: G1EvacFailureRegions(); ~G1EvacFailureRegions(); - void initialize(uint max_regions); - void reset(); + // Sets up the bitmap and failed regions array for addition. + void pre_collection(uint max_regions); + // Drops memory for internal data structures, but keep counts. + void post_collection(); bool contains(uint region_idx) const; void par_iterate(HeapRegionClosure* closure, @@ -60,6 +62,10 @@ class G1EvacFailureRegions { return Atomic::load(&_evac_failure_regions_cur_length); } + bool evacuation_failed() const { + return num_regions_failed_evacuation() > 0; + } + // Record that the garbage collection encountered an evacuation failure in the // given region. Returns whether this has been the first occurrence of an evacuation // failure in that region. diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp index bc20609520809..cf3e6938cc038 100644 --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp @@ -457,7 +457,7 @@ double G1GCPhaseTimes::print_evacuate_initial_collection_set() const { return _cur_collection_initial_evac_time_ms + _cur_merge_heap_roots_time_ms; } -double G1GCPhaseTimes::print_post_evacuate_collection_set() const { +double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed) const { const double sum_ms = _cur_collection_nmethod_list_cleanup_time_ms + _recorded_preserve_cm_referents_time_ms + _cur_ref_proc_time_ms + @@ -481,13 +481,13 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set() const { debug_phase(_gc_par_phases[MergePSS], 1); debug_phase(_gc_par_phases[ClearCardTable], 1); debug_phase(_gc_par_phases[RecalculateUsed], 1); - if (G1CollectedHeap::heap()->evacuation_failed()) { + if (evacuation_failed) { debug_phase(_gc_par_phases[RemoveSelfForwardingPtr], 1); } trace_phase(_gc_par_phases[RedirtyCards]); debug_time("Post Evacuate Cleanup 2", _cur_post_evacuate_cleanup_2_time_ms); - if (G1CollectedHeap::heap()->evacuation_failed()) { + if (_gc_par_phases[RestorePreservedMarks]->uninitialized()) { debug_phase(_gc_par_phases[RecalculateUsed], 1); debug_phase(_gc_par_phases[RestorePreservedMarks], 1); } @@ -526,7 +526,7 @@ void G1GCPhaseTimes::print_other(double accounted_ms) const { info_time("Other", _gc_pause_time_ms - accounted_ms); } -void G1GCPhaseTimes::print() { +void G1GCPhaseTimes::print(bool evacuation_failed) { // Check if some time has been recorded for verification and only then print // the message. We do not use Verify*GC here to print because VerifyGCType // further limits actual verification. @@ -538,7 +538,7 @@ void G1GCPhaseTimes::print() { accounted_ms += print_pre_evacuate_collection_set(); accounted_ms += print_evacuate_initial_collection_set(); accounted_ms += print_evacuate_optional_collection_set(); - accounted_ms += print_post_evacuate_collection_set(); + accounted_ms += print_post_evacuate_collection_set(evacuation_failed); print_other(accounted_ms); // See above comment on the _cur_verify_before_time_ms check. diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp index bfe503c30324b..255aab6f96ee9 100644 --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp @@ -227,14 +227,14 @@ class G1GCPhaseTimes : public CHeapObj { double print_merge_heap_roots_time() const; double print_evacuate_initial_collection_set() const; double print_evacuate_optional_collection_set() const; - double print_post_evacuate_collection_set() const; + double print_post_evacuate_collection_set(bool evacuation_failed) const; void print_other(double accounted_ms) const; public: G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads); void record_gc_pause_start(); void record_gc_pause_end(); - void print(); + void print(bool evacuation_failed); static const char* phase_name(GCParPhases phase); // record the time a phase took in seconds diff --git a/src/hotspot/share/gc/g1/g1Policy.cpp b/src/hotspot/share/gc/g1/g1Policy.cpp index bf40ddf1a1875..180c9ed50bcb9 100644 --- a/src/hotspot/share/gc/g1/g1Policy.cpp +++ b/src/hotspot/share/gc/g1/g1Policy.cpp @@ -629,7 +629,7 @@ double G1Policy::logged_cards_processing_time() const { // Anything below that is considered to be zero #define MIN_TIMER_GRANULARITY 0.0000001 -void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mark) { +void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mark, bool update_stats) { G1GCPhaseTimes* p = phase_times(); double start_time_sec = phase_times()->cur_collection_start_sec(); @@ -638,8 +638,6 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar G1GCPauseType this_pause = collector_state()->young_gc_pause_type(concurrent_operation_is_full_mark); - bool update_stats = should_update_gc_stats(); - if (G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause)) { record_concurrent_mark_init_end(); } else { @@ -668,7 +666,7 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar _analytics->report_alloc_rate_ms(alloc_rate_ms); } - record_pause(this_pause, start_time_sec, end_time_sec); + record_pause(this_pause, start_time_sec, end_time_sec, update_stats); if (G1GCPauseTypeHelper::is_last_young_pause(this_pause)) { assert(!G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause), @@ -891,9 +889,9 @@ void G1Policy::report_ihop_statistics() { _ihop_control->print(); } -void G1Policy::record_young_gc_pause_end() { +void G1Policy::record_young_gc_pause_end(bool evacuation_failed) { phase_times()->record_gc_pause_end(); - phase_times()->print(); + phase_times()->print(evacuation_failed); } double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards, @@ -1176,12 +1174,6 @@ void G1Policy::maybe_start_marking() { } } -bool G1Policy::should_update_gc_stats() { - // Evacuation failures skew the timing too much to be considered for statistics updates. - // We make the assumption that these are rare. - return !_g1h->evacuation_failed(); -} - void G1Policy::update_gc_pause_time_ratios(G1GCPauseType gc_type, double start_time_sec, double end_time_sec) { double pause_time_sec = end_time_sec - start_time_sec; @@ -1199,13 +1191,14 @@ void G1Policy::update_gc_pause_time_ratios(G1GCPauseType gc_type, double start_t void G1Policy::record_pause(G1GCPauseType gc_type, double start, - double end) { + double end, + bool should_update_gc_pause_time_ratios) { // Manage the MMU tracker. For some reason it ignores Full GCs. if (gc_type != G1GCPauseType::FullGC) { _mmu_tracker->add_pause(start, end); } - if (should_update_gc_stats()) { + if (should_update_gc_pause_time_ratios) { update_gc_pause_time_ratios(gc_type, start, end); } diff --git a/src/hotspot/share/gc/g1/g1Policy.hpp b/src/hotspot/share/gc/g1/g1Policy.hpp index f9c2030470422..c6e37706a56a5 100644 --- a/src/hotspot/share/gc/g1/g1Policy.hpp +++ b/src/hotspot/share/gc/g1/g1Policy.hpp @@ -268,10 +268,13 @@ class G1Policy: public CHeapObj { void maybe_start_marking(); // Manage time-to-mixed tracking. void update_time_to_mixed_tracking(G1GCPauseType gc_type, double start, double end); - // Record the given STW pause with the given start and end times (in s). - void record_pause(G1GCPauseType gc_type, double start, double end); - - bool should_update_gc_stats(); + // Record the given STW pause with the given start and end times (in s). Optionally + // update pause time ratios - in some situations (evacuation failure) we do not want + // to do this since timings of these garbage collections are very skewed. + void record_pause(G1GCPauseType gc_type, + double start, + double end, + bool should_update_gc_pause_time_ratios = true); void update_gc_pause_time_ratios(G1GCPauseType gc_type, double start_sec, double end_sec); @@ -303,7 +306,7 @@ class G1Policy: public CHeapObj { // Record the start and end of the young gc pause. void record_young_gc_pause_start(); - void record_young_gc_pause_end(); + void record_young_gc_pause_end(bool evacuation_failed); bool need_to_start_conc_mark(const char* source, size_t alloc_word_size = 0); @@ -313,7 +316,7 @@ class G1Policy: public CHeapObj { // Record the start and end of the actual collection part of the evacuation pause. void record_young_collection_start(); - void record_young_collection_end(bool concurrent_operation_is_full_mark); + void record_young_collection_end(bool concurrent_operation_is_full_mark, bool update_stats); // Record the start and end of a full collection. void record_full_collection_start(); diff --git a/src/hotspot/share/gc/g1/g1YoungCollector.cpp b/src/hotspot/share/gc/g1/g1YoungCollector.cpp index 483b1eb2d050b..85314530ba53d 100644 --- a/src/hotspot/share/gc/g1/g1YoungCollector.cpp +++ b/src/hotspot/share/gc/g1/g1YoungCollector.cpp @@ -104,6 +104,8 @@ void G1YoungCollector::reset_taskqueue_stats() { // The code relies on the fact that GCTraceTimeWrapper stores the string passed // initially as a reference only, so that we can modify it as needed. class G1YoungGCTraceTime { + G1YoungCollector* _collector; + G1GCPauseType _pause_type; GCCause::Cause _pause_cause; @@ -118,16 +120,17 @@ class G1YoungGCTraceTime { "Pause Young (%s) (%s)%s", G1GCPauseTypeHelper::to_string(_pause_type), GCCause::to_string(_pause_cause), - G1CollectedHeap::heap()->evacuation_failed() ? " (Evacuation Failure)" : ""); + _collector->evacuation_failed() ? " (Evacuation Failure)" : ""); return _young_gc_name_data; } public: - G1YoungGCTraceTime(GCCause::Cause cause) : + G1YoungGCTraceTime(G1YoungCollector* collector, GCCause::Cause cause) : + _collector(collector), // Take snapshot of current pause type at start as it may be modified during gc. // The strings for all Concurrent Start pauses are the same, so the parameter // does not matter here. - _pause_type(G1CollectedHeap::heap()->collector_state()->young_gc_pause_type(false /* concurrent_operation_is_full_mark */)), + _pause_type(_collector->collector_state()->young_gc_pause_type(false /* concurrent_operation_is_full_mark */)), _pause_cause(cause), // Fake a "no cause" and manually add the correct string in update_young_gc_name() // to make the string look more natural. @@ -140,9 +143,16 @@ class G1YoungGCTraceTime { }; class G1YoungGCNotifyPauseMark : public StackObj { + G1YoungCollector* _collector; + public: - G1YoungGCNotifyPauseMark() { G1CollectedHeap::heap()->policy()->record_young_gc_pause_start(); } - ~G1YoungGCNotifyPauseMark() { G1CollectedHeap::heap()->policy()->record_young_gc_pause_end(); } + G1YoungGCNotifyPauseMark(G1YoungCollector* collector) : _collector(collector) { + G1CollectedHeap::heap()->policy()->record_young_gc_pause_start(); + } + + ~G1YoungGCNotifyPauseMark() { + G1CollectedHeap::heap()->policy()->record_young_gc_pause_end(_collector->evacuation_failed()); + } }; class G1YoungGCJFRTracerMark : public G1JFRTracerMark { @@ -170,6 +180,7 @@ class G1YoungGCJFRTracerMark : public G1JFRTracerMark { }; class G1YoungGCVerifierMark : public StackObj { + G1YoungCollector* _collector; G1HeapVerifier::G1VerifyType _type; static G1HeapVerifier::G1VerifyType young_collection_verify_type() { @@ -184,12 +195,17 @@ class G1YoungGCVerifierMark : public StackObj { } public: - G1YoungGCVerifierMark() : _type(young_collection_verify_type()) { + G1YoungGCVerifierMark(G1YoungCollector* collector) : _collector(collector), _type(young_collection_verify_type()) { G1CollectedHeap::heap()->verify_before_young_collection(_type); } ~G1YoungGCVerifierMark() { - G1CollectedHeap::heap()->verify_after_young_collection(_type); + // Inject evacuation failure tag into type if needed. + G1HeapVerifier::G1VerifyType type = _type; + if (_collector->evacuation_failed()) { + type = (G1HeapVerifier::G1VerifyType)(type | G1HeapVerifier::G1VerifyYoungEvacFail); + } + G1CollectedHeap::heap()->verify_after_young_collection(type); } }; @@ -498,7 +514,7 @@ void G1YoungCollector::pre_evacuate_collection_set(G1EvacuationInfo* evacuation_ // reference processing currently works in G1. ref_processor_stw()->start_discovery(false /* always_clear */); - _evac_failure_regions->reset(); + _evac_failure_regions.pre_collection(_g1h->max_reserved_regions()); _g1h->gc_prologue(false); @@ -794,7 +810,7 @@ void G1YoungCollector::evacuate_next_optional_regions(G1ParScanThreadStateSet* p void G1YoungCollector::evacuate_optional_collection_set(G1ParScanThreadStateSet* per_thread_states) { const double collection_start_time_ms = phase_times()->cur_collection_start_sec() * 1000.0; - while (!_g1h->evacuation_failed() && collection_set()->optional_region_length() > 0) { + while (!evacuation_failed() && collection_set()->optional_region_length() > 0) { double time_used_ms = os::elapsedTime() * 1000.0 - collection_start_time_ms; double time_left_ms = MaxGCPauseMillis - time_used_ms; @@ -955,7 +971,7 @@ bool G1STWIsAliveClosure::do_object_b(oop p) { void G1YoungCollector::post_evacuate_cleanup_1(G1ParScanThreadStateSet* per_thread_states) { Ticks start = Ticks::now(); { - G1PostEvacuateCollectionSetCleanupTask1 cl(per_thread_states, _evac_failure_regions); + G1PostEvacuateCollectionSetCleanupTask1 cl(per_thread_states, &_evac_failure_regions); _g1h->run_batch_task(&cl); } phase_times()->record_post_evacuate_cleanup_task_1_time((Ticks::now() - start).seconds() * 1000.0); @@ -965,7 +981,7 @@ void G1YoungCollector::post_evacuate_cleanup_2(G1ParScanThreadStateSet* per_thre G1EvacuationInfo* evacuation_info) { Ticks start = Ticks::now(); { - G1PostEvacuateCollectionSetCleanupTask2 cl(per_thread_states, evacuation_info, _evac_failure_regions); + G1PostEvacuateCollectionSetCleanupTask2 cl(per_thread_states, evacuation_info, &_evac_failure_regions); _g1h->run_batch_task(&cl); } phase_times()->record_post_evacuate_cleanup_task_2_time((Ticks::now() - start).seconds() * 1000.0); @@ -993,6 +1009,8 @@ void G1YoungCollector::post_evacuate_collection_set(G1EvacuationInfo* evacuation post_evacuate_cleanup_2(per_thread_states, evacuation_info); + _evac_failure_regions.post_collection(); + assert_used_and_recalculate_used_equal(_g1h); _g1h->rebuild_free_region_list(); @@ -1011,6 +1029,10 @@ void G1YoungCollector::post_evacuate_collection_set(G1EvacuationInfo* evacuation _g1h->expand_heap_after_young_collection(); } +bool G1YoungCollector::evacuation_failed() const { + return _evac_failure_regions.evacuation_failed(); +} + class G1PreservedMarksSet : public PreservedMarksSet { public: @@ -1025,13 +1047,12 @@ class G1PreservedMarksSet : public PreservedMarksSet { }; G1YoungCollector::G1YoungCollector(GCCause::Cause gc_cause, - double target_pause_time_ms, - G1EvacFailureRegions* evac_failure_regions) : + double target_pause_time_ms) : _g1h(G1CollectedHeap::heap()), _gc_cause(gc_cause), _target_pause_time_ms(target_pause_time_ms), _concurrent_operation_is_full_mark(false), - _evac_failure_regions(evac_failure_regions) + _evac_failure_regions() { } @@ -1042,7 +1063,7 @@ void G1YoungCollector::collect() { // The G1YoungGCTraceTime message depends on collector state, so must come after // determining collector state. - G1YoungGCTraceTime tm(_gc_cause); + G1YoungGCTraceTime tm(this, _gc_cause); // JFR G1YoungGCJFRTracerMark jtm(gc_timer_stw(), gc_tracer_stw(), _gc_cause); @@ -1054,7 +1075,7 @@ void G1YoungCollector::collect() { // heap information printed as last part of detailed GC log. G1HeapPrinterMark hpm(_g1h); // Young GC internal pause timing - G1YoungGCNotifyPauseMark npm; + G1YoungGCNotifyPauseMark npm(this); // Verification may use the gang workers, so they must be set up before. // Individual parallel phases may override this. @@ -1065,7 +1086,7 @@ void G1YoungCollector::collect() { // just to do that). wait_for_root_region_scanning(); - G1YoungGCVerifierMark vm; + G1YoungGCVerifierMark vm(this); { // Actual collection work starts and is executed (only) in this scope. @@ -1084,7 +1105,8 @@ void G1YoungCollector::collect() { workers()->active_workers(), collection_set()->young_region_length(), collection_set()->optional_region_length(), - _evac_failure_regions); + &_evac_failure_regions); + pre_evacuate_collection_set(jtm.evacuation_info(), &per_thread_states); bool may_do_optional_evacuation = collection_set()->optional_region_length() != 0; @@ -1104,7 +1126,9 @@ void G1YoungCollector::collect() { // modifies it to the next state. jtm.report_pause_type(collector_state()->young_gc_pause_type(_concurrent_operation_is_full_mark)); - policy()->record_young_collection_end(_concurrent_operation_is_full_mark); + // Evacuation failures skew the timing too much to be considered for some statistics updates. + // We make the assumption that these are rare. + policy()->record_young_collection_end(_concurrent_operation_is_full_mark, !evacuation_failed()); } TASKQUEUE_STATS_ONLY(print_taskqueue_stats()); TASKQUEUE_STATS_ONLY(reset_taskqueue_stats()); diff --git a/src/hotspot/share/gc/g1/g1YoungCollector.hpp b/src/hotspot/share/gc/g1/g1YoungCollector.hpp index f4e6b93323132..70c0662f9e929 100644 --- a/src/hotspot/share/gc/g1/g1YoungCollector.hpp +++ b/src/hotspot/share/gc/g1/g1YoungCollector.hpp @@ -25,6 +25,7 @@ #ifndef SHARE_GC_G1_G1YOUNGCOLLECTOR_HPP #define SHARE_GC_G1_G1YOUNGCOLLECTOR_HPP +#include "gc/g1/g1EvacFailureRegions.hpp" #include "gc/g1/g1YoungGCEvacFailureInjector.hpp" #include "gc/shared/gcCause.hpp" #include "gc/shared/taskqueue.hpp" @@ -37,7 +38,6 @@ class G1CollectedHeap; class G1CollectionSet; class G1CollectorState; class G1ConcurrentMark; -class G1EvacFailureRegions; class G1EvacuationInfo; class G1GCPhaseTimes; class G1HotCardCache; @@ -56,6 +56,9 @@ class WorkGang; class outputStream; class G1YoungCollector { + friend class G1YoungGCNotifyPauseMark; + friend class G1YoungGCTraceTime; + friend class G1YoungGCVerifierMark; G1CollectedHeap* _g1h; @@ -82,6 +85,9 @@ class G1YoungCollector { bool _concurrent_operation_is_full_mark; + // Evacuation failure tracking. + G1EvacFailureRegions _evac_failure_regions; + // Runs the given AbstractGangTask with the current active workers, // returning the total time taken. Tickspan run_task_timed(AbstractGangTask* task); @@ -124,7 +130,8 @@ class G1YoungCollector { void post_evacuate_collection_set(G1EvacuationInfo* evacuation_info, G1ParScanThreadStateSet* per_thread_states); - G1EvacFailureRegions* _evac_failure_regions; + // True iff an evacuation has failed in the most-recent collection. + bool evacuation_failed() const; #if TASKQUEUE_STATS uint num_task_queues() const; @@ -136,8 +143,7 @@ class G1YoungCollector { public: G1YoungCollector(GCCause::Cause gc_cause, - double target_pause_time_ms, - G1EvacFailureRegions* evac_failure_regions); + double target_pause_time_ms); void collect(); bool concurrent_operation_is_full_mark() const { return _concurrent_operation_is_full_mark; } diff --git a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp index 51a3a2acd4228..5289f93ec34fa 100644 --- a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp +++ b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp @@ -44,11 +44,11 @@ G1PostEvacuateCollectionSetCleanupTask1::G1PostEvacuateCollectionSetCleanupTask1 G1BatchedGangTask("Post Evacuate Cleanup 1", G1CollectedHeap::heap()->phase_times()) { add_serial_task(new MergePssTask(per_thread_states)); - add_serial_task(new RecalculateUsedTask()); + add_serial_task(new RecalculateUsedTask(evac_failure_regions->evacuation_failed())); if (SampleCollectionSetCandidatesTask::should_execute()) { add_serial_task(new SampleCollectionSetCandidatesTask()); } - if (RemoveSelfForwardPtrsTask::should_execute()) { + if (evac_failure_regions->evacuation_failed()) { add_parallel_task(new RemoveSelfForwardPtrsTask(per_thread_states->rdcqs(), evac_failure_regions)); } add_parallel_task(G1CollectedHeap::heap()->rem_set()->create_cleanup_after_scan_heap_roots_task()); @@ -64,11 +64,11 @@ void G1PostEvacuateCollectionSetCleanupTask1::MergePssTask::do_work(uint worker_ double G1PostEvacuateCollectionSetCleanupTask1::RecalculateUsedTask::worker_cost() const { // If there is no evacuation failure, the work to perform is minimal. - return G1CollectedHeap::heap()->evacuation_failed() ? 1.0 : AlmostNoWork; + return _evacuation_failed ? 1.0 : AlmostNoWork; } void G1PostEvacuateCollectionSetCleanupTask1::RecalculateUsedTask::do_work(uint worker_id) { - G1CollectedHeap::heap()->update_used_after_gc(); + G1CollectedHeap::heap()->update_used_after_gc(_evacuation_failed); } bool G1PostEvacuateCollectionSetCleanupTask1::SampleCollectionSetCandidatesTask::should_execute() { @@ -97,10 +97,6 @@ void G1PostEvacuateCollectionSetCleanupTask1::SampleCollectionSetCandidatesTask: g1h->set_collection_set_candidates_stats(cl._total); } -bool G1PostEvacuateCollectionSetCleanupTask1::RemoveSelfForwardPtrsTask::should_execute() { - return G1CollectedHeap::heap()->evacuation_failed(); -} - G1PostEvacuateCollectionSetCleanupTask1:: RemoveSelfForwardPtrsTask::RemoveSelfForwardPtrsTask(G1RedirtyCardsQueueSet* rdcqs, G1EvacFailureRegions* evac_failure_regions) : @@ -109,14 +105,13 @@ G1PostEvacuateCollectionSetCleanupTask1:: _evac_failure_regions(evac_failure_regions) { } G1PostEvacuateCollectionSetCleanupTask1::RemoveSelfForwardPtrsTask::~RemoveSelfForwardPtrsTask() { - G1CollectedHeap* g1h = G1CollectedHeap::heap(); assert(_task.num_failed_regions() == _evac_failure_regions->num_regions_failed_evacuation(), "Removed regions %u inconsistent with expected %u", _task.num_failed_regions(), _evac_failure_regions->num_regions_failed_evacuation()); } double G1PostEvacuateCollectionSetCleanupTask1::RemoveSelfForwardPtrsTask::worker_cost() const { - assert(should_execute(), "Should not call this if not executed"); + assert(_evac_failure_regions->evacuation_failed(), "Should not call this if not executed"); return _evac_failure_regions->num_regions_failed_evacuation(); } @@ -271,10 +266,6 @@ G1PostEvacuateCollectionSetCleanupTask2::EagerlyReclaimHumongousObjectsTask::~Ea g1h->decrement_summary_bytes(_bytes_freed); } -bool G1PostEvacuateCollectionSetCleanupTask2::RestorePreservedMarksTask::should_execute() { - return G1CollectedHeap::heap()->evacuation_failed(); -} - G1PostEvacuateCollectionSetCleanupTask2::RestorePreservedMarksTask::RestorePreservedMarksTask(PreservedMarksSet* preserved_marks) : G1AbstractSubTask(G1GCPhaseTimes::RestorePreservedMarks), _preserved_marks(preserved_marks), _task(preserved_marks->create_task()) { } @@ -283,7 +274,6 @@ G1PostEvacuateCollectionSetCleanupTask2::RestorePreservedMarksTask::~RestorePres } double G1PostEvacuateCollectionSetCleanupTask2::RestorePreservedMarksTask::worker_cost() const { - assert(should_execute(), "Should not call this if not executed"); return _preserved_marks->num(); } @@ -635,7 +625,7 @@ G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2 add_serial_task(new EagerlyReclaimHumongousObjectsTask()); } - if (RestorePreservedMarksTask::should_execute()) { + if (evac_failure_regions->evacuation_failed()) { add_parallel_task(new RestorePreservedMarksTask(per_thread_states->preserved_marks_set())); } add_parallel_task(new RedirtyLoggedCardsTask(per_thread_states->rdcqs(), evac_failure_regions)); diff --git a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp index c06cfe4599763..2754dd80c6298 100644 --- a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp +++ b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp @@ -64,8 +64,12 @@ class G1PostEvacuateCollectionSetCleanupTask1::MergePssTask : public G1AbstractS }; class G1PostEvacuateCollectionSetCleanupTask1::RecalculateUsedTask : public G1AbstractSubTask { + bool _evacuation_failed; + public: - RecalculateUsedTask() : G1AbstractSubTask(G1GCPhaseTimes::RecalculateUsed) { } + RecalculateUsedTask(bool evacuation_failed) : + G1AbstractSubTask(G1GCPhaseTimes::RecalculateUsed), + _evacuation_failed(evacuation_failed) { } double worker_cost() const override; void do_work(uint worker_id) override; @@ -89,8 +93,6 @@ class G1PostEvacuateCollectionSetCleanupTask1::RemoveSelfForwardPtrsTask : publi RemoveSelfForwardPtrsTask(G1RedirtyCardsQueueSet* rdcqs, G1EvacFailureRegions* evac_failure_regions); ~RemoveSelfForwardPtrsTask(); - static bool should_execute(); - double worker_cost() const override; void do_work(uint worker_id) override; }; @@ -169,8 +171,6 @@ class G1PostEvacuateCollectionSetCleanupTask2::RestorePreservedMarksTask : publi RestorePreservedMarksTask(PreservedMarksSet* preserved_marks); virtual ~RestorePreservedMarksTask(); - static bool should_execute(); - double worker_cost() const override; void do_work(uint worker_id) override; }; From 1a636a0a5f04217872d9cbb1f43e7d4238ca14d5 Mon Sep 17 00:00:00 2001 From: Thomas Schatzl Date: Mon, 20 Sep 2021 13:33:05 +0200 Subject: [PATCH 2/5] sjohanss review --- src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp index cf3e6938cc038..c77dd7196f540 100644 --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp @@ -487,7 +487,7 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed trace_phase(_gc_par_phases[RedirtyCards]); debug_time("Post Evacuate Cleanup 2", _cur_post_evacuate_cleanup_2_time_ms); - if (_gc_par_phases[RestorePreservedMarks]->uninitialized()) { + if (evacuation_failed) { debug_phase(_gc_par_phases[RecalculateUsed], 1); debug_phase(_gc_par_phases[RestorePreservedMarks], 1); } From 466f499854868bb6af27b32a0d8c54f22a3b8041 Mon Sep 17 00:00:00 2001 From: Thomas Schatzl Date: Fri, 24 Sep 2021 14:41:32 +0200 Subject: [PATCH 3/5] ayang review --- src/hotspot/share/gc/g1/g1Policy.cpp | 6 +++++- src/hotspot/share/gc/g1/g1Policy.hpp | 2 +- src/hotspot/share/gc/g1/g1YoungCollector.cpp | 4 +--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/hotspot/share/gc/g1/g1Policy.cpp b/src/hotspot/share/gc/g1/g1Policy.cpp index 180c9ed50bcb9..b3cb2a71faf51 100644 --- a/src/hotspot/share/gc/g1/g1Policy.cpp +++ b/src/hotspot/share/gc/g1/g1Policy.cpp @@ -629,7 +629,7 @@ double G1Policy::logged_cards_processing_time() const { // Anything below that is considered to be zero #define MIN_TIMER_GRANULARITY 0.0000001 -void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mark, bool update_stats) { +void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mark, bool evacuation_failure) { G1GCPhaseTimes* p = phase_times(); double start_time_sec = phase_times()->cur_collection_start_sec(); @@ -652,6 +652,10 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar app_time_ms = 1.0; } + // Evacuation failures skew the timing too much to be considered for some statistics updates. + // We make the assumption that these are rare. + bool update_stats = !evacuation_failure; + if (update_stats) { // We maintain the invariant that all objects allocated by mutator // threads will be allocated out of eden regions. So, we can use diff --git a/src/hotspot/share/gc/g1/g1Policy.hpp b/src/hotspot/share/gc/g1/g1Policy.hpp index c6e37706a56a5..b964b77ec504b 100644 --- a/src/hotspot/share/gc/g1/g1Policy.hpp +++ b/src/hotspot/share/gc/g1/g1Policy.hpp @@ -316,7 +316,7 @@ class G1Policy: public CHeapObj { // Record the start and end of the actual collection part of the evacuation pause. void record_young_collection_start(); - void record_young_collection_end(bool concurrent_operation_is_full_mark, bool update_stats); + void record_young_collection_end(bool concurrent_operation_is_full_mark, bool evacuation_failure); // Record the start and end of a full collection. void record_full_collection_start(); diff --git a/src/hotspot/share/gc/g1/g1YoungCollector.cpp b/src/hotspot/share/gc/g1/g1YoungCollector.cpp index 88c0ce7601de4..13296026a3462 100644 --- a/src/hotspot/share/gc/g1/g1YoungCollector.cpp +++ b/src/hotspot/share/gc/g1/g1YoungCollector.cpp @@ -1127,9 +1127,7 @@ void G1YoungCollector::collect() { // modifies it to the next state. jtm.report_pause_type(collector_state()->young_gc_pause_type(_concurrent_operation_is_full_mark)); - // Evacuation failures skew the timing too much to be considered for some statistics updates. - // We make the assumption that these are rare. - policy()->record_young_collection_end(_concurrent_operation_is_full_mark, !evacuation_failed()); + policy()->record_young_collection_end(_concurrent_operation_is_full_mark, evacuation_failed()); } TASKQUEUE_STATS_ONLY(print_taskqueue_stats()); TASKQUEUE_STATS_ONLY(reset_taskqueue_stats()); From 3c27f705dd9c74a75f9a7f5f0aadd2d1a929558d Mon Sep 17 00:00:00 2001 From: Thomas Schatzl Date: Fri, 24 Sep 2021 14:49:23 +0200 Subject: [PATCH 4/5] ayang review (2) --- src/hotspot/share/gc/g1/g1Policy.cpp | 6 +++--- src/hotspot/share/gc/g1/g1Policy.hpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hotspot/share/gc/g1/g1Policy.cpp b/src/hotspot/share/gc/g1/g1Policy.cpp index b3cb2a71faf51..ef704c2240d47 100644 --- a/src/hotspot/share/gc/g1/g1Policy.cpp +++ b/src/hotspot/share/gc/g1/g1Policy.cpp @@ -670,7 +670,7 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar _analytics->report_alloc_rate_ms(alloc_rate_ms); } - record_pause(this_pause, start_time_sec, end_time_sec, update_stats); + record_pause(this_pause, start_time_sec, end_time_sec, evacuation_failure); if (G1GCPauseTypeHelper::is_last_young_pause(this_pause)) { assert(!G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause), @@ -1196,13 +1196,13 @@ void G1Policy::update_gc_pause_time_ratios(G1GCPauseType gc_type, double start_t void G1Policy::record_pause(G1GCPauseType gc_type, double start, double end, - bool should_update_gc_pause_time_ratios) { + bool evacuation_failure) { // Manage the MMU tracker. For some reason it ignores Full GCs. if (gc_type != G1GCPauseType::FullGC) { _mmu_tracker->add_pause(start, end); } - if (should_update_gc_pause_time_ratios) { + if (!evacuation_failure) { update_gc_pause_time_ratios(gc_type, start, end); } diff --git a/src/hotspot/share/gc/g1/g1Policy.hpp b/src/hotspot/share/gc/g1/g1Policy.hpp index b964b77ec504b..e3fb16b556662 100644 --- a/src/hotspot/share/gc/g1/g1Policy.hpp +++ b/src/hotspot/share/gc/g1/g1Policy.hpp @@ -274,7 +274,7 @@ class G1Policy: public CHeapObj { void record_pause(G1GCPauseType gc_type, double start, double end, - bool should_update_gc_pause_time_ratios = true); + bool evacuation_failure = false); void update_gc_pause_time_ratios(G1GCPauseType gc_type, double start_sec, double end_sec); From 1326464b206d67d9fc2dec637cf0f0b04b4a2f15 Mon Sep 17 00:00:00 2001 From: Thomas Schatzl Date: Fri, 24 Sep 2021 15:00:08 +0200 Subject: [PATCH 5/5] Fix comment --- src/hotspot/share/gc/g1/g1Policy.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/hotspot/share/gc/g1/g1Policy.hpp b/src/hotspot/share/gc/g1/g1Policy.hpp index e3fb16b556662..4a4efc0b32b54 100644 --- a/src/hotspot/share/gc/g1/g1Policy.hpp +++ b/src/hotspot/share/gc/g1/g1Policy.hpp @@ -268,9 +268,7 @@ class G1Policy: public CHeapObj { void maybe_start_marking(); // Manage time-to-mixed tracking. void update_time_to_mixed_tracking(G1GCPauseType gc_type, double start, double end); - // Record the given STW pause with the given start and end times (in s). Optionally - // update pause time ratios - in some situations (evacuation failure) we do not want - // to do this since timings of these garbage collections are very skewed. + // Record the given STW pause with the given start and end times (in s). void record_pause(G1GCPauseType gc_type, double start, double end,