diff --git a/src/hotspot/share/gc/g1/g1Analytics.cpp b/src/hotspot/share/gc/g1/g1Analytics.cpp index a36e7fb131e..d57c70a9d52 100644 --- a/src/hotspot/share/gc/g1/g1Analytics.cpp +++ b/src/hotspot/share/gc/g1/g1Analytics.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -79,7 +79,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) : _prev_collection_pause_end_ms(0.0), _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _dirtied_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _young_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), _mixed_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), _young_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)), @@ -107,7 +107,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) : // Start with inverse of maximum STW cost. _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]); // Some applications have very low rates for logging cards. - _logged_cards_rate_ms_seq->add(0.0); + _dirtied_cards_rate_ms_seq->add(0.0); _young_card_merge_to_scan_ratio_seq->add(young_card_merge_to_scan_ratio_defaults[index]); _young_cost_per_card_scan_ms_seq->add(young_only_cost_per_card_scan_ms_defaults[index]); @@ -168,8 +168,8 @@ void G1Analytics::report_concurrent_refine_rate_ms(double cards_per_ms) { _concurrent_refine_rate_ms_seq->add(cards_per_ms); } -void G1Analytics::report_logged_cards_rate_ms(double cards_per_ms) { - _logged_cards_rate_ms_seq->add(cards_per_ms); +void G1Analytics::report_dirtied_cards_rate_ms(double cards_per_ms) { + _dirtied_cards_rate_ms_seq->add(cards_per_ms); } void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_gc) { @@ -236,8 +236,8 @@ double G1Analytics::predict_concurrent_refine_rate_ms() const { return predict_zero_bounded(_concurrent_refine_rate_ms_seq); } -double G1Analytics::predict_logged_cards_rate_ms() const { - return predict_zero_bounded(_logged_cards_rate_ms_seq); +double G1Analytics::predict_dirtied_cards_rate_ms() const { + return predict_zero_bounded(_dirtied_cards_rate_ms_seq); } double G1Analytics::predict_young_card_merge_to_scan_ratio() const { diff --git a/src/hotspot/share/gc/g1/g1Analytics.hpp b/src/hotspot/share/gc/g1/g1Analytics.hpp index f3e566155b1..2acba9f9c5b 100644 --- a/src/hotspot/share/gc/g1/g1Analytics.hpp +++ b/src/hotspot/share/gc/g1/g1Analytics.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,7 +47,7 @@ class G1Analytics: public CHeapObj { TruncatedSeq* _rs_length_diff_seq; TruncatedSeq* _concurrent_refine_rate_ms_seq; - TruncatedSeq* _logged_cards_rate_ms_seq; + TruncatedSeq* _dirtied_cards_rate_ms_seq; // The ratio between the number of merged cards and actually scanned cards, for // young-only and mixed gcs. TruncatedSeq* _young_card_merge_to_scan_ratio_seq; @@ -115,7 +115,7 @@ class G1Analytics: public CHeapObj { void report_concurrent_mark_cleanup_times_ms(double ms); void report_alloc_rate_ms(double alloc_rate); void report_concurrent_refine_rate_ms(double cards_per_ms); - void report_logged_cards_rate_ms(double cards_per_ms); + void report_dirtied_cards_rate_ms(double cards_per_ms); void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_gc); void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc); void report_card_merge_to_scan_ratio(double cards_per_entry_ratio, bool for_young_gc); @@ -131,7 +131,7 @@ class G1Analytics: public CHeapObj { int num_alloc_rate_ms() const; double predict_concurrent_refine_rate_ms() const; - double predict_logged_cards_rate_ms() const; + double predict_dirtied_cards_rate_ms() const; double predict_young_card_merge_to_scan_ratio() const; double predict_mixed_card_merge_to_scan_ratio() const; diff --git a/src/hotspot/share/gc/g1/g1BarrierSet.cpp b/src/hotspot/share/gc/g1/g1BarrierSet.cpp index 62dba813d51..9243e1f319f 100644 --- a/src/hotspot/share/gc/g1/g1BarrierSet.cpp +++ b/src/hotspot/share/gc/g1/g1BarrierSet.cpp @@ -157,5 +157,5 @@ void G1BarrierSet::on_thread_detach(Thread* thread) { // Flush any deferred card marks. CardTableBarrierSet::on_thread_detach(thread); G1ThreadLocalData::satb_mark_queue(thread).flush(); - G1ThreadLocalData::dirty_card_queue(thread).flush(); + G1ThreadLocalData::dirty_card_queue(thread).on_thread_detach(); } diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp index 08ee547b9b6..45fc8ae046b 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp @@ -1054,10 +1054,11 @@ void G1CollectedHeap::abort_refinement() { _hot_card_cache->reset_hot_cache(); } - // Discard all remembered set updates. + // Discard all remembered set updates and reset refinement statistics. G1BarrierSet::dirty_card_queue_set().abandon_logs(); assert(G1BarrierSet::dirty_card_queue_set().num_cards() == 0, "DCQS should be empty"); + concurrent_refine()->get_and_reset_refinement_stats(); } void G1CollectedHeap::verify_after_full_collection() { @@ -2684,9 +2685,22 @@ void G1CollectedHeap::gc_prologue(bool full) { } // Fill TLAB's and such - double start = os::elapsedTime(); - ensure_parsability(true); - phase_times()->record_prepare_tlab_time_ms((os::elapsedTime() - start) * 1000.0); + { + Ticks start = Ticks::now(); + ensure_parsability(true); + Tickspan dt = Ticks::now() - start; + phase_times()->record_prepare_tlab_time_ms(dt.seconds() * MILLIUNITS); + } + + if (!full) { + // Flush dirty card queues to qset, so later phases don't need to account + // for partially filled per-thread queues and such. Not needed for full + // collections, which ignore those logs. + Ticks start = Ticks::now(); + G1BarrierSet::dirty_card_queue_set().concatenate_logs(); + Tickspan dt = Ticks::now() - start; + phase_times()->record_concatenate_dirty_card_logs_time_ms(dt.seconds() * MILLIUNITS); + } } void G1CollectedHeap::gc_epilogue(bool full) { @@ -2759,20 +2773,6 @@ void G1CollectedHeap::do_concurrent_mark() { } } -size_t G1CollectedHeap::pending_card_num() { - struct CountCardsClosure : public ThreadClosure { - size_t _cards; - CountCardsClosure() : _cards(0) {} - virtual void do_thread(Thread* t) { - _cards += G1ThreadLocalData::dirty_card_queue(t).size(); - } - } count_from_threads; - Threads::threads_do(&count_from_threads); - - G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); - return dcqs.num_cards() + count_from_threads._cards; -} - bool G1CollectedHeap::is_potential_eager_reclaim_candidate(HeapRegion* r) const { // We don't nominate objects with many remembered set entries, on // the assumption that such objects are likely still live. diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp index 2e542d668ed..7b2463d4c94 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp @@ -1465,8 +1465,6 @@ class G1CollectedHeap : public CollectedHeap { // Used to print information about locations in the hs_err file. virtual bool print_location(outputStream* st, void* addr) const; - - size_t pending_card_num(); }; class G1ParEvacuateFollowersClosure : public VoidClosure { diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp index ee5817dab03..2607f48a127 100644 --- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp @@ -408,20 +408,18 @@ void G1ConcurrentRefine::adjust(double logged_cards_scan_time, dcqs.notify_if_necessary(); } -G1ConcurrentRefine::RefinementStats G1ConcurrentRefine::total_refinement_stats() const { - struct CollectData : public ThreadClosure { - Tickspan _total_time; - size_t _total_cards; - CollectData() : _total_time(), _total_cards(0) {} +G1ConcurrentRefineStats G1ConcurrentRefine::get_and_reset_refinement_stats() { + struct CollectStats : public ThreadClosure { + G1ConcurrentRefineStats _total_stats; virtual void do_thread(Thread* t) { G1ConcurrentRefineThread* crt = static_cast(t); - _total_time += crt->total_refinement_time(); - _total_cards += crt->total_refined_cards(); + G1ConcurrentRefineStats& stats = *crt->refinement_stats(); + _total_stats += stats; + stats.reset(); } } collector; - // Cast away const so we can call non-modifying closure on threads. - const_cast(this)->threads_do(&collector); - return RefinementStats(collector._total_time, collector._total_cards); + threads_do(&collector); + return collector._total_stats; } size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const { @@ -445,7 +443,7 @@ void G1ConcurrentRefine::maybe_activate_more_threads(uint worker_id, size_t num_ } bool G1ConcurrentRefine::do_refinement_step(uint worker_id, - size_t* total_refined_cards) { + G1ConcurrentRefineStats* stats) { G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); size_t curr_cards = dcqs.num_cards(); @@ -460,5 +458,5 @@ bool G1ConcurrentRefine::do_refinement_step(uint worker_id, // Process the next buffer, if there are enough left. return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(), deactivation_threshold(worker_id), - total_refined_cards); + stats); } diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp index 8a9a041578e..3f32a5c3199 100644 --- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ #ifndef SHARE_GC_G1_G1CONCURRENTREFINE_HPP #define SHARE_GC_G1_G1CONCURRENTREFINE_HPP +#include "gc/g1/g1ConcurrentRefineStats.hpp" #include "memory/allocation.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/ticks.hpp" @@ -119,13 +120,9 @@ class G1ConcurrentRefine : public CHeapObj { // Adjust refinement thresholds based on work done during the pause and the goal time. void adjust(double logged_cards_scan_time, size_t processed_logged_cards, double goal_ms); - struct RefinementStats { - Tickspan _time; - size_t _cards; - RefinementStats(Tickspan time, size_t cards) : _time(time), _cards(cards) {} - }; - - RefinementStats total_refinement_stats() const; + // Return total of concurrent refinement stats for the + // ConcurrentRefineThreads. Also reset the stats for the threads. + G1ConcurrentRefineStats get_and_reset_refinement_stats(); // Cards in the dirty card queue set. size_t activation_threshold(uint worker_id) const; @@ -133,8 +130,8 @@ class G1ConcurrentRefine : public CHeapObj { // Perform a single refinement step; called by the refinement // threads. Returns true if there was refinement work available. - // Increments *total_refined_cards. - bool do_refinement_step(uint worker_id, size_t* total_refined_cards); + // Updates stats. + bool do_refinement_step(uint worker_id, G1ConcurrentRefineStats* stats); // Iterate over all concurrent refinement threads applying the given closure. void threads_do(ThreadClosure *tc); diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp new file mode 100644 index 00000000000..055fd8b891d --- /dev/null +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/g1/g1ConcurrentRefineStats.hpp" + +G1ConcurrentRefineStats::G1ConcurrentRefineStats() : + _refinement_time(), + _refined_cards(0), + _precleaned_cards(0), + _dirtied_cards(0) +{} + +G1ConcurrentRefineStats& +G1ConcurrentRefineStats::operator+=(const G1ConcurrentRefineStats& other) { + _refinement_time += other._refinement_time; + _refined_cards += other._refined_cards; + _precleaned_cards += other._precleaned_cards; + _dirtied_cards += other._dirtied_cards; + return *this; +} + +template +static T clipped_sub(T x, T y) { + return (x < y) ? T() : (x - y); +} + +G1ConcurrentRefineStats& +G1ConcurrentRefineStats::operator-=(const G1ConcurrentRefineStats& other) { + _refinement_time = clipped_sub(_refinement_time, other._refinement_time); + _refined_cards = clipped_sub(_refined_cards, other._refined_cards); + _precleaned_cards = clipped_sub(_precleaned_cards, other._precleaned_cards); + _dirtied_cards = clipped_sub(_dirtied_cards, other._dirtied_cards); + return *this; +} + +void G1ConcurrentRefineStats::reset() { + *this = G1ConcurrentRefineStats(); +} diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp new file mode 100644 index 00000000000..d0e17301a5f --- /dev/null +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_GC_G1_G1CONCURRENTREFINESTATS_HPP +#define SHARE_GC_G1_G1CONCURRENTREFINESTATS_HPP + +#include "memory/allocation.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/ticks.hpp" + +// Collection of statistics for concurrent refinement processing. +// Used for collecting per-thread statistics and for summaries over a +// collection of threads. +class G1ConcurrentRefineStats : public CHeapObj { + Tickspan _refinement_time; + size_t _refined_cards; + size_t _precleaned_cards; + size_t _dirtied_cards; + +public: + G1ConcurrentRefineStats(); + + // Time spent performing concurrent refinement. + Tickspan refinement_time() const { return _refinement_time; } + + // Number of refined cards. + size_t refined_cards() const { return _refined_cards; } + + // Number of cards for which refinement was skipped because some other + // thread had already refined them. + size_t precleaned_cards() const { return _precleaned_cards; } + + // Number of cards marked dirty and in need of refinement. + size_t dirtied_cards() const { return _dirtied_cards; } + + void inc_refinement_time(Tickspan t) { _refinement_time += t; } + void inc_refined_cards(size_t cards) { _refined_cards += cards; } + void inc_precleaned_cards(size_t cards) { _precleaned_cards += cards; } + void inc_dirtied_cards(size_t cards) { _dirtied_cards += cards; } + + G1ConcurrentRefineStats& operator+=(const G1ConcurrentRefineStats& other); + G1ConcurrentRefineStats& operator-=(const G1ConcurrentRefineStats& other); + + friend G1ConcurrentRefineStats operator+(G1ConcurrentRefineStats x, + const G1ConcurrentRefineStats& y) { + return x += y; + } + + friend G1ConcurrentRefineStats operator-(G1ConcurrentRefineStats x, + const G1ConcurrentRefineStats& y) { + return x -= y; + } + + void reset(); +}; + +#endif // SHARE_GC_G1_G1CONCURRENTREFINESTATS_HPP diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp index 55a274486d7..e57617114d2 100644 --- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "gc/g1/g1BarrierSet.hpp" #include "gc/g1/g1ConcurrentRefine.hpp" +#include "gc/g1/g1ConcurrentRefineStats.hpp" #include "gc/g1/g1ConcurrentRefineThread.hpp" #include "gc/g1/g1DirtyCardQueue.hpp" #include "gc/shared/suspendibleThreadSet.hpp" @@ -36,8 +37,7 @@ G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint ConcurrentGCThread(), _vtime_start(0.0), _vtime_accum(0.0), - _total_refinement_time(), - _total_refined_cards(0), + _refinement_stats(new G1ConcurrentRefineStats()), _worker_id(worker_id), _notifier(new Semaphore(0)), _should_notify(true), @@ -48,6 +48,11 @@ G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint create_and_start(); } +G1ConcurrentRefineThread::~G1ConcurrentRefineThread() { + delete _refinement_stats; + delete _notifier; +} + void G1ConcurrentRefineThread::wait_for_completed_buffers() { assert(this == Thread::current(), "precondition"); while (Atomic::load_acquire(&_should_notify)) { @@ -103,32 +108,35 @@ void G1ConcurrentRefineThread::run_service() { _worker_id, _cr->activation_threshold(_worker_id), G1BarrierSet::dirty_card_queue_set().num_cards()); - size_t start_total_refined_cards = _total_refined_cards; // For logging. + // For logging. + G1ConcurrentRefineStats start_stats = *_refinement_stats; + G1ConcurrentRefineStats total_stats; // Accumulate over activation. { SuspendibleThreadSetJoiner sts_join; while (!should_terminate()) { if (sts_join.should_yield()) { + // Accumulate changed stats before possible GC that resets stats. + total_stats += *_refinement_stats - start_stats; sts_join.yield(); + // Reinitialize baseline stats after safepoint. + start_stats = *_refinement_stats; continue; // Re-check for termination after yield delay. } - Ticks start_time = Ticks::now(); - bool more_work = _cr->do_refinement_step(_worker_id, &_total_refined_cards); - _total_refinement_time += (Ticks::now() - start_time); - + bool more_work = _cr->do_refinement_step(_worker_id, _refinement_stats); if (maybe_deactivate(more_work)) break; } } + total_stats += *_refinement_stats - start_stats; log_debug(gc, refine)("Deactivated worker %d, off threshold: " SIZE_FORMAT - ", current: " SIZE_FORMAT ", refined cards: " - SIZE_FORMAT ", total refined cards: " SIZE_FORMAT, + ", current: " SIZE_FORMAT + ", refined cards: " SIZE_FORMAT, _worker_id, _cr->deactivation_threshold(_worker_id), G1BarrierSet::dirty_card_queue_set().num_cards(), - _total_refined_cards - start_total_refined_cards, - _total_refined_cards); + total_stats.refined_cards()); if (os::supports_vtime()) { _vtime_accum = (os::elapsedVTime() - _vtime_start); diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp index 428c07c4250..cdcab6b2285 100644 --- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp @@ -30,6 +30,7 @@ // Forward Decl. class G1ConcurrentRefine; +class G1ConcurrentRefineStats; // One or more G1 Concurrent Refinement Threads may be active if concurrent // refinement is in progress. @@ -40,8 +41,7 @@ class G1ConcurrentRefineThread: public ConcurrentGCThread { double _vtime_start; // Initial virtual time. double _vtime_accum; // Accumulated virtual time. - Tickspan _total_refinement_time; - size_t _total_refined_cards; + G1ConcurrentRefineStats* _refinement_stats; uint _worker_id; @@ -71,12 +71,14 @@ class G1ConcurrentRefineThread: public ConcurrentGCThread { public: G1ConcurrentRefineThread(G1ConcurrentRefine* cg1r, uint worker_id); + virtual ~G1ConcurrentRefineThread(); // Activate this thread. void activate(); - Tickspan total_refinement_time() const { return _total_refinement_time; } - size_t total_refined_cards() const { return _total_refined_cards; } + G1ConcurrentRefineStats* refinement_stats() const { + return _refinement_stats; + } // Total virtual time so far. double vtime_accum() { return _vtime_accum; } diff --git a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp index edd3f4e9c6a..05b51af06a2 100644 --- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp +++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp @@ -26,6 +26,7 @@ #include "gc/g1/g1BufferNodeList.hpp" #include "gc/g1/g1CardTableEntryClosure.hpp" #include "gc/g1/g1CollectedHeap.inline.hpp" +#include "gc/g1/g1ConcurrentRefineStats.hpp" #include "gc/g1/g1ConcurrentRefineThread.hpp" #include "gc/g1/g1DirtyCardQueue.hpp" #include "gc/g1/g1FreeIdSet.hpp" @@ -36,6 +37,8 @@ #include "gc/shared/suspendibleThreadSet.hpp" #include "memory/iterator.hpp" #include "runtime/atomic.hpp" +#include "runtime/mutex.hpp" +#include "runtime/mutexLocker.hpp" #include "runtime/os.hpp" #include "runtime/safepoint.hpp" #include "runtime/thread.inline.hpp" @@ -43,22 +46,37 @@ #include "utilities/globalCounter.inline.hpp" #include "utilities/macros.hpp" #include "utilities/quickSort.hpp" +#include "utilities/ticks.hpp" G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) : // Dirty card queues are always active, so we create them with their // active field set to true. - PtrQueue(qset, true /* active */) + PtrQueue(qset, true /* active */), + _refinement_stats(new G1ConcurrentRefineStats()) { } G1DirtyCardQueue::~G1DirtyCardQueue() { flush(); + delete _refinement_stats; +} + +void G1DirtyCardQueue::flush() { + _refinement_stats->inc_dirtied_cards(size()); + flush_impl(); +} + +void G1DirtyCardQueue::on_thread_detach() { + assert(this == &G1ThreadLocalData::dirty_card_queue(Thread::current()), "precondition"); + flush(); + dirty_card_qset()->record_detached_refinement_stats(_refinement_stats); } void G1DirtyCardQueue::handle_completed_buffer() { assert(!is_empty(), "precondition"); + _refinement_stats->inc_dirtied_cards(size()); BufferNode* node = BufferNode::make_node_from_buffer(_buf, index()); allocate_buffer(); - dirty_card_qset()->handle_completed_buffer(node); + dirty_card_qset()->handle_completed_buffer(node, _refinement_stats); } // Assumed to be zero by concurrent threads. @@ -74,15 +92,13 @@ G1DirtyCardQueueSet::G1DirtyCardQueueSet(BufferNode::Allocator* allocator) : _process_cards_threshold(ProcessCardsThresholdNever), _max_cards(MaxCardsUnlimited), _padded_max_cards(MaxCardsUnlimited), - _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC)) + _detached_refinement_stats() { - ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t)); _all_active = true; } G1DirtyCardQueueSet::~G1DirtyCardQueueSet() { abandon_completed_buffers(); - FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters); } // Determines how many mutator threads can process the buffers in parallel. @@ -90,14 +106,6 @@ uint G1DirtyCardQueueSet::num_par_ids() { return (uint)os::initial_active_processor_count(); } -size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const { - size_t sum = 0; - for (uint i = 0; i < num_par_ids(); ++i) { - sum += _mutator_refined_cards_counters[i]; - } - return sum; -} - void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) { G1ThreadLocalData::dirty_card_queue(t).handle_zero_index(); } @@ -422,7 +430,7 @@ class G1RefineBufferedCards : public StackObj { CardTable::CardValue** const _node_buffer; const size_t _node_buffer_size; const uint _worker_id; - size_t* _total_refined_cards; + G1ConcurrentRefineStats* _stats; G1RemSet* const _g1rs; static inline int compare_card(const CardTable::CardValue* p1, @@ -472,7 +480,8 @@ class G1RefineBufferedCards : public StackObj { const size_t first_clean = dst - _node_buffer; assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant"); // Discarded cards are considered as refined. - *_total_refined_cards += first_clean - start; + _stats->inc_refined_cards(first_clean - start); + _stats->inc_precleaned_cards(first_clean - start); return first_clean; } @@ -488,7 +497,7 @@ class G1RefineBufferedCards : public StackObj { _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id); } _node->set_index(i); - *_total_refined_cards += i - start_index; + _stats->inc_refined_cards(i - start_index); return result; } @@ -502,12 +511,12 @@ class G1RefineBufferedCards : public StackObj { G1RefineBufferedCards(BufferNode* node, size_t node_buffer_size, uint worker_id, - size_t* total_refined_cards) : + G1ConcurrentRefineStats* stats) : _node(node), _node_buffer(reinterpret_cast(BufferNode::make_buffer_from_node(node))), _node_buffer_size(node_buffer_size), _worker_id(worker_id), - _total_refined_cards(total_refined_cards), + _stats(stats), _g1rs(G1CollectedHeap::heap()->rem_set()) {} bool refine() { @@ -532,12 +541,15 @@ class G1RefineBufferedCards : public StackObj { bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, uint worker_id, - size_t* total_refined_cards) { + G1ConcurrentRefineStats* stats) { + Ticks start_time = Ticks::now(); G1RefineBufferedCards buffered_cards(node, buffer_size(), worker_id, - total_refined_cards); - return buffered_cards.refine(); + stats); + bool result = buffered_cards.refine(); + stats->inc_refinement_time(Ticks::now() - start_time); + return result; } void G1DirtyCardQueueSet::handle_refined_buffer(BufferNode* node, @@ -555,7 +567,8 @@ void G1DirtyCardQueueSet::handle_refined_buffer(BufferNode* node, } } -void G1DirtyCardQueueSet::handle_completed_buffer(BufferNode* new_node) { +void G1DirtyCardQueueSet::handle_completed_buffer(BufferNode* new_node, + G1ConcurrentRefineStats* stats) { enqueue_completed_buffer(new_node); // No need for mutator refinement if number of cards is below limit. @@ -574,9 +587,7 @@ void G1DirtyCardQueueSet::handle_completed_buffer(BufferNode* new_node) { // Refine cards in buffer. uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id - uint counter_index = worker_id - par_ids_start(); - size_t* counter = &_mutator_refined_cards_counters[counter_index]; - bool fully_processed = refine_buffer(node, worker_id, counter); + bool fully_processed = refine_buffer(node, worker_id, stats); _free_ids.release_par_id(worker_id); // release the id // Deal with buffer after releasing id, to let another thread use id. @@ -585,14 +596,14 @@ void G1DirtyCardQueueSet::handle_completed_buffer(BufferNode* new_node) { bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id, size_t stop_at, - size_t* total_refined_cards) { + G1ConcurrentRefineStats* stats) { // Not enough cards to trigger processing. if (Atomic::load(&_num_cards) <= stop_at) return false; BufferNode* node = get_completed_buffer(); if (node == NULL) return false; // Didn't get a buffer to process. - bool fully_processed = refine_buffer(node, worker_id, total_refined_cards); + bool fully_processed = refine_buffer(node, worker_id, stats); handle_refined_buffer(node, fully_processed); return true; } @@ -600,12 +611,15 @@ bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id, void G1DirtyCardQueueSet::abandon_logs() { assert_at_safepoint(); abandon_completed_buffers(); + _detached_refinement_stats.reset(); // Since abandon is done only at safepoints, we can safely manipulate // these queues. struct AbandonThreadLogClosure : public ThreadClosure { virtual void do_thread(Thread* t) { - G1ThreadLocalData::dirty_card_queue(t).reset(); + G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t); + dcq.reset(); + dcq.refinement_stats()->reset(); } } closure; Threads::threads_do(&closure); @@ -637,6 +651,40 @@ void G1DirtyCardQueueSet::concatenate_logs() { set_max_cards(old_limit); } +G1ConcurrentRefineStats G1DirtyCardQueueSet::get_and_reset_refinement_stats() { + assert_at_safepoint(); + + // Since we're at a safepoint, there aren't any races with recording of + // detached refinement stats. In particular, there's no risk of double + // counting a thread that detaches after we've examined it but before + // we've processed the detached stats. + + // Collect and reset stats for attached threads. + struct CollectStats : public ThreadClosure { + G1ConcurrentRefineStats _total_stats; + virtual void do_thread(Thread* t) { + G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t); + G1ConcurrentRefineStats& stats = *dcq.refinement_stats(); + _total_stats += stats; + stats.reset(); + } + } closure; + Threads::threads_do(&closure); + + // Collect and reset stats from detached threads. + MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag); + closure._total_stats += _detached_refinement_stats; + _detached_refinement_stats.reset(); + + return closure._total_stats; +} + +void G1DirtyCardQueueSet::record_detached_refinement_stats(G1ConcurrentRefineStats* stats) { + MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag); + _detached_refinement_stats += *stats; + stats->reset(); +} + size_t G1DirtyCardQueueSet::max_cards() const { return _max_cards; } diff --git a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp b/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp index 375676cbf53..2b8b445154d 100644 --- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp +++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp @@ -27,6 +27,7 @@ #include "gc/g1/g1BufferNodeList.hpp" #include "gc/g1/g1FreeIdSet.hpp" +#include "gc/g1/g1ConcurrentRefineStats.hpp" #include "gc/shared/ptrQueue.hpp" #include "memory/allocation.hpp" #include "memory/padded.hpp" @@ -38,6 +39,8 @@ class Thread; // A ptrQueue whose elements are "oops", pointers to object heads. class G1DirtyCardQueue: public PtrQueue { + G1ConcurrentRefineStats* _refinement_stats; + protected: virtual void handle_completed_buffer(); @@ -49,10 +52,18 @@ class G1DirtyCardQueue: public PtrQueue { ~G1DirtyCardQueue(); // Process queue entries and release resources. - void flush() { flush_impl(); } + void flush(); inline G1DirtyCardQueueSet* dirty_card_qset() const; + G1ConcurrentRefineStats* refinement_stats() const { + return _refinement_stats; + } + + // To be called by the barrier set's on_thread_detach, to notify this + // object of the corresponding state change of its owning thread. + void on_thread_detach(); + // Compiler support. static ByteSize byte_offset_of_index() { return PtrQueue::byte_offset_of_index(); @@ -215,9 +226,7 @@ class G1DirtyCardQueueSet: public PtrQueueSet { volatile size_t _padded_max_cards; static const size_t MaxCardsUnlimited = SIZE_MAX; - // Array of cumulative dirty cards refined by mutator threads. - // Array has an entry per id in _free_ids. - size_t* _mutator_refined_cards_counters; + G1ConcurrentRefineStats _detached_refinement_stats; // Verify _num_cards == sum of cards in the completed queue. void verify_num_cards() const NOT_DEBUG_RETURN; @@ -241,9 +250,10 @@ class G1DirtyCardQueueSet: public PtrQueueSet { // is a pending yield request. The node's index is updated to exclude // the processed elements, e.g. up to the element before processing // stopped, or one past the last element if the entire buffer was - // processed. Increments *total_refined_cards by the number of cards - // processed and removed from the buffer. - bool refine_buffer(BufferNode* node, uint worker_id, size_t* total_refined_cards); + // processed. Updates stats. + bool refine_buffer(BufferNode* node, + uint worker_id, + G1ConcurrentRefineStats* stats); // Deal with buffer after a call to refine_buffer. If fully processed, // deallocate the buffer. Otherwise, record it as paused. @@ -296,26 +306,23 @@ class G1DirtyCardQueueSet: public PtrQueueSet { // Enqueue the buffer, and optionally perform refinement by the mutator. // Mutator refinement is only done by Java threads, and only if there // are more than max_cards (possibly padded) cards in the completed - // buffers. + // buffers. Updates stats. // // Mutator refinement, if performed, stops processing a buffer if // SuspendibleThreadSet::should_yield(), recording the incompletely // processed buffer for later processing of the remainder. - void handle_completed_buffer(BufferNode* node); + void handle_completed_buffer(BufferNode* node, G1ConcurrentRefineStats* stats); // If there are more than stop_at cards in the completed buffers, pop // a buffer, refine its contents, and return true. Otherwise return - // false. + // false. Updates stats. // // Stops processing a buffer if SuspendibleThreadSet::should_yield(), // recording the incompletely processed buffer for later processing of // the remainder. - // - // Increments *total_refined_cards by the number of cards processed and - // removed from the buffer. bool refine_completed_buffer_concurrently(uint worker_id, size_t stop_at, - size_t* total_refined_cards); + G1ConcurrentRefineStats* stats); // If a full collection is happening, reset partial logs, and release // completed ones: the full collection will make them all irrelevant. @@ -324,6 +331,14 @@ class G1DirtyCardQueueSet: public PtrQueueSet { // If any threads have partial logs, add them to the global list of logs. void concatenate_logs(); + // Return the total of mutator refinement stats for all threads. + // Also resets the stats for the threads. + // precondition: at safepoint. + G1ConcurrentRefineStats get_and_reset_refinement_stats(); + + // Accumulate refinement stats from threads that are detaching. + void record_detached_refinement_stats(G1ConcurrentRefineStats* stats); + // Threshold for mutator threads to also do refinement when there // are concurrent refinement threads. size_t max_cards() const; @@ -336,9 +351,6 @@ class G1DirtyCardQueueSet: public PtrQueueSet { // Discard artificial increase of mutator refinement threshold. void discard_max_cards_padding(); - - // Total dirty cards refined by mutator threads. - size_t total_mutator_refined_cards() const; }; inline G1DirtyCardQueueSet* G1DirtyCardQueue::dirty_card_qset() const { diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp index 88fd9a7a7c8..feb9c033ca5 100644 --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -157,6 +157,7 @@ void G1GCPhaseTimes::reset() { _cur_string_deduplication_time_ms = 0.0; _cur_prepare_tlab_time_ms = 0.0; _cur_resize_tlab_time_ms = 0.0; + _cur_concatenate_dirty_card_logs_time_ms = 0.0; _cur_derived_pointer_table_update_time_ms = 0.0; _cur_clear_ct_time_ms = 0.0; _cur_expand_heap_time_ms = 0.0; @@ -377,6 +378,8 @@ void G1GCPhaseTimes::trace_count(const char* name, size_t value) const { double G1GCPhaseTimes::print_pre_evacuate_collection_set() const { const double sum_ms = _root_region_scan_wait_time_ms + + _cur_prepare_tlab_time_ms + + _cur_concatenate_dirty_card_logs_time_ms + _recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms + _cur_region_register_time + @@ -389,6 +392,7 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const { debug_time("Root Region Scan Waiting", _root_region_scan_wait_time_ms); } debug_time("Prepare TLABs", _cur_prepare_tlab_time_ms); + debug_time("Concatenate Dirty Card Logs", _cur_concatenate_dirty_card_logs_time_ms); debug_time("Choose Collection Set", (_recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms)); debug_time("Region Register", _cur_region_register_time); if (G1EagerReclaimHumongousObjects) { diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp index 5aa46240732..98680d396f6 100644 --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -143,6 +143,8 @@ class G1GCPhaseTimes : public CHeapObj { double _cur_prepare_tlab_time_ms; double _cur_resize_tlab_time_ms; + double _cur_concatenate_dirty_card_logs_time_ms; + double _cur_derived_pointer_table_update_time_ms; double _cur_clear_ct_time_ms; @@ -252,6 +254,10 @@ class G1GCPhaseTimes : public CHeapObj { _cur_resize_tlab_time_ms = ms; } + void record_concatenate_dirty_card_logs_time_ms(double ms) { + _cur_concatenate_dirty_card_logs_time_ms = ms; + } + void record_derived_pointer_table_update_time(double ms) { _cur_derived_pointer_table_update_time_ms = ms; } diff --git a/src/hotspot/share/gc/g1/g1Policy.cpp b/src/hotspot/share/gc/g1/g1Policy.cpp index 5f18cdfba0d..c046bbc0318 100644 --- a/src/hotspot/share/gc/g1/g1Policy.cpp +++ b/src/hotspot/share/gc/g1/g1Policy.cpp @@ -31,6 +31,7 @@ #include "gc/g1/g1ConcurrentMark.hpp" #include "gc/g1/g1ConcurrentMarkThread.inline.hpp" #include "gc/g1/g1ConcurrentRefine.hpp" +#include "gc/g1/g1ConcurrentRefineStats.hpp" #include "gc/g1/g1CollectionSetChooser.hpp" #include "gc/g1/g1HeterogeneousHeapPolicy.hpp" #include "gc/g1/g1HotCardCache.hpp" @@ -43,7 +44,7 @@ #include "gc/g1/heapRegionRemSet.hpp" #include "gc/shared/concurrentGCBreakpoints.hpp" #include "gc/shared/gcPolicyCounters.hpp" -#include "logging/logStream.hpp" +#include "logging/log.hpp" #include "runtime/arguments.hpp" #include "runtime/java.hpp" #include "runtime/mutexLocker.hpp" @@ -72,10 +73,6 @@ G1Policy::G1Policy(STWGCTimer* gc_timer) : _rs_length(0), _rs_length_prediction(0), _pending_cards_at_gc_start(0), - _pending_cards_at_prev_gc_end(0), - _total_mutator_refined_cards(0), - _total_concurrent_refined_cards(0), - _total_concurrent_refinement_time(), _bytes_allocated_in_old_since_last_gc(0), _initial_mark_to_mixed(), _collection_set(NULL), @@ -432,7 +429,7 @@ void G1Policy::record_full_collection_start() { collector_state()->set_in_young_only_phase(false); collector_state()->set_in_full_gc(true); _collection_set->clear_candidates(); - record_concurrent_refinement_data(true /* is_full_collection */); + _pending_cards_at_gc_start = 0; } void G1Policy::record_full_collection_end() { @@ -462,64 +459,62 @@ void G1Policy::record_full_collection_end() { _survivor_surv_rate_group->reset(); update_young_list_max_and_target_length(); update_rs_length_prediction(); - _pending_cards_at_prev_gc_end = _g1h->pending_card_num(); _bytes_allocated_in_old_since_last_gc = 0; record_pause(FullGC, _full_collection_start_sec, end_sec); } -void G1Policy::record_concurrent_refinement_data(bool is_full_collection) { - _pending_cards_at_gc_start = _g1h->pending_card_num(); +static void log_refinement_stats(const char* kind, const G1ConcurrentRefineStats& stats) { + log_debug(gc, refine, stats) + ("%s refinement: %.2fms, refined: " SIZE_FORMAT + ", precleaned: " SIZE_FORMAT ", dirtied: " SIZE_FORMAT, + kind, + stats.refinement_time().seconds() * MILLIUNITS, + stats.refined_cards(), + stats.precleaned_cards(), + stats.dirtied_cards()); +} + +void G1Policy::record_concurrent_refinement_stats() { + G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); + _pending_cards_at_gc_start = dcqs.num_cards(); - // Record info about concurrent refinement thread processing. + // Collect per-thread stats, mostly from mutator activity. + G1ConcurrentRefineStats mut_stats = dcqs.get_and_reset_refinement_stats(); + + // Collect specialized concurrent refinement thread stats. G1ConcurrentRefine* cr = _g1h->concurrent_refine(); - G1ConcurrentRefine::RefinementStats cr_stats = cr->total_refinement_stats(); - - Tickspan cr_time = cr_stats._time - _total_concurrent_refinement_time; - _total_concurrent_refinement_time = cr_stats._time; - - size_t cr_cards = cr_stats._cards - _total_concurrent_refined_cards; - _total_concurrent_refined_cards = cr_stats._cards; - - // Don't update rate if full collection. We could be in an implicit full - // collection after a non-full collection failure, in which case there - // wasn't any mutator/cr-thread activity since last recording. And if - // we're in an explicit full collection, the time since the last GC can - // be arbitrarily short, so not a very good sample. Similarly, don't - // update the rate if the current sample is empty or time is zero. - if (!is_full_collection && (cr_cards > 0) && (cr_time > Tickspan())) { - double rate = cr_cards / (cr_time.seconds() * MILLIUNITS); + G1ConcurrentRefineStats cr_stats = cr->get_and_reset_refinement_stats(); + + G1ConcurrentRefineStats total_stats = mut_stats + cr_stats; + + log_refinement_stats("Mutator", mut_stats); + log_refinement_stats("Concurrent", cr_stats); + log_refinement_stats("Total", total_stats); + + // Record the rate at which cards were refined. + // Don't update the rate if the current sample is empty or time is zero. + Tickspan refinement_time = total_stats.refinement_time(); + size_t refined_cards = total_stats.refined_cards(); + if ((refined_cards > 0) && (refinement_time > Tickspan())) { + double rate = refined_cards / (refinement_time.seconds() * MILLIUNITS); _analytics->report_concurrent_refine_rate_ms(rate); + log_debug(gc, refine, stats)("Concurrent refinement rate: %.2f cards/ms", rate); } - // Record info about mutator thread processing. - G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); - size_t mut_total_cards = dcqs.total_mutator_refined_cards(); - size_t mut_cards = mut_total_cards - _total_mutator_refined_cards; - _total_mutator_refined_cards = mut_total_cards; - // Record mutator's card logging rate. - // Don't update if full collection; see above. - if (!is_full_collection) { - size_t total_cards = _pending_cards_at_gc_start + cr_cards + mut_cards; - assert(_pending_cards_at_prev_gc_end <= total_cards, - "untracked cards: last pending: " SIZE_FORMAT - ", pending: " SIZE_FORMAT ", conc refine: " SIZE_FORMAT - ", mut refine:" SIZE_FORMAT, - _pending_cards_at_prev_gc_end, _pending_cards_at_gc_start, - cr_cards, mut_cards); - size_t logged_cards = total_cards - _pending_cards_at_prev_gc_end; - double logging_start_time = _analytics->prev_collection_pause_end_ms(); - double logging_end_time = Ticks::now().seconds() * MILLIUNITS; - double logging_time = logging_end_time - logging_start_time; - // Unlike above for conc-refine rate, here we should not require a - // non-empty sample, since an application could go some time with only - // young-gen or filtered out writes. But we'll ignore unusually short - // sample periods, as they may just pollute the predictions. - if (logging_time > 1.0) { // Require > 1ms sample time. - _analytics->report_logged_cards_rate_ms(logged_cards / logging_time); - } + double mut_start_time = _analytics->prev_collection_pause_end_ms(); + double mut_end_time = phase_times()->cur_collection_start_sec() * MILLIUNITS; + double mut_time = mut_end_time - mut_start_time; + // Unlike above for conc-refine rate, here we should not require a + // non-empty sample, since an application could go some time with only + // young-gen or filtered out writes. But we'll ignore unusually short + // sample periods, as they may just pollute the predictions. + if (mut_time > 1.0) { // Require > 1ms sample time. + double dirtied_rate = total_stats.dirtied_cards() / mut_time; + _analytics->report_dirtied_cards_rate_ms(dirtied_rate); + log_debug(gc, refine, stats)("Generate dirty cards rate: %.2f cards/ms", dirtied_rate); } } @@ -536,7 +531,7 @@ void G1Policy::record_collection_pause_start(double start_time_sec) { phase_times()->record_cur_collection_start_sec(start_time_sec); - record_concurrent_refinement_data(false /* is_full_collection */); + record_concurrent_refinement_stats(); _collection_set->reset_bytes_used_before(); @@ -830,7 +825,6 @@ void G1Policy::record_collection_pause_end(double pause_time_ms) { scan_logged_cards_time_goal_ms -= merge_hcc_time_ms; } - _pending_cards_at_prev_gc_end = _g1h->pending_card_num(); double const logged_cards_time = logged_cards_processing_time(); log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms", diff --git a/src/hotspot/share/gc/g1/g1Policy.hpp b/src/hotspot/share/gc/g1/g1Policy.hpp index 2535635b0f1..b52a9f7013d 100644 --- a/src/hotspot/share/gc/g1/g1Policy.hpp +++ b/src/hotspot/share/gc/g1/g1Policy.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -101,10 +101,6 @@ class G1Policy: public CHeapObj { size_t _rs_length_prediction; size_t _pending_cards_at_gc_start; - size_t _pending_cards_at_prev_gc_end; - size_t _total_mutator_refined_cards; - size_t _total_concurrent_refined_cards; - Tickspan _total_concurrent_refinement_time; // The amount of allocated bytes in old gen during the last mutator and the following // young GC phase. @@ -287,7 +283,8 @@ class G1Policy: public CHeapObj { // Indicate that we aborted marking before doing any mixed GCs. void abort_time_to_mixed_tracking(); - void record_concurrent_refinement_data(bool is_full_collection); + // Record and log stats before not-full collection. + void record_concurrent_refinement_stats(); public: diff --git a/src/hotspot/share/gc/g1/g1RemSet.cpp b/src/hotspot/share/gc/g1/g1RemSet.cpp index e89919107b7..cc2fbbe7295 100644 --- a/src/hotspot/share/gc/g1/g1RemSet.cpp +++ b/src/hotspot/share/gc/g1/g1RemSet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -910,9 +910,6 @@ void G1RemSet::prepare_region_for_scan(HeapRegion* region) { } void G1RemSet::prepare_for_scan_heap_roots() { - G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); - dcqs.concatenate_logs(); - _scan_state->prepare(); } diff --git a/src/hotspot/share/gc/g1/g1RemSetSummary.cpp b/src/hotspot/share/gc/g1/g1RemSetSummary.cpp index f78971fcc5f..6c06e3d8dff 100644 --- a/src/hotspot/share/gc/g1/g1RemSetSummary.cpp +++ b/src/hotspot/share/gc/g1/g1RemSetSummary.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,12 +47,10 @@ void G1RemSetSummary::update() { G1ConcurrentRefineThread* crt = static_cast(t); _summary->set_rs_thread_vtime(_counter, crt->vtime_accum()); _counter++; - _summary->_total_concurrent_refined_cards += crt->total_refined_cards(); } } collector(this); G1CollectedHeap* g1h = G1CollectedHeap::heap(); g1h->concurrent_refine()->threads_do(&collector); - _total_mutator_refined_cards = G1BarrierSet::dirty_card_queue_set().total_mutator_refined_cards(); _num_coarsenings = HeapRegionRemSet::n_coarsenings(); set_sampling_thread_vtime(g1h->sampling_thread()->vtime_accum()); @@ -71,8 +69,6 @@ double G1RemSetSummary::rs_thread_vtime(uint thread) const { } G1RemSetSummary::G1RemSetSummary(bool should_update) : - _total_mutator_refined_cards(0), - _total_concurrent_refined_cards(0), _num_coarsenings(0), _num_vtimes(G1ConcurrentRefine::max_num_threads()), _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)), @@ -93,9 +89,6 @@ void G1RemSetSummary::set(G1RemSetSummary* other) { assert(other != NULL, "just checking"); assert(_num_vtimes == other->_num_vtimes, "just checking"); - _total_mutator_refined_cards = other->total_mutator_refined_cards(); - _total_concurrent_refined_cards = other->total_concurrent_refined_cards(); - _num_coarsenings = other->num_coarsenings(); memcpy(_rs_threads_vtimes, other->_rs_threads_vtimes, sizeof(double) * _num_vtimes); @@ -107,9 +100,6 @@ void G1RemSetSummary::subtract_from(G1RemSetSummary* other) { assert(other != NULL, "just checking"); assert(_num_vtimes == other->_num_vtimes, "just checking"); - _total_mutator_refined_cards = other->total_mutator_refined_cards() - _total_mutator_refined_cards; - _total_concurrent_refined_cards = other->total_concurrent_refined_cards() - _total_concurrent_refined_cards; - _num_coarsenings = other->num_coarsenings() - _num_coarsenings; for (uint i = 0; i < _num_vtimes; i++) { @@ -330,14 +320,6 @@ class HRRSStatsIter: public HeapRegionClosure { }; void G1RemSetSummary::print_on(outputStream* out) { - out->print_cr(" Recent concurrent refinement statistics"); - out->print_cr(" Of " SIZE_FORMAT " refined cards:", total_refined_cards()); - out->print_cr(" " SIZE_FORMAT_W(8) " (%5.1f%%) by concurrent refinement threads.", - total_concurrent_refined_cards(), - percent_of(total_concurrent_refined_cards(), total_refined_cards())); - out->print_cr(" " SIZE_FORMAT_W(8) " (%5.1f%%) by mutator threads.", - total_mutator_refined_cards(), - percent_of(total_mutator_refined_cards(), total_refined_cards())); out->print_cr(" Did " SIZE_FORMAT " coarsenings.", num_coarsenings()); out->print_cr(" Concurrent refinement threads times (s)"); out->print(" "); diff --git a/src/hotspot/share/gc/g1/g1RemSetSummary.hpp b/src/hotspot/share/gc/g1/g1RemSetSummary.hpp index fea8d4e489c..4208bd32b5c 100644 --- a/src/hotspot/share/gc/g1/g1RemSetSummary.hpp +++ b/src/hotspot/share/gc/g1/g1RemSetSummary.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,10 +33,6 @@ class G1RemSet; // A G1RemSetSummary manages statistical information about the G1RemSet class G1RemSetSummary { -private: - size_t _total_mutator_refined_cards; - size_t _total_concurrent_refined_cards; - size_t _num_coarsenings; size_t _num_vtimes; @@ -70,18 +66,6 @@ class G1RemSetSummary { return _sampling_thread_vtime; } - size_t total_mutator_refined_cards() const { - return _total_mutator_refined_cards; - } - - size_t total_concurrent_refined_cards() const { - return _total_concurrent_refined_cards; - } - - size_t total_refined_cards() const { - return total_mutator_refined_cards() + total_concurrent_refined_cards(); - } - size_t num_coarsenings() const { return _num_coarsenings; } diff --git a/src/hotspot/share/logging/logPrefix.hpp b/src/hotspot/share/logging/logPrefix.hpp index 8652e7b6361..2027bf6bc29 100644 --- a/src/hotspot/share/logging/logPrefix.hpp +++ b/src/hotspot/share/logging/logPrefix.hpp @@ -82,6 +82,7 @@ DEBUG_ONLY(size_t Test_log_prefix_prefixer(char* buf, size_t len);) LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, remset, tracking)) \ LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, ref)) \ LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, ref, start)) \ + LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, refine, stats)) \ LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, reloc)) \ LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, start)) \ LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, symboltable)) \ diff --git a/src/hotspot/share/runtime/mutexLocker.cpp b/src/hotspot/share/runtime/mutexLocker.cpp index de8240b6cf0..badb5f7e19b 100644 --- a/src/hotspot/share/runtime/mutexLocker.cpp +++ b/src/hotspot/share/runtime/mutexLocker.cpp @@ -74,6 +74,7 @@ Monitor* STS_lock = NULL; Monitor* FullGCCount_lock = NULL; Monitor* G1OldGCCount_lock = NULL; Mutex* Shared_DirtyCardQ_lock = NULL; +Mutex* G1DetachedRefinementStats_lock = NULL; Mutex* MarkStackFreeList_lock = NULL; Mutex* MarkStackChunkList_lock = NULL; Mutex* MonitoringSupport_lock = NULL; @@ -212,6 +213,8 @@ void mutex_init() { def(Shared_DirtyCardQ_lock , PaddedMutex , access + 1, true, _safepoint_check_never); + def(G1DetachedRefinementStats_lock, PaddedMutex, leaf , true, _safepoint_check_never); + def(FreeList_lock , PaddedMutex , leaf , true, _safepoint_check_never); def(OldSets_lock , PaddedMutex , leaf , true, _safepoint_check_never); def(RootRegionScan_lock , PaddedMonitor, leaf , true, _safepoint_check_never); @@ -269,7 +272,7 @@ void mutex_init() { def(PerfDataManager_lock , PaddedMutex , leaf, true, _safepoint_check_always); // used for synchronized access to PerfDataManager resources def(Threads_lock , PaddedMonitor, barrier, true, _safepoint_check_always); // Used for safepoint protocol. - def(NonJavaThreadsList_lock , PaddedMutex, leaf, true, _safepoint_check_never); + def(NonJavaThreadsList_lock , PaddedMutex, barrier, true, _safepoint_check_never); def(NonJavaThreadsListSync_lock , PaddedMutex, leaf, true, _safepoint_check_never); def(VMOperationQueue_lock , PaddedMonitor, nonleaf, true, _safepoint_check_never); // VM_thread allowed to block on these diff --git a/src/hotspot/share/runtime/mutexLocker.hpp b/src/hotspot/share/runtime/mutexLocker.hpp index c46b70ffbeb..f618beb9962 100644 --- a/src/hotspot/share/runtime/mutexLocker.hpp +++ b/src/hotspot/share/runtime/mutexLocker.hpp @@ -71,6 +71,7 @@ extern Monitor* G1OldGCCount_lock; // in support of "concurrent" f extern Mutex* Shared_DirtyCardQ_lock; // Lock protecting dirty card // queue shared by // non-Java threads. +extern Mutex* G1DetachedRefinementStats_lock; // Lock protecting detached refinement stats extern Mutex* MarkStackFreeList_lock; // Protects access to the global mark stack free list. extern Mutex* MarkStackChunkList_lock; // Protects access to the global mark stack chunk list. extern Mutex* MonitoringSupport_lock; // Protects updates to the serviceability memory pools. diff --git a/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java b/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java index fbab8e008b0..00dbafe1764 100644 --- a/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java +++ b/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java @@ -143,6 +143,7 @@ public boolean isAvailable() { new LogMessageWithLevel("Expand Heap After Collection", Level.DEBUG), new LogMessageWithLevel("Region Register", Level.DEBUG), new LogMessageWithLevel("Prepare Heap Roots", Level.DEBUG), + new LogMessageWithLevel("Concatenate Dirty Card Logs", Level.DEBUG), // Free CSet new LogMessageWithLevel("Free Collection Set", Level.DEBUG), new LogMessageWithLevel("Serial Free Collection Set", Level.TRACE), @@ -186,6 +187,7 @@ void checkMessagesAtLevel(OutputAnalyzer output, LogMessageWithLevel messages[], public static void main(String[] args) throws Exception { new TestGCLogMessages().testNormalLogs(); + new TestGCLogMessages().testConcurrentRefinementLogs(); new TestGCLogMessages().testWithToSpaceExhaustionLogs(); new TestGCLogMessages().testWithInitialMark(); new TestGCLogMessages().testExpandHeap(); @@ -221,6 +223,23 @@ private void testNormalLogs() throws Exception { output.shouldHaveExitValue(0); } + LogMessageWithLevel concRefineMessages[] = new LogMessageWithLevel[] { + new LogMessageWithLevel("Mutator refinement: ", Level.DEBUG), + new LogMessageWithLevel("Concurrent refinement: ", Level.DEBUG), + new LogMessageWithLevel("Total refinement: ", Level.DEBUG), + // "Concurrent refinement rate" optionally printed if any. + // "Generate dirty cards rate" optionally printed if any. + }; + + private void testConcurrentRefinementLogs() throws Exception { + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC", + "-Xmx10M", + "-Xlog:gc+refine+stats=debug", + GCTest.class.getName()); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + checkMessagesAtLevel(output, concRefineMessages, Level.DEBUG); + } + LogMessageWithLevel exhFailureMessages[] = new LogMessageWithLevel[] { new LogMessageWithLevel("Evacuation Failure", Level.DEBUG), new LogMessageWithLevel("Recalculate Used", Level.TRACE), diff --git a/test/hotspot/jtreg/gc/g1/TestRemsetLoggingTools.java b/test/hotspot/jtreg/gc/g1/TestRemsetLoggingTools.java index 19e11a71804..66114801bff 100644 --- a/test/hotspot/jtreg/gc/g1/TestRemsetLoggingTools.java +++ b/test/hotspot/jtreg/gc/g1/TestRemsetLoggingTools.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -110,7 +110,7 @@ public static void expectPerRegionRSetSummaries(String result, int expectedCumul } public static void expectRSetSummaries(String result, int expectedCumulative, int expectedPeriodic) throws Exception { - int actualTotal = result.split("concurrent refinement statistics").length - 1; + int actualTotal = result.split("Concurrent refinement threads times").length - 1; int actualCumulative = result.split("Cumulative RS summary").length - 1; if (expectedCumulative != actualCumulative) {