Skip to content
Permalink
Browse files
8242078: G1: Improve concurrent refinement analytics and logging
Unify data collection and reporting.

Reviewed-by: tschatzl, sjohanss
  • Loading branch information
Kim Barrett committed Apr 14, 2020
1 parent b98e863 commit 4694da31f8af46a25f84fe2b9483c9d4d654dc0f
Showing 25 changed files with 408 additions and 219 deletions.
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -79,7 +79,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
_prev_collection_pause_end_ms(0.0),
_rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
_concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_dirtied_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_young_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
_mixed_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
_young_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -107,7 +107,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
// Start with inverse of maximum STW cost.
_concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]);
// Some applications have very low rates for logging cards.
_logged_cards_rate_ms_seq->add(0.0);
_dirtied_cards_rate_ms_seq->add(0.0);
_young_card_merge_to_scan_ratio_seq->add(young_card_merge_to_scan_ratio_defaults[index]);
_young_cost_per_card_scan_ms_seq->add(young_only_cost_per_card_scan_ms_defaults[index]);

@@ -168,8 +168,8 @@ void G1Analytics::report_concurrent_refine_rate_ms(double cards_per_ms) {
_concurrent_refine_rate_ms_seq->add(cards_per_ms);
}

void G1Analytics::report_logged_cards_rate_ms(double cards_per_ms) {
_logged_cards_rate_ms_seq->add(cards_per_ms);
void G1Analytics::report_dirtied_cards_rate_ms(double cards_per_ms) {
_dirtied_cards_rate_ms_seq->add(cards_per_ms);
}

void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_gc) {
@@ -236,8 +236,8 @@ double G1Analytics::predict_concurrent_refine_rate_ms() const {
return predict_zero_bounded(_concurrent_refine_rate_ms_seq);
}

double G1Analytics::predict_logged_cards_rate_ms() const {
return predict_zero_bounded(_logged_cards_rate_ms_seq);
double G1Analytics::predict_dirtied_cards_rate_ms() const {
return predict_zero_bounded(_dirtied_cards_rate_ms_seq);
}

double G1Analytics::predict_young_card_merge_to_scan_ratio() const {
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,7 @@ class G1Analytics: public CHeapObj<mtGC> {

TruncatedSeq* _rs_length_diff_seq;
TruncatedSeq* _concurrent_refine_rate_ms_seq;
TruncatedSeq* _logged_cards_rate_ms_seq;
TruncatedSeq* _dirtied_cards_rate_ms_seq;
// The ratio between the number of merged cards and actually scanned cards, for
// young-only and mixed gcs.
TruncatedSeq* _young_card_merge_to_scan_ratio_seq;
@@ -115,7 +115,7 @@ class G1Analytics: public CHeapObj<mtGC> {
void report_concurrent_mark_cleanup_times_ms(double ms);
void report_alloc_rate_ms(double alloc_rate);
void report_concurrent_refine_rate_ms(double cards_per_ms);
void report_logged_cards_rate_ms(double cards_per_ms);
void report_dirtied_cards_rate_ms(double cards_per_ms);
void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_gc);
void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc);
void report_card_merge_to_scan_ratio(double cards_per_entry_ratio, bool for_young_gc);
@@ -131,7 +131,7 @@ class G1Analytics: public CHeapObj<mtGC> {
int num_alloc_rate_ms() const;

double predict_concurrent_refine_rate_ms() const;
double predict_logged_cards_rate_ms() const;
double predict_dirtied_cards_rate_ms() const;
double predict_young_card_merge_to_scan_ratio() const;

double predict_mixed_card_merge_to_scan_ratio() const;
@@ -157,5 +157,5 @@ void G1BarrierSet::on_thread_detach(Thread* thread) {
// Flush any deferred card marks.
CardTableBarrierSet::on_thread_detach(thread);
G1ThreadLocalData::satb_mark_queue(thread).flush();
G1ThreadLocalData::dirty_card_queue(thread).flush();
G1ThreadLocalData::dirty_card_queue(thread).on_thread_detach();
}
@@ -1054,10 +1054,11 @@ void G1CollectedHeap::abort_refinement() {
_hot_card_cache->reset_hot_cache();
}

// Discard all remembered set updates.
// Discard all remembered set updates and reset refinement statistics.
G1BarrierSet::dirty_card_queue_set().abandon_logs();
assert(G1BarrierSet::dirty_card_queue_set().num_cards() == 0,
"DCQS should be empty");
concurrent_refine()->get_and_reset_refinement_stats();
}

void G1CollectedHeap::verify_after_full_collection() {
@@ -2684,9 +2685,22 @@ void G1CollectedHeap::gc_prologue(bool full) {
}

// Fill TLAB's and such
double start = os::elapsedTime();
ensure_parsability(true);
phase_times()->record_prepare_tlab_time_ms((os::elapsedTime() - start) * 1000.0);
{
Ticks start = Ticks::now();
ensure_parsability(true);
Tickspan dt = Ticks::now() - start;
phase_times()->record_prepare_tlab_time_ms(dt.seconds() * MILLIUNITS);
}

if (!full) {
// Flush dirty card queues to qset, so later phases don't need to account
// for partially filled per-thread queues and such. Not needed for full
// collections, which ignore those logs.
Ticks start = Ticks::now();
G1BarrierSet::dirty_card_queue_set().concatenate_logs();
Tickspan dt = Ticks::now() - start;
phase_times()->record_concatenate_dirty_card_logs_time_ms(dt.seconds() * MILLIUNITS);
}
}

void G1CollectedHeap::gc_epilogue(bool full) {
@@ -2759,20 +2773,6 @@ void G1CollectedHeap::do_concurrent_mark() {
}
}

size_t G1CollectedHeap::pending_card_num() {
struct CountCardsClosure : public ThreadClosure {
size_t _cards;
CountCardsClosure() : _cards(0) {}
virtual void do_thread(Thread* t) {
_cards += G1ThreadLocalData::dirty_card_queue(t).size();
}
} count_from_threads;
Threads::threads_do(&count_from_threads);

G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
return dcqs.num_cards() + count_from_threads._cards;
}

bool G1CollectedHeap::is_potential_eager_reclaim_candidate(HeapRegion* r) const {
// We don't nominate objects with many remembered set entries, on
// the assumption that such objects are likely still live.
@@ -1465,8 +1465,6 @@ class G1CollectedHeap : public CollectedHeap {

// Used to print information about locations in the hs_err file.
virtual bool print_location(outputStream* st, void* addr) const;

size_t pending_card_num();
};

class G1ParEvacuateFollowersClosure : public VoidClosure {
@@ -408,20 +408,18 @@ void G1ConcurrentRefine::adjust(double logged_cards_scan_time,
dcqs.notify_if_necessary();
}

G1ConcurrentRefine::RefinementStats G1ConcurrentRefine::total_refinement_stats() const {
struct CollectData : public ThreadClosure {
Tickspan _total_time;
size_t _total_cards;
CollectData() : _total_time(), _total_cards(0) {}
G1ConcurrentRefineStats G1ConcurrentRefine::get_and_reset_refinement_stats() {
struct CollectStats : public ThreadClosure {
G1ConcurrentRefineStats _total_stats;
virtual void do_thread(Thread* t) {
G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
_total_time += crt->total_refinement_time();
_total_cards += crt->total_refined_cards();
G1ConcurrentRefineStats& stats = *crt->refinement_stats();
_total_stats += stats;
stats.reset();
}
} collector;
// Cast away const so we can call non-modifying closure on threads.
const_cast<G1ConcurrentRefine*>(this)->threads_do(&collector);
return RefinementStats(collector._total_time, collector._total_cards);
threads_do(&collector);
return collector._total_stats;
}

size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const {
@@ -445,7 +443,7 @@ void G1ConcurrentRefine::maybe_activate_more_threads(uint worker_id, size_t num_
}

bool G1ConcurrentRefine::do_refinement_step(uint worker_id,
size_t* total_refined_cards) {
G1ConcurrentRefineStats* stats) {
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();

size_t curr_cards = dcqs.num_cards();
@@ -460,5 +458,5 @@ bool G1ConcurrentRefine::do_refinement_step(uint worker_id,
// Process the next buffer, if there are enough left.
return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(),
deactivation_threshold(worker_id),
total_refined_cards);
stats);
}
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
#ifndef SHARE_GC_G1_G1CONCURRENTREFINE_HPP
#define SHARE_GC_G1_G1CONCURRENTREFINE_HPP

#include "gc/g1/g1ConcurrentRefineStats.hpp"
#include "memory/allocation.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/ticks.hpp"
@@ -119,22 +120,18 @@ class G1ConcurrentRefine : public CHeapObj<mtGC> {
// Adjust refinement thresholds based on work done during the pause and the goal time.
void adjust(double logged_cards_scan_time, size_t processed_logged_cards, double goal_ms);

// Plain pair of totals: a time span and a card count.
struct RefinementStats {
  Tickspan _time;
  size_t _cards;

  RefinementStats(Tickspan time, size_t cards)
    : _time(time),
      _cards(cards) {
  }
};

RefinementStats total_refinement_stats() const;
// Return total of concurrent refinement stats for the
// ConcurrentRefineThreads. Also reset the stats for the threads.
G1ConcurrentRefineStats get_and_reset_refinement_stats();

// Cards in the dirty card queue set.
size_t activation_threshold(uint worker_id) const;
size_t deactivation_threshold(uint worker_id) const;

// Perform a single refinement step; called by the refinement
// threads. Returns true if there was refinement work available.
// Increments *total_refined_cards.
bool do_refinement_step(uint worker_id, size_t* total_refined_cards);
// Updates stats.
bool do_refinement_step(uint worker_id, G1ConcurrentRefineStats* stats);

// Iterate over all concurrent refinement threads applying the given closure.
void threads_do(ThreadClosure *tc);
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "precompiled.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"

// Creates an empty set of statistics: a default-constructed refinement time
// and every card counter at zero.
G1ConcurrentRefineStats::G1ConcurrentRefineStats()
  : _refinement_time(),
    _refined_cards(0),
    _precleaned_cards(0),
    _dirtied_cards(0) {
}

// Adds each component of `other` to the matching component of this object
// and returns this object.
G1ConcurrentRefineStats&
G1ConcurrentRefineStats::operator+=(const G1ConcurrentRefineStats& other) {
  inc_refinement_time(other.refinement_time());
  inc_refined_cards(other.refined_cards());
  inc_precleaned_cards(other.precleaned_cards());
  inc_dirtied_cards(other.dirtied_cards());
  return *this;
}

// Subtraction that never goes below the value-initialized T: yields T()
// when x is less than y, otherwise x - y.
template<typename T>
static T clipped_sub(T x, T y) {
  if (x < y) {
    return T();
  }
  return x - y;
}

// Subtracts each component of `other` from the matching component of this
// object using clipped_sub, so a component that would go below its
// value-initialized state is clipped there instead. Returns this object.
G1ConcurrentRefineStats&
G1ConcurrentRefineStats::operator-=(const G1ConcurrentRefineStats& other) {
  const Tickspan time_left       = clipped_sub(_refinement_time, other._refinement_time);
  const size_t   refined_left    = clipped_sub(_refined_cards, other._refined_cards);
  const size_t   precleaned_left = clipped_sub(_precleaned_cards, other._precleaned_cards);
  const size_t   dirtied_left    = clipped_sub(_dirtied_cards, other._dirtied_cards);

  _refinement_time  = time_left;
  _refined_cards    = refined_left;
  _precleaned_cards = precleaned_left;
  _dirtied_cards    = dirtied_left;
  return *this;
}

void G1ConcurrentRefineStats::reset() {
*this = G1ConcurrentRefineStats();
}
@@ -0,0 +1,78 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#ifndef SHARE_GC_G1_G1CONCURRENTREFINESTATS_HPP
#define SHARE_GC_G1_G1CONCURRENTREFINESTATS_HPP

#include "memory/allocation.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/ticks.hpp"

// Bundle of counters describing concurrent refinement processing.
// An instance holds either the statistics of a single thread or a
// summary accumulated over a collection of threads.
class G1ConcurrentRefineStats : public CHeapObj<mtGC> {
private:
  Tickspan _refinement_time;
  size_t _refined_cards;
  size_t _precleaned_cards;
  size_t _dirtied_cards;

public:
  G1ConcurrentRefineStats();

  // Accessors.

  // Time spent performing concurrent refinement.
  Tickspan refinement_time() const {
    return _refinement_time;
  }

  // Number of refined cards.
  size_t refined_cards() const {
    return _refined_cards;
  }

  // Number of cards for which refinement was skipped because some other
  // thread had already refined them.
  size_t precleaned_cards() const {
    return _precleaned_cards;
  }

  // Number of cards marked dirty and in need of refinement.
  size_t dirtied_cards() const {
    return _dirtied_cards;
  }

  // Incrementers: each adds its argument to the corresponding component.

  void inc_refinement_time(Tickspan t) {
    _refinement_time += t;
  }

  void inc_refined_cards(size_t cards) {
    _refined_cards += cards;
  }

  void inc_precleaned_cards(size_t cards) {
    _precleaned_cards += cards;
  }

  void inc_dirtied_cards(size_t cards) {
    _dirtied_cards += cards;
  }

  // Component-wise compound arithmetic; both return this object.
  G1ConcurrentRefineStats& operator+=(const G1ConcurrentRefineStats& other);
  G1ConcurrentRefineStats& operator-=(const G1ConcurrentRefineStats& other);

  // Binary forms, built on the compound operators. The left operand is
  // taken by value so it can be updated in place and returned.
  friend G1ConcurrentRefineStats operator+(G1ConcurrentRefineStats x,
                                           const G1ConcurrentRefineStats& y) {
    x += y;
    return x;
  }

  friend G1ConcurrentRefineStats operator-(G1ConcurrentRefineStats x,
                                           const G1ConcurrentRefineStats& y) {
    x -= y;
    return x;
  }

  // Return all components to their initial state.
  void reset();
};

#endif // SHARE_GC_G1_G1CONCURRENTREFINESTATS_HPP

0 comments on commit 4694da3

Please sign in to comment.