Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8272773: Configurable card table card size #5838

Closed
wants to merge 12 commits into from
Closed
@@ -28,6 +28,11 @@
#include "gc/shared/memset_with_concurrent_readers.hpp"
#include "logging/log.hpp"

uint G1CardTable::min_card_size(size_t region_size) {
int region_size_log = exact_log2_long((jlong)region_size);
return 1 << (region_size_log - LogOfMaxCardsPerRegion);
}

void G1CardTable::g1_mark_as_young(const MemRegion& mr) {
CardValue *const first = byte_for(mr.start());
CardValue *const last = byte_after(mr.last());
@@ -26,6 +26,7 @@
#define SHARE_GC_G1_G1CARDTABLE_HPP

#include "gc/g1/g1RegionToSpaceMapper.hpp"
#include "gc/g1/g1CardSetContainers.hpp"
#include "gc/shared/cardTable.hpp"
#include "oops/oopsHierarchy.hpp"
#include "utilities/macros.hpp"
@@ -75,6 +76,7 @@ class G1CardTable : public CardTable {

static const size_t WordAllClean = SIZE_MAX;
static const size_t WordAllDirty = 0;
static const size_t LogOfMaxCardsPerRegion = BitsPerByte * sizeof(G1CardSetArray::EntryDataType);
Copy link
Contributor Author

@vish-chan vish-chan Oct 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tschatzl Do we need this code once JDK-8275056 (Virtualize G1CardSet containers over heap region) gets integrated? From my understanding, this won't be a restriction from heap region point of view. Do I need to remove this and the changes in g1CardTable.cpp ?

Copy link
Contributor

@tschatzl tschatzl Oct 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. The change you will need will be very similar to 70dcca6 (slightly modified because of review comments); but please wait until it is in. No hurry though, we'll need to complete the CSR before we can integrate anyway.

Thomas

Copy link
Contributor Author

@vish-chan vish-chan Oct 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay, got it.


STATIC_ASSERT(BitsPerByte == 8);
static const size_t WordAlreadyScanned = (SIZE_MAX / 255) * g1_card_already_scanned;
@@ -86,6 +88,8 @@ class G1CardTable : public CardTable {
static CardValue g1_young_card_val() { return g1_young_gen; }
static CardValue g1_scanned_card_val() { return g1_card_already_scanned; }

static uint min_card_size(size_t region_size);

void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN;
void g1_mark_as_young(const MemRegion& mr);

@@ -1661,7 +1661,7 @@ jint G1CollectedHeap::initialize() {

// The G1FromCardCache reserves card with value 0 as "invalid", so the heap must not
// start within the first card.
guarantee(heap_rs.base() >= (char*)G1CardTable::card_size, "Java heap must not start within the first card.");
guarantee(heap_rs.base() >= (char*)(uintptr_t)(G1CardTable::card_size), "Java heap must not start within the first card.");
Copy link
Member

@albertnetymk albertnetymk Nov 18, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about (uintptr_t)heap_rs.base() >= G1CardTable::card_size? Converting an arbitrary integer to a pointer is implementation-defined.

G1FromCardCache::initialize(max_reserved_regions());
// Also create a G1 rem set.
_rem_set = new G1RemSet(this, _card_table, _hot_card_cache);
@@ -91,6 +91,10 @@ void HeapRegion::setup_heap_region_size(size_t max_heap_size) {
guarantee(GrainWords == 0, "we should only set it once");
GrainWords = GrainBytes >> LogHeapWordSize;

// Initialize card size based on the region size.
// Maximum no. of cards per region is 2^16.
CardTable::initialize_card_size(G1CardTable::min_card_size(region_size));

guarantee(CardsPerRegion == 0, "we should only set it once");
CardsPerRegion = GrainBytes >> G1CardTable::card_shift;

@@ -31,11 +31,21 @@
#include "services/memTracker.hpp"
#include "utilities/align.hpp"

uint ObjectStartArray::block_shift = 0;
uint ObjectStartArray::block_size = 0;
uint ObjectStartArray::block_size_in_words = 0;

void ObjectStartArray::initialize_block_size(uint card_shift) {
block_shift = card_shift;
block_size = 1 << block_shift;
block_size_in_words = block_size / sizeof(HeapWord);
}

void ObjectStartArray::initialize(MemRegion reserved_region) {
// We're based on the assumption that we use the same
// size blocks as the card table.
assert((int)block_size == (int)CardTable::card_size, "Sanity");
assert((int)block_size <= 512, "block_size must be less than or equal to 512");
assert(block_size <= MaxBlockSize, "block_size must be less than or equal to " UINT32_FORMAT, MaxBlockSize);

// Calculate how much space must be reserved
_reserved_region = reserved_region;
@@ -52,11 +52,16 @@ class ObjectStartArray : public CHeapObj<mtGC> {
clean_block = -1
};

enum BlockSizeConstants {
block_shift = 9,
block_size = 1 << block_shift,
block_size_in_words = block_size / sizeof(HeapWord)
};
static uint block_shift;
static uint block_size;
static uint block_size_in_words;
Copy link
Contributor

@tschatzl tschatzl Oct 7, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the naming guideline for static members is the same as for regular class members, i.e. use an underscore in front. Maybe wait on changing this for another person to comment on (I did not look up the style guide).
We do not use public static members too often :)

Copy link
Contributor Author

@vish-chan vish-chan Oct 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I've kept the names of all the pre-existing constants (which are now variables) same as before. Otherwise we have to change all the references also.

Copy link
Contributor

@tschatzl tschatzl Oct 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's see what the second reviewer thinks. Otherwise this can be done separately as it might hide the interesting part of this change.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a review (yet), just a response to these questions.

Public data members are very rare in HotSpot code. There are a few POD/standard layout classes (examples include G1BufferNodeList and OopStorage::BasicParState::IterationData), but otherwise accessor functions are preferred. The style-guide says
"Use public accessor functions for member variables accessed outside the class."

The style-guide says this:
"Class data member names have a leading underscore, and use lowercase with words separated by a single underscore (_foo_bar)."
It makes no distinction for public or not, or for static or not. That is intentional, based on some discussion about that question when I was updating the style-guide a while ago.

Of course, there are probably counter-examples to both of these lurking in the code base.

Copy link
Contributor

@tschatzl tschatzl Nov 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer if this introduction of getters were moved to an extra CR, but I'm good to do that now as well; not so much a problem with the block_* ones, more with the ones in CardTable that add lots of noise.


// This maximum is derived from that we need an extra bit for possible
// offsets in the byte for backskip values (this is a hard limit)
Copy link
Contributor

@tschatzl tschatzl Oct 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// This maximum is derived from that we need an extra bit for possible
// offsets in the byte for backskip values (this is a hard limit)
// Maximum size an offset table entry can cover. This maximum is derived from that we need an extra bit for possible
// offsets in the byte for backskip values, leaving 2^7 possible offsets. Mininum object alignment is 8 bytes (2^3), so we can at most represent 2^10 offsets within a BOT value.

(We could make this maximum dependent on ObjectAlignmentInBytes, but I do not know if increasing ObjectAlignmentInBytes gives any advantage. So at most defer this investigation to some other time as I'm already keeping busy some machines)

Copy link
Contributor

@tschatzl tschatzl Nov 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it is interesting to make this size depend on current ObjectAlignmentInBytes after all. I've seen some recent testing where it proved useful. However that may be done as a separate change.
It also seems that reducing the size of the BOT/card table (increasing this maximum) has diminishing returns e.g. wrt to Card Table Clear times.

Maybe it is interesting for you to try out even larger card sizes though (e.g. an ObjectAlignmentInBytes value of 16 should allow a card size of 2048 and so on).

Copy link
Contributor Author

@vish-chan vish-chan Nov 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. I'll explore this and share the results.

static const uint MaxBlockSize = 1024;

// Initialize block size based on card size
static void initialize_block_size(uint card_shift);

protected:

@@ -97,6 +97,8 @@ static size_t default_gen_alignment() {
}

void ParallelArguments::initialize_alignments() {
// Initialize card size before initializing alignments
CardTable::initialize_card_size();
SpaceAlignment = GenAlignment = default_gen_alignment();
HeapAlignment = compute_heap_alignment();
}
@@ -33,6 +33,18 @@
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

uint BOTConstants::LogN = 0;
uint BOTConstants::LogN_words = 0;
uint BOTConstants::N_bytes = 0;
uint BOTConstants::N_words = 0;

void BOTConstants::initialize_bot_size(uint card_shift) {
LogN = card_shift;
LogN_words = LogN - LogHeapWordSize;
N_bytes = 1 << LogN;
N_words = 1 << LogN_words;
}

//////////////////////////////////////////////////////////////////////
// BlockOffsetSharedArray
//////////////////////////////////////////////////////////////////////
@@ -27,6 +27,7 @@

#include "gc/shared/gc_globals.hpp"
#include "gc/shared/memset_with_concurrent_readers.hpp"
#include "gc/shared/cardTable.hpp"
#include "memory/allocation.hpp"
#include "memory/memRegion.hpp"
#include "memory/virtualspace.hpp"
@@ -49,16 +50,20 @@ class ContiguousSpace;

class BOTConstants : public AllStatic {
public:
static const uint LogN = 9;
static const uint LogN_words = LogN - LogHeapWordSize;
static const uint N_bytes = 1 << LogN;
static const uint N_words = 1 << LogN_words;
static uint LogN;
static uint LogN_words;
static uint N_bytes;
static uint N_words;

// entries "e" of at least N_words mean "go back by Base^(e-N_words)."
// All entries are less than "N_words + N_powers".
static const uint LogBase = 4;
static const uint Base = (1 << LogBase);
static const uint N_powers = 14;

// Initialize bot size based on card size
static void initialize_bot_size(uint card_shift);

static size_t power_to_cards_back(uint i) {
return (size_t)1 << (LogBase * i);
}
@@ -93,6 +98,7 @@ class BlockOffsetTable {
BlockOffsetTable(HeapWord* bottom, HeapWord* end):
_bottom(bottom), _end(end) {
assert(_bottom <= _end, "arguments out of order");
assert(BOTConstants::N_bytes == CardTable::card_size, "sanity");
}

// Note that the committed size of the covered space may have changed,
@@ -26,13 +26,48 @@
#include "gc/shared/cardTable.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/space.inline.hpp"
#include "gc/shared/gcLogPrecious.hpp"
#include "gc/parallel/objectStartArray.hpp"
Copy link
Contributor

@tschatzl tschatzl Nov 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This include is in the wrong order and should probably be guarded by #if INCLUDE_PARALLELGC.

#include "logging/log.hpp"
#include "memory/virtualspace.hpp"
#include "runtime/java.hpp"
#include "runtime/os.hpp"
#include "runtime/globals_extension.hpp"
Copy link
Contributor

@tschatzl tschatzl Nov 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we typically include the globals include that contains the flag - this should probably be #include "gc/shared/gc_globals.hpp (and sorted correctly in this list).

#include "services/memTracker.hpp"
#include "utilities/align.hpp"

uint CardTable::card_shift = 0;
uint CardTable::card_size = 0;
uint CardTable::card_size_in_words = 0;

void CardTable::initialize_card_size(uint min_card_size) {
assert(UseG1GC || UseParallelGC || UseSerialGC,
"Initialize card size should only be called by card based collectors.");

// Card size is the max. of minimum permissible value and GCCardSizeInBytes
Copy link
Contributor

@tschatzl tschatzl Nov 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Outdated comment. Remove.

card_size = MAX2(min_card_size, GCCardSizeInBytes);
card_shift = log2i_exact(card_size);
card_size_in_words = card_size / sizeof(HeapWord);

// Set blockOffsetTable size based on card table entry size
BOTConstants::initialize_bot_size(card_shift);

#if INCLUDE_PARALLELGC
// Set ObjectStartArray block size based on card table entry size
ObjectStartArray::initialize_block_size(card_shift);
#endif

if (GCCardSizeInBytes != card_size) {
FLAG_SET_ERGO(GCCardSizeInBytes, card_size);
}

Copy link
Contributor

@tschatzl tschatzl Oct 7, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would also be unnecessary with a constraint function that limits the values of the option.

Copy link
Contributor Author

@vish-chan vish-chan Oct 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FLAG_SET_ERGO is needed for G1 GC. In case of G1, min card size is determined by the region size.
For the following cmdline arguments: -XX:G1HeapRegionSize=32M -XX:GCCardSizeInBytes=256
---- card_size is ergonomically set to 512 instead of 256, as min. card size for 32M region is 512.

Copy link
Contributor

@tschatzl tschatzl Oct 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's leave this question out for now: I am not sure it makes sense to allow a card size of 256; however the common reaction when specifying incompatible command line options would be to bail out.
The question is what to do if the user selects GCCardSizeInBytes=256 and G1 ergonomically decides to use 32m regions. Silently use 16m regions, which are very likely worse than 32m regions in that case?
That is why we should first determine if card size of 256 makes sense somewhere and then decide (or somebody else has a particular opinion on this).

Copy link
Contributor

@tschatzl tschatzl Oct 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That same question will come up if we allow 64m regions later, but the step from 64->32m may not be that big. Another option would be to go away from the 1:1 mapping of cards in the remembered set and cards in the card table. This could probably be fixed, but is a completely different issue of course.

Copy link
Contributor Author

@vish-chan vish-chan Oct 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this current implementation, with G1 GC, card_size is initialized after determining the region_size inside setup_heap_region_size(). card_size value is determined as MAX(GCCardSizeInByte, minimum card size for the region size). So, if user selects GCCardSizeInBytes=256 and G1 ergonomically decides to use 32M regions, card_size is silently set to 512b. And, if user selects GCCardSizeInBytes=1024 and G1 ergonomically decides to use 32M regions, card_size stays at 1024b. In case user doesn't specify any card_size, default value of 512b is used. We can set it ergonomically, but thats a completely new problem.
With Parallel and Serial GC, we directly use GCCardSizeInBytes, as there's no dependency.

Copy link
Contributor

@tschatzl tschatzl Oct 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand the code, but the problems are elsewhere:

  • this restriction is G1 only, but the code handling this situation is in common code (in the common cardTable.cpp, not in e.g. g1CardTable*). I do not have an extremely strong opinion about this at the moment.
  • the code overrides a user decision silently, which is not a good idea imho. The typical solution is that other settings that the user did not explicitly specified try to adapt to these. However in this case, doing that would be in almost all cases disadvantageous (I guess). Let's see what other people think about this.

log_info_p(gc, init)("CardTable entry size: " UINT32_FORMAT, card_size);
}

void CardTable::initialize_card_size() {
initialize_card_size(CardSizeMin);
}

size_t CardTable::compute_byte_map_size() {
assert(_guard_index == cards_required(_whole_heap.word_size()) - 1,
"uninitialized, check declaration order");
@@ -41,6 +76,7 @@ size_t CardTable::compute_byte_map_size() {
return align_up(_guard_index + 1, MAX2(_page_size, granularity));
}


CardTable::CardTable(MemRegion whole_heap) :
_whole_heap(whole_heap),
_guard_index(0),
@@ -56,8 +92,6 @@ CardTable::CardTable(MemRegion whole_heap) :
{
assert((uintptr_t(_whole_heap.start()) & (card_size - 1)) == 0, "heap must start at card boundary");
assert((uintptr_t(_whole_heap.end()) & (card_size - 1)) == 0, "heap must end at card boundary");

assert(card_size <= 512, "card_size must be less than 512"); // why?
}

CardTable::~CardTable() {
@@ -428,7 +462,8 @@ MemRegion CardTable::dirty_card_range_after_reset(MemRegion mr,
}

uintx CardTable::ct_max_alignment_constraint() {
return card_size * os::vm_page_size();
// CardTable max alignment is computed with card_size_max
return CardSizeMax * os::vm_page_size();
}

void CardTable::verify_guard() {
@@ -228,17 +228,25 @@ class CardTable: public CHeapObj<mtGC> {
MemRegion dirty_card_range_after_reset(MemRegion mr, bool reset,
int reset_val);

// Constants
enum SomePublicConstants {
card_shift = 9,
card_size = 1 << card_shift,
card_size_in_words = card_size / sizeof(HeapWord)
};
// CardTable entry size
static uint card_shift;
static uint card_size;
static uint card_size_in_words;

// min and max permissible card sizes
static const uint CardSizeMin = 512;
static const uint CardSizeMax = 1024;

static constexpr CardValue clean_card_val() { return clean_card; }
static constexpr CardValue dirty_card_val() { return dirty_card; }
static intptr_t clean_card_row_val() { return clean_card_row; }

// Initialize card size based on the min_card_size value
static void initialize_card_size(uint min_card_size);

//Initialize card size based on CardTable::card_size_min
static void initialize_card_size();

// Card marking array base (adjusted for heap low boundary)
// This would be the 0th element of _byte_map, if the heap started at 0x0.
// But since the heap starts at some higher address, this points to somewhere
@@ -692,9 +692,13 @@
product(uintx, GCDrainStackTargetSize, 64, \
"Number of entries we will try to leave on the stack " \
"during parallel gc") \
range(0, max_juint)

// end of GC_FLAGS
range(0, max_juint) \
\
product(uint, GCCardSizeInBytes, 512, \
"Card table entry size (in bytes) for card based collectors") \
range(512, 1024) \
constraint(GCCardSizeInBytesConstraintFunc, AfterErgo) \
Copy link
Contributor

@tschatzl tschatzl Nov 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
constraint(GCCardSizeInBytesConstraintFunc, AfterErgo) \
constraint(GCCardSizeInBytesConstraintFunc,AtParse)

Before AfterErgo there should be no space as in other constraints in this file. Also, since it is the last item in the macro, there is no need to have the \ at the very end.
If this constraint were made AtParse as suggested, then we can use the actual value of GCCardSize in the ct_alignment_constraint() function, not requiring a minimum heap size of 4MB (on x86), i.e. a doubling of minimum heap size.

// end of GC_FLAGS
Copy link
Contributor

@tschatzl tschatzl Oct 7, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See e.g. MaxTenuringThreshold for how adding a constraint function works.


DECLARE_FLAGS(GC_FLAGS)

@@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "gc/shared/genArguments.hpp"
#include "gc/shared/generation.hpp"
#include "gc/shared/cardTable.hpp"
#include "logging/log.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
@@ -61,6 +62,8 @@ static size_t bound_minus_alignment(size_t desired_size,
}

void GenArguments::initialize_alignments() {
// Initialize card size before initializing alignments
CardTable::initialize_card_size();
SpaceAlignment = GenAlignment = (size_t)Generation::GenGrain;
HeapAlignment = compute_heap_alignment();
}
@@ -424,3 +424,17 @@ JVMFlag::Error MaxMetaspaceSizeConstraintFunc(size_t value, bool verbose) {
}
}

JVMFlag::Error GCCardSizeInBytesConstraintFunc(uint value, bool verbose) {
if(!(UseG1GC || UseParallelGC || UseSerialGC))
return JVMFlag::SUCCESS;
Copy link
Contributor

@tschatzl tschatzl Nov 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove this check - if we are doing the constraint check at parse time, the Use***GC flags have not been set. It does not hurt to do the unnecessary check either even a GC does not support that option.

if (!is_power_of_2(value)) {
JVMFlag::printError(verbose,
"GCCardSizeInBytes (" UINT32_FORMAT ") must be "
Copy link
Contributor

@tschatzl tschatzl Nov 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use %u instead of UINT32_FORMAT.

"a power of 2\n",
value);
return JVMFlag::VIOLATES_CONSTRAINT;
} else {
return JVMFlag::SUCCESS;
}
}

@@ -66,7 +66,8 @@
f(uintx, TLABWasteIncrementConstraintFunc) \
f(uintx, SurvivorRatioConstraintFunc) \
f(size_t, MetaspaceSizeConstraintFunc) \
f(size_t, MaxMetaspaceSizeConstraintFunc)
f(size_t, MaxMetaspaceSizeConstraintFunc) \
f(uint, GCCardSizeInBytesConstraintFunc)

SHARED_GC_CONSTRAINTS(DECLARE_CONSTRAINT)