Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8273626: G1: Factor out concurrent segmented array from G1CardSetAllocator #5478

@@ -32,7 +32,6 @@
#include "utilities/lockFreeStack.hpp"

class G1CardSetAllocOptions;
class G1CardSetBufferList;
class G1CardSetHashTable;
class G1CardSetHashTableValue;
class G1CardSetMemoryManager;
@@ -31,8 +31,6 @@
#include "utilities/growableArray.hpp"
#include "utilities/ticks.hpp"

class G1CardSetBuffer;

// Task handling deallocation of free card set memory.
class G1CardSetFreeMemoryTask : public G1ServiceTask {

@@ -30,99 +30,20 @@
#include "utilities/formatBuffer.hpp"
#include "utilities/ostream.hpp"

// Creates a buffer with room for num_instances elements of elem_size bytes
// each and links it in front of next. The element storage is obtained with
// NEW_C_HEAP_ARRAY and tagged mtGCCardSet; allocation starts at index 0.
G1CardSetBuffer::G1CardSetBuffer(uint elem_size, uint num_instances, G1CardSetBuffer* next) :
  _elem_size(elem_size),
  _num_elems(num_instances),
  _next(next),
  _next_allocate(0) {
  // Widen to size_t before multiplying so the byte count is not computed in
  // 32-bit arithmetic.
  const size_t buffer_bytes = (size_t)_num_elems * elem_size;
  _buffer = NEW_C_HEAP_ARRAY(char, buffer_bytes, mtGCCardSet);
}

// Releases the element storage that the constructor assigned to _buffer.
// NOTE(review): the first macro argument here is mtGCCardSet, whereas the
// matching NEW_C_HEAP_ARRAY call in the constructor passes the element type
// (char) first — confirm which argument FREE_C_HEAP_ARRAY expects.
G1CardSetBuffer::~G1CardSetBuffer() {
FREE_C_HEAP_ARRAY(mtGCCardSet, _buffer);
}

void* G1CardSetBuffer::get_new_buffer_elem() {
if (_next_allocate >= _num_elems) {
return nullptr;
}
uint result = Atomic::fetch_and_add(&_next_allocate, 1u, memory_order_relaxed);
if (result >= _num_elems) {
return nullptr;
}
void* r = _buffer + (uint)result * _elem_size;
return r;
}

// Adds the buffers from first to last to this list in one prepend operation
// and increases the list's buffer count by num and its memory size by mem_size.
// Presumably first..last is an already linked chain of num buffers — confirm
// against the LockFreeStack::prepend(first, last) contract.
// The list update and the two counter updates are three separate operations and
// the counters use relaxed ordering, so the counters are not updated atomically
// together with the list.
void G1CardSetBufferList::bulk_add(G1CardSetBuffer& first, G1CardSetBuffer& last, size_t num, size_t mem_size) {
_list.prepend(first, last);
Atomic::add(&_num_buffers, num, memory_order_relaxed);
Atomic::add(&_mem_size, mem_size, memory_order_relaxed);
}

// Writes a single line of the form "<prefix>: buffers <count> size <bytes>" to
// out, using the current values of the buffer and memory counters.
void G1CardSetBufferList::print_on(outputStream* out, const char* prefix) {
  const size_t buffer_count = Atomic::load(&_num_buffers);
  const size_t byte_count = Atomic::load(&_mem_size);
  out->print_cr("%s: buffers %zu size %zu", prefix, buffer_count, byte_count);
}

// Removes one buffer from the list and returns it, or returns nullptr if the
// list has nothing to offer. On success the buffer count is decremented and the
// memory size is reduced by the buffer's mem_size().
G1CardSetBuffer* G1CardSetBufferList::get() {
  // The pop is performed inside a GlobalCounter critical section.
  GlobalCounter::CriticalSection cs(Thread::current());

  G1CardSetBuffer* const popped = _list.pop();
  if (popped == nullptr) {
    return nullptr;
  }

  Atomic::dec(&_num_buffers, memory_order_relaxed);
  Atomic::sub(&_mem_size, popped->mem_size(), memory_order_relaxed);
  return popped;
}

// Detaches the whole list and returns its head (nullptr if the list was empty).
// num_buffers and mem_size receive the counter values read right after the
// detach; they are written even when nullptr is returned. When a list was
// detached, the same amounts are subtracted from the counters.
G1CardSetBuffer* G1CardSetBufferList::get_all(size_t& num_buffers, size_t& mem_size) {
  // The pop_all is performed inside a GlobalCounter critical section.
  GlobalCounter::CriticalSection cs(Thread::current());

  G1CardSetBuffer* const head = _list.pop_all();

  // Snapshot the counters into the out-parameters unconditionally.
  num_buffers = Atomic::load(&_num_buffers);
  mem_size = Atomic::load(&_mem_size);

  if (head == nullptr) {
    return nullptr;
  }

  Atomic::sub(&_num_buffers, num_buffers, memory_order_relaxed);
  Atomic::sub(&_mem_size, mem_size, memory_order_relaxed);
  return head;
}

void G1CardSetBufferList::free_all() {
size_t num_freed = 0;
size_t mem_size_freed = 0;
G1CardSetBuffer* cur;

while ((cur = _list.pop()) != nullptr) {
mem_size_freed += cur->mem_size();
num_freed++;
delete cur;
}

Atomic::sub(&_num_buffers, num_freed, memory_order_relaxed);
Atomic::sub(&_mem_size, mem_size_freed, memory_order_relaxed);
}

// Constructs an allocator identified by name that is configured by
// buffer_options and uses free_buffer_list as its source of free buffers.
// The node lists start out empty and all counters start at zero.
//
// NOTE(review): the initializer list below appears to contain both a
// field-by-field initialization (_alloc_options ... _free_buffer_list) and the
// G1SegmentedArray base-class initialization; _num_free_nodes is listed twice
// and there is no comma after _num_available_nodes(0). The body likewise checks
// the element size twice. Confirm which set of lines is intended.
template <class Elem>
G1CardSetAllocator<Elem>::G1CardSetAllocator(const char* name,
const G1CardSetAllocOptions& buffer_options,
G1CardSetBufferList* free_buffer_list) :
_alloc_options(buffer_options),
_first(nullptr),
_last(nullptr),
_num_buffers(0),
_mem_size(0),
_free_buffer_list(free_buffer_list),
// Name, options and free list are forwarded to the segmented array base class.
G1SegmentedArray<Elem, mtGCCardSet>(name, buffer_options, free_buffer_list),
_transfer_lock(false),
_free_nodes_list(),
_pending_nodes_list(),
_num_pending_nodes(0),
_num_free_nodes(0),
_num_allocated_nodes(0),
_num_available_nodes(0)
_num_free_nodes(0)
{
// Every element must be at least as large as a G1CardSetContainer.
assert(elem_size() >= sizeof(G1CardSetContainer), "Element instance size %u for allocator %s too small",
elem_size(), name);
assert(_free_buffer_list != nullptr, "precondition!");
// Same size check, with the element size taken from the base class.
uint elem_size = G1SegmentedArray<Elem, mtGCCardSet>::elem_size();
assert(elem_size >= sizeof(G1CardSetContainer), "Element instance size %u for allocator %s too small", elem_size, name);
}

template <class Elem>
@@ -164,7 +85,7 @@ bool G1CardSetAllocator<Elem>::try_transfer_pending() {
template <class Elem>
void G1CardSetAllocator<Elem>::free(Elem* elem) {
assert(elem != nullptr, "precondition");
assert(elem_size() >= sizeof(G1CardSetContainer), "size mismatch");
// assert(elem_size() >= sizeof(G1CardSetContainer), "size mismatch");
Hamlin-Li marked this conversation as resolved.
Show resolved Hide resolved
// Desired minimum transfer batch size. There is relatively little
// importance to the specific number. It shouldn't be too big, else
// we're wasting space when the release rate is low. If the release
@@ -192,47 +113,27 @@ template <class Elem>
// Discards everything this allocator currently tracks: empties the free and
// pending node lists, hands the allocator's chain of buffers to the free buffer
// list, resets all counters to zero and finally calls the base-class drop_all().
//
// NOTE(review): this body both releases the buffers itself (via _first/_last
// and _free_buffer_list) and delegates to G1SegmentedArray::drop_all(); it
// looks like it contains lines from before and after the refactoring —
// confirm which are intended.
void G1CardSetAllocator<Elem>::drop_all() {
// The popped nodes are not used further; only the lists are emptied.
_free_nodes_list.pop_all();
_pending_nodes_list.pop_all();
G1CardSetBuffer* cur = Atomic::load_acquire(&_first);

if (cur != nullptr) {
assert(_last != nullptr, "If there is at least one element, there must be a last one.");

// Remember the head; cur is advanced by the consistency walk below.
G1CardSetBuffer* first = cur;
#ifdef ASSERT
// Check list consistency.
G1CardSetBuffer* last = cur;
uint num_buffers = 0;
size_t mem_size = 0;
while (cur != nullptr) {
mem_size += cur->mem_size();
num_buffers++;

G1CardSetBuffer* next = cur->next();
last = cur;
cur = next;
}
#endif
// The locals used here exist only under ASSERT; presumably assert() expands
// to nothing when ASSERT is not defined — confirm.
assert(num_buffers == _num_buffers, "Buffer count inconsistent %u %u", num_buffers, _num_buffers);
assert(mem_size == _mem_size, "Memory size inconsistent");
assert(last == _last, "Inconsistent last element");

// Return the whole chain, together with its count and size, in one call.
_free_buffer_list->bulk_add(*first, *_last, _num_buffers, _mem_size);
}

_first = nullptr;
_last = nullptr;
_num_available_nodes = 0;
_num_allocated_nodes = 0;
_num_pending_nodes = 0;
_num_buffers = 0;
_mem_size = 0;
_num_free_nodes = 0;

G1SegmentedArray<Elem, mtGCCardSet>::drop_all();
}

// Prints a one-line summary of this allocator to os: its address, the number
// of pending nodes, the allocated and available node counts, the used
// percentage, the element count of the first buffer (0 if there is none), the
// number of buffers and mem_size().
//
// NOTE(review): the argument list below contains both a single-line variant
// that reads member fields and a multi-line variant that reads the values
// fetched from G1SegmentedArray, plus two lines of non-code text — confirm
// which lines are intended.
template <class Elem>
void G1CardSetAllocator<Elem>::print(outputStream* os) {
// Fetch the statistics from the segmented array base class.
uint num_allocated_nodes = G1SegmentedArray<Elem, mtGCCardSet>::num_allocated_nodes();
uint num_available_nodes = G1SegmentedArray<Elem, mtGCCardSet>::num_available_nodes();
const G1SegmentedArrayBuffer<mtGCCardSet>* first_array_buffer = G1SegmentedArray<Elem, mtGCCardSet>::first_array_buffer();
uint num_buffers = G1SegmentedArray<Elem, mtGCCardSet>::num_buffers();
os->print("MA " PTR_FORMAT ": %u elems pending (allocated %u available %u) used %.3f highest %u buffers %u size %zu ",
p2i(this), _num_pending_nodes, _num_allocated_nodes, _num_available_nodes, percent_of(_num_allocated_nodes - _num_pending_nodes, _num_available_nodes), _first != nullptr ? _first->num_elems() : 0, _num_buffers, mem_size());
p2i(this),
_num_pending_nodes,
num_allocated_nodes,
num_available_nodes,
// "used" is computed from allocated nodes minus pending nodes, relative to the available nodes.
percent_of(num_allocated_nodes - _num_pending_nodes, num_available_nodes),
first_array_buffer != nullptr ? first_array_buffer->num_elems() : 0,
Hamlin-Li marked this conversation as resolved.
Show resolved Hide resolved
num_buffers,
mem_size());
}

G1CardSetMemoryStats::G1CardSetMemoryStats() {
@@ -27,6 +27,8 @@

#include "gc/g1/g1CardSet.hpp"
#include "gc/g1/g1CardSetContainers.hpp"
#include "gc/g1/g1CardSetContainers.inline.hpp"
#include "gc/g1/g1SegmentedArray.hpp"
#include "memory/allocation.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/lockFreeStack.hpp"
@@ -36,25 +38,14 @@ class outputStream;

// Collects G1CardSetAllocator options/heuristics. Called by G1CardSetAllocator
// to determine the next size of the allocated G1CardSetBuffer.
class G1CardSetAllocOptions {
uint _elem_size;
uint _initial_num_elems;
// Defines a limit to the number of elements in the buffer
uint _max_num_elems;

uint exponential_expand(uint prev_num_elems) {
return clamp(prev_num_elems * 2, _initial_num_elems, _max_num_elems);
}

Hamlin-Li marked this conversation as resolved.
Show resolved Hide resolved
class G1CardSetAllocOptions : public G1SegmentedArrayAllocOptions {

public:
static const uint BufferAlignment = 8;
static const uint MinimumBufferSize = 8;
static const uint MaximumBufferSize = UINT_MAX / 2;

G1CardSetAllocOptions(uint elem_size, uint initial_num_elems = MinimumBufferSize, uint max_num_elems = MaximumBufferSize) :
_elem_size(align_up(elem_size, BufferAlignment)),
_initial_num_elems(initial_num_elems),
_max_num_elems(max_num_elems) {
G1SegmentedArrayAllocOptions(align_up(elem_size, BufferAlignment), initial_num_elems, max_num_elems, BufferAlignment) {
}

uint next_num_elems(uint prev_num_elems) {
Hamlin-Li marked this conversation as resolved.
Show resolved Hide resolved
@@ -64,85 +55,9 @@ class G1CardSetAllocOptions {
uint elem_size () const {return _elem_size;}
Hamlin-Li marked this conversation as resolved.
Show resolved Hide resolved
};

// One contiguous chunk of memory divided into _num_elems slots of _elem_size
// bytes each. Slots are handed out in index order and never returned
// individually; reset() makes the whole buffer reusable.
// Buffers can be chained into a singly linked list through _next.
class G1CardSetBuffer : public CHeapObj<mtGCCardSet> {
  uint _elem_size;   // Size of one slot in bytes.
  uint _num_elems;   // Number of slots in _buffer.

  G1CardSetBuffer* volatile _next;   // Successor in the list this buffer is part of.

  char* _buffer;  // Actual data.

  // Index of the next slot to hand out. The buffer is full once this reaches
  // _num_elems; it may also exceed _num_elems, because allocators increment it
  // atomically first and only then check whether the claimed index is valid.
  uint volatile _next_allocate;

public:
  G1CardSetBuffer(uint elem_size, uint num_elems, G1CardSetBuffer* next);
  ~G1CardSetBuffer();

  // Address of the link field.
  G1CardSetBuffer* volatile* next_addr() { return &_next; }

  // Claims the next free slot; see the definition for details.
  void* get_new_buffer_elem();

  uint num_elems() const { return _num_elems; }

  uint elem_size() const { return _elem_size; }

  G1CardSetBuffer* next() const { return _next; }

  // Links this buffer in front of next; a buffer must not point to itself.
  void set_next(G1CardSetBuffer* next) {
    assert(next != this, " loop condition");
    _next = next;
  }

  // Prepares the buffer for reuse: restarts allocation at slot 0, relinks the
  // buffer in front of next and zero-fills all slots.
  void reset(G1CardSetBuffer* next) {
    _next_allocate = 0;
    assert(next != this, " loop condition");
    set_next(next);
    const size_t payload_bytes = (size_t)_num_elems * _elem_size;
    memset((void*)_buffer, 0, payload_bytes);
  }

  // Bytes occupied by this object plus its slot storage.
  size_t mem_size() const {
    const size_t payload_bytes = (size_t)_num_elems * _elem_size;
    return sizeof(*this) + payload_bytes;
  }

  // True once every slot has been claimed.
  bool is_full() const { return !(_next_allocate < _num_elems); }
};

// Set of (free) G1CardSetBuffers, kept in a lock-free stack together with a
// count of the buffers and the sum of their memory sizes.
// The assumed usage is that adding buffers to it and removing buffers from it
// are strictly separate phases, but each of these actions may be performed by
// multiple threads at the same time.
// Counts and memory usage are current on a best-effort basis if accessed concurrently.
class G1CardSetBufferList {
// Accessor handed to LockFreeStack so it can reach a buffer's link field.
static G1CardSetBuffer* volatile* next_ptr(G1CardSetBuffer& node) {
return node.next_addr();
}
typedef LockFreeStack<G1CardSetBuffer, &next_ptr> NodeStack;

NodeStack _list;

// Number of buffers and total memory size accounted to this list.
volatile size_t _num_buffers;
volatile size_t _mem_size;

public:
// Starts empty; the destructor frees every buffer still on the list.
G1CardSetBufferList() : _list(), _num_buffers(0), _mem_size(0) { }
~G1CardSetBufferList() { free_all(); }

// Prepends the buffers first..last and adds num / mem_size to the counters.
void bulk_add(G1CardSetBuffer& first, G1CardSetBuffer& last, size_t num, size_t mem_size);
// Prepends a single buffer.
// NOTE(review): unlike bulk_add(), this only touches _list; _num_buffers and
// _mem_size are not updated here — confirm that this is intended.
void add(G1CardSetBuffer& elem) { _list.prepend(elem); }

// Remove one buffer, or all buffers at once; get_all() reports the counter
// values through its out-parameters.
G1CardSetBuffer* get();
G1CardSetBuffer* get_all(size_t& num_buffers, size_t& mem_size);
// NOTE(review): this typedef names the buffer type that the members above
// already use and sits in the middle of the member declarations; it looks like
// a line from the refactored version of this file — confirm.
typedef G1SegmentedArrayBuffer<mtGCCardSet> G1CardSetBuffer;

// Give back all memory to the OS.
void free_all();

// Prints the buffer count and memory size to out, preceded by prefix.
void print_on(outputStream* out, const char* prefix = "");

size_t num_buffers() const { return Atomic::load(&_num_buffers); }
size_t mem_size() const { return Atomic::load(&_mem_size); }
};
typedef G1SegmentedArrayBufferList<mtGCCardSet> G1CardSetBufferList;

// Arena-like allocator for (card set) heap memory objects (Elem elements).
//
@@ -181,24 +96,11 @@ class G1CardSetBufferList {
// own set of allocators, there is intentionally no padding between them to save
// memory.
template <class Elem>
class G1CardSetAllocator {
class G1CardSetAllocator : public G1SegmentedArray<Elem, mtGCCardSet> {
Hamlin-Li marked this conversation as resolved.
Show resolved Hide resolved
// G1CardSetBuffer management.

// G1CardSetAllocOptions provides parameters for allocation buffer
// sizing and expansion.
G1CardSetAllocOptions _alloc_options;

G1CardSetBuffer* volatile _first; // The (start of the) list of all buffers.
G1CardSetBuffer* _last; // The last element of the list of all buffers.
volatile uint _num_buffers; // Number of assigned buffers to this allocator.
volatile size_t _mem_size; // Memory used by all buffers.

G1CardSetBufferList* _free_buffer_list; // The global free buffer list to
// preferentially get new buffers from.

// G1CardSetContainer node management within the G1CardSetBuffers allocated
// by this allocator.

static G1CardSetContainer* volatile* next_ptr(G1CardSetContainer& node);
typedef LockFreeStack<G1CardSetContainer, &G1CardSetAllocator::next_ptr> NodeStack;

@@ -209,20 +111,13 @@ class G1CardSetAllocator {
volatile uint _num_pending_nodes; // Number of nodes in the pending list.
volatile uint _num_free_nodes; // Number of nodes in the free list.

volatile uint _num_allocated_nodes; // Number of total nodes allocated and in use.
volatile uint _num_available_nodes; // Number of nodes available in all buffers (allocated + free + pending + not yet used).

// Try to transfer nodes from _pending_nodes_list to _free_nodes_list, with a
// synchronization delay for any in-progress pops from the _free_nodes_list
// to solve ABA here.
bool try_transfer_pending();

uint num_free_elems() const;

G1CardSetBuffer* create_new_buffer(G1CardSetBuffer* const prev);

uint elem_size() const { return _alloc_options.elem_size(); }

public:
G1CardSetAllocator(const char* name,
const G1CardSetAllocOptions& buffer_options,
@@ -238,15 +133,16 @@ class G1CardSetAllocator {
// be called in a globally synchronized area.
void drop_all();

uint num_buffers() const;

size_t mem_size() const {
return sizeof(*this) +
num_buffers() * sizeof(G1CardSetBuffer) + (size_t)_num_available_nodes * elem_size();
G1SegmentedArray<Elem, mtGCCardSet>::num_buffers() * sizeof(G1CardSetBuffer)
+ G1SegmentedArray<Elem, mtGCCardSet>::num_available_nodes() * G1SegmentedArray<Elem, mtGCCardSet>::elem_size();
}

size_t wasted_mem_size() const {
return ((size_t)_num_available_nodes - (_num_allocated_nodes - _num_pending_nodes)) * elem_size();
return (G1SegmentedArray<Elem, mtGCCardSet>::num_available_nodes()
Hamlin-Li marked this conversation as resolved.
Show resolved Hide resolved
- (G1SegmentedArray<Elem, mtGCCardSet>::num_allocated_nodes() - _num_pending_nodes))
* G1SegmentedArray<Elem, mtGCCardSet>::elem_size();
}

void print(outputStream* os);