8254739: G1: Optimize evacuation failure for regions with few failed objects #5181

Closed
Changes from 18 of 21 commits

Commits
954b5ee
speed up iterate evac failure objs in one region.
Hamlin-Li Aug 18, 2021
4d88f42
fix: reset state after iteration
Hamlin-Li Aug 18, 2021
74bc4d9
Use a segmented array rather than a linked list to record evacuation …
Hamlin-Li Aug 25, 2021
05f026a
Fix compilation issues on some platform.
Hamlin-Li Aug 25, 2021
ded8275
Fix test failures; Fix compilation failures on some platforms
Hamlin-Li Aug 26, 2021
51d19eb
Fix test failures; Fix compilation failures on some platforms
Hamlin-Li Aug 26, 2021
f454925
Fix compilation error on windows
Hamlin-Li Aug 26, 2021
34aed3f
Fix compilation error on windows
Hamlin-Li Aug 26, 2021
43a8b59
Merge branch 'master' into speedup-iterate-evac-failure-objs-in-one-r…
Hamlin-Li Oct 25, 2021
6132372
Fix wrong merge
Hamlin-Li Oct 25, 2021
44e3562
Use refactored G1SegmentedArray rather than home-made Array+Node
Hamlin-Li Oct 25, 2021
e779c3a
Add asserts, comments
Hamlin-Li Oct 26, 2021
924fec5
Rename from g1EvacuationFailureObjsInHR to g1EvacFailureObjsInHR
Hamlin-Li Oct 26, 2021
0070826
Refactor as Thomas suggested
Hamlin-Li Oct 29, 2021
c4ca77c
Fix compilation error
Hamlin-Li Oct 29, 2021
ab04f1c
Fix compilation error
Hamlin-Li Oct 29, 2021
3712037
Merge branch 'openjdk:master' into speedup-iterate-evac-failure-objs-…
Hamlin-Li Oct 29, 2021
82c172a
Refine code based on Thomas' suggestion
Hamlin-Li Oct 29, 2021
d33f87b
Merge branch 'openjdk:master' into speedup-iterate-evac-failure-objs-…
Hamlin-Li Nov 3, 2021
e588cad
Move allocation/deallocation in one place
Hamlin-Li Nov 3, 2021
3efa90b
Fix typo
Hamlin-Li Nov 5, 2021
@@ -25,6 +25,7 @@
#include "precompiled.hpp"

#include "gc/g1/g1CardSetMemory.inline.hpp"
#include "gc/g1/g1SegmentedArray.inline.hpp"
#include "logging/log.hpp"
#include "runtime/atomic.hpp"
#include "utilities/formatBuffer.hpp"
@@ -46,6 +47,11 @@ G1CardSetAllocator<Elem>::G1CardSetAllocator(const char* name,
assert(elem_size >= sizeof(G1CardSetContainer), "Element instance size %u for allocator %s too small", elem_size, name);
}

template <class Elem>
G1CardSetAllocator<Elem>::~G1CardSetAllocator() {
drop_all();
}

template <class Elem>
bool G1CardSetAllocator<Elem>::try_transfer_pending() {
// Attempt to claim the lock.
@@ -113,9 +113,7 @@ class G1CardSetAllocator {
G1CardSetAllocator(const char* name,
const G1CardSetAllocOptions* buffer_options,
G1CardSetBufferList* free_buffer_list);
~G1CardSetAllocator() {
drop_all();
}
~G1CardSetAllocator();

Elem* allocate();
void free(Elem* elem);
@@ -68,40 +68,40 @@ class RemoveSelfForwardPtrObjClosure: public ObjectClosure {
// dead too) already.
void do_object(oop obj) {
HeapWord* obj_addr = cast_from_oop<HeapWord*>(obj);
assert(_last_forwarded_object_end <= obj_addr, "should iterate in ascending address order");
assert(_hr->is_in(obj_addr), "sanity");

if (obj->is_forwarded() && obj->forwardee() == obj) {
// The object failed to move.
// The object failed to move.
assert(obj->is_forwarded() && obj->forwardee() == obj, "sanity");

zap_dead_objects(_last_forwarded_object_end, obj_addr);
// We consider all objects that we find self-forwarded to be
// live. What we'll do is that we'll update the prev marking
// info so that they are all under PTAMS and explicitly marked.
if (!_cm->is_marked_in_prev_bitmap(obj)) {
_cm->mark_in_prev_bitmap(obj);
}
if (_during_concurrent_start) {
// For the next marking info we'll only mark the
// self-forwarded objects explicitly if we are during
// concurrent start (since, normally, we only mark objects pointed
// to by roots if we succeed in copying them). By marking all
// self-forwarded objects we ensure that we mark any that are
// still pointed to be roots. During concurrent marking, and
// after concurrent start, we don't need to mark any objects
// explicitly and all objects in the CSet are considered
// (implicitly) live. So, we won't mark them explicitly and
// we'll leave them over NTAMS.
_cm->mark_in_next_bitmap(_worker_id, _hr, obj);
}
size_t obj_size = obj->size();
zap_dead_objects(_last_forwarded_object_end, obj_addr);
// We consider all objects that we find self-forwarded to be
// live. What we'll do is that we'll update the prev marking
// info so that they are all under PTAMS and explicitly marked.
if (!_cm->is_marked_in_prev_bitmap(obj)) {
_cm->mark_in_prev_bitmap(obj);
}
if (_during_concurrent_start) {
// For the next marking info we'll only mark the
// self-forwarded objects explicitly if we are during
// concurrent start (since, normally, we only mark objects pointed
// to by roots if we succeed in copying them). By marking all
// self-forwarded objects we ensure that we mark any that are
// still pointed to be roots. During concurrent marking, and
// after concurrent start, we don't need to mark any objects
// explicitly and all objects in the CSet are considered
// (implicitly) live. So, we won't mark them explicitly and
// we'll leave them over NTAMS.
_cm->mark_in_next_bitmap(_worker_id, _hr, obj);
}
size_t obj_size = obj->size();

_marked_bytes += (obj_size * HeapWordSize);
PreservedMarks::init_forwarded_mark(obj);
_marked_bytes += (obj_size * HeapWordSize);
PreservedMarks::init_forwarded_mark(obj);

HeapWord* obj_end = obj_addr + obj_size;
_last_forwarded_object_end = obj_end;
_hr->alloc_block_in_bot(obj_addr, obj_end);
}
HeapWord* obj_end = obj_addr + obj_size;
_last_forwarded_object_end = obj_end;
_hr->alloc_block_in_bot(obj_addr, obj_end);
}

// Fill the memory area from start to end with filler objects, and update the BOT
@@ -164,7 +164,8 @@ class RemoveSelfForwardPtrHRClosure: public HeapRegionClosure {
RemoveSelfForwardPtrObjClosure rspc(hr,
during_concurrent_start,
_worker_id);
hr->object_iterate(&rspc);
// Iterates evac failure objs which are recorded during evacuation.
hr->iterate_evac_failure_objs(&rspc);
// Need to zap the remainder area of the processed region.
rspc.zap_remainder();

@@ -0,0 +1,131 @@
/*
* Copyright (c) 2021, Huawei and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "precompiled.hpp"
#include "gc/g1/g1EvacFailureObjectsSet.hpp"
#include "gc/g1/g1CollectedHeap.hpp"
#include "gc/g1/g1SegmentedArray.inline.hpp"
#include "gc/g1/heapRegion.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "utilities/quickSort.hpp"


const G1SegmentedArrayAllocOptions G1EvacFailureObjectsSet::_alloc_options =
G1SegmentedArrayAllocOptions((uint)sizeof(OffsetInRegion), BufferLength, UINT_MAX, Alignment);

G1SegmentedArrayBufferList<mtGC> G1EvacFailureObjectsSet::_free_buffer_list;

#ifdef ASSERT
void G1EvacFailureObjectsSet::assert_is_valid_offset(size_t offset) const {
const uint max_offset = 1u << (HeapRegion::LogOfHRGrainBytes - LogHeapWordSize);
assert(offset < max_offset, "must be, but is " SIZE_FORMAT, offset);
}
#endif

oop G1EvacFailureObjectsSet::from_offset(OffsetInRegion offset) const {
assert_is_valid_offset(offset);
return cast_to_oop(_bottom + offset);
}

G1EvacFailureObjectsSet::OffsetInRegion G1EvacFailureObjectsSet::to_offset(oop obj) const {
const HeapWord* o = cast_from_oop<const HeapWord*>(obj);
size_t offset = pointer_delta(o, _bottom);
assert(obj == from_offset(static_cast<OffsetInRegion>(offset)), "must be");
return static_cast<OffsetInRegion>(offset);
}

G1EvacFailureObjectsSet::G1EvacFailureObjectsSet(uint region_idx, HeapWord* bottom) :
DEBUG_ONLY(_region_idx(region_idx) COMMA)
_bottom(bottom),
_offsets(&_alloc_options, &_free_buffer_list) {
assert(HeapRegion::LogOfHRGrainBytes < 32, "must be");
}

void G1EvacFailureObjectsSet::record(oop obj) {
assert(obj != NULL, "must be");
assert(_region_idx == G1CollectedHeap::heap()->heap_region_containing(obj)->hrm_index(), "must be");
OffsetInRegion* e = _offsets.allocate();
*e = to_offset(obj);
}

// Helper class to join, sort and iterate over the previously collected segmented
// array of objects that failed evacuation.
class G1EvacFailureObjectsIterationHelper {
typedef G1EvacFailureObjectsSet::OffsetInRegion OffsetInRegion;

G1EvacFailureObjectsSet* _objects_set;
const G1SegmentedArray<OffsetInRegion, mtGC>* _segments;
OffsetInRegion* _offset_array;
uint _array_length;

static int order_oop(OffsetInRegion a, OffsetInRegion b) {
return static_cast<int>(a-b);
}

void join_and_sort() {
uint num = _segments->num_allocated_nodes();
_offset_array = NEW_C_HEAP_ARRAY(OffsetInRegion, num, mtGC);

_segments->iterate_nodes(*this);
assert(_array_length == num, "must be %u, %u", _array_length, num);

QuickSort::sort(_offset_array, _array_length, order_oop, true);
}

void iterate_internal(ObjectClosure* closure) {
for (uint i = 0; i < _array_length; i++) {
oop cur = _objects_set->from_offset(_offset_array[i]);
closure->do_object(cur);
}

FREE_C_HEAP_ARRAY(OffsetInRegion, _offset_array);
}

public:
G1EvacFailureObjectsIterationHelper(G1EvacFailureObjectsSet* collector) :
_objects_set(collector),
_segments(&_objects_set->_offsets),
_offset_array(nullptr),
_array_length(0) { }

void iterate(ObjectClosure* closure) {
join_and_sort();
iterate_internal(closure);

@tschatzl (Contributor) commented on Nov 3, 2021:
I would probably move the array allocation and freeing here instead of having them at the start and end of join_and_sort and iterate_internal respectively. Otherwise there is a hidden dependency on the first method allocating and the second freeing; it also looks cleaner because allocation and deallocation are explicit and at the same call level.

I.e.

  void iterate(ObjectClosure* closure) {
    uint num = _segments->num_allocated_nodes();
    _offset_array = NEW_C_HEAP_ARRAY(OffsetInRegion, num, mtGC);

    join_and_sort();
    iterate_internal(closure);

    FREE_C_HEAP_ARRAY(OffsetInRegion, _offset_array);
  }

@Hamlin-Li (Author) commented on Nov 3, 2021:
Thanks Thomas, good catch.

}

// Callback of G1SegmentedArray::iterate_nodes
void do_buffer(G1SegmentedArrayBuffer<mtGC>* node, uint length) {
node->copy_to(&_offset_array[_array_length]);
_array_length += length;
}
};

void G1EvacFailureObjectsSet::iterate(ObjectClosure* closure) {
assert_at_safepoint();

G1EvacFailureObjectsIterationHelper helper(this);
helper.iterate(closure);

_offsets.drop_all();
}

@albertnetymk (Member) commented on Nov 5, 2021:
Having some destructive operations (drop_all) inside a method named iterate could come as a surprise, IMO. If I understand this correctly, the following would be problematic.

evac_failed_objects.iterate(closure1);
...
evac_failed_objects.iterate(closure2);

@Hamlin-Li (Author) commented on Nov 5, 2021:
drop_all just returns the buffers to the free list; it does not destruct them. So iterating multiple times is OK, because the next time it will get memory from the free list or allocate a new buffer. Hope this answers your question.

@albertnetymk (Member) commented on Nov 5, 2021:
Since drop_all() resets all counters (e.g. _num_allocated_nodes), the subsequent iteration will think the array is empty, won't it?

@Hamlin-Li (Author) commented on Nov 5, 2021:
For example, suppose we modify the code as below:

void HeapRegion::iterate_evac_failure_objs(ObjectClosure* closure) {
  _evac_failure_objs.iterate(closure);
  _evac_failure_objs.iterate(closure);
}

For the second iteration everything will be empty, so iterate_nodes will be a no-op, and so will QuickSort::sort and iterate_internal. These empty operations do no harm.

@tschatzl (Contributor) commented on Nov 5, 2021:
@Hamlin-Li : I think @albertnetymk's concern is that typically an iterate method does not modify the list itself. That is surprising for readers. The documentation also does not indicate any of that. I do not think he believes this will cause a VM failure.

Maybe change HeapRegion::iterate_evac_failure_objs to call a (new) drop() method on _evac_failure_objs?

I think such a change would solve Albert's concerns.

An alternative could be renaming iterate to something else.
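
For illustration, a minimal sketch of what that separation might look like (the drop() method here is hypothetical, following the suggestion above; it is not part of this patch):

  void HeapRegion::iterate_evac_failure_objs(ObjectClosure* closure) {
    _evac_failure_objs.iterate(closure); // iterate() would no longer call drop_all() itself
    _evac_failure_objs.drop();           // hypothetical explicit reset, returning buffers to the free list
  }

With such a split, a second iterate() call before the explicit drop() would still see the recorded objects, which addresses the surprise discussed above.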

@albertnetymk (Member) commented on Nov 5, 2021:
Thanks to Thomas for unpacking my concern more precisely. I used "problematic" to mean that the second iteration will not do what developers expect it to do, not necessarily that it causes a VM crash.

@Hamlin-Li (Author) commented on Nov 5, 2021:
Thanks for your clarification, Albert and Thomas. I see your point; it makes sense to me.

As this patch has been blocking some other issues for a while, I think it's better to take more time to find a good solution for Albert's concern (adding a drop() seems a little redundant to me :)).

If you don't mind, can I do this refinement later in another issue? Thanks

@albertnetymk (Member) commented on Nov 5, 2021:
It's a rather minor issue as I stated originally; I am fine either way.

@Hamlin-Li (Author) commented on Nov 5, 2021:
Thanks, I created JDK-8276721 to track it.

@@ -0,0 +1,81 @@
/*
* Copyright (c) 2021, Huawei and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#ifndef SHARE_GC_G1_G1EVACUATIONFAILUREOBJSINHR_HPP
#define SHARE_GC_G1_G1EVACUATIONFAILUREOBJSINHR_HPP

#include "gc/g1/g1SegmentedArray.hpp"
#include "memory/iterator.hpp"
#include "oops/oop.hpp"

class G1EvacFailureObjectsIterationHelper;

// This class collects addresses of objects that failed evacuation in a specific
// heap region.
// Provides sorted iteration of these elements for processing during the remove
// self forwards phase.
class G1EvacFailureObjectsSet {
friend class G1EvacFailureObjectsIterationHelper;

public:
// Storage type of an object that failed evacuation within a region. Given
// heap region size and possible object locations within a region, it is
// sufficient to use an uint here to save some space instead of full pointers.
typedef uint OffsetInRegion;

private:
static const uint BufferLength = 256;
static const uint Alignment = 4;

static const G1SegmentedArrayAllocOptions _alloc_options;

// This free list is shared among evacuation failure process in all regions.
static G1SegmentedArrayBufferList<mtGC> _free_buffer_list;

DEBUG_ONLY(const uint _region_idx;)

// Region bottom
const HeapWord* _bottom;

// Offsets within region containing objects that failed evacuation.
G1SegmentedArray<OffsetInRegion, mtGC> _offsets;

void assert_is_valid_offset(size_t offset) const NOT_DEBUG_RETURN;
// Converts between an offset within a region and an oop address.
oop from_offset(OffsetInRegion offset) const;
OffsetInRegion to_offset(oop obj) const;

public:
G1EvacFailureObjectsSet(uint region_idx, HeapWord* bottom);

// Record an object that failed evacuation.
void record(oop obj);

// Apply the given ObjectClosure to all objects that failed evacuation. Objects
// are passed in increasing address order.
void iterate(ObjectClosure* closure);
};


#endif //SHARE_GC_G1_G1EVACUATIONFAILUREOBJSINHR_HPP
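
As an aside, a rough usage sketch of the API declared above (hr, failed_obj and the closure arguments are placeholders; this is a simplification, not the actual call sites in this patch):

  // One G1EvacFailureObjectsSet is embedded in each HeapRegion.
  G1EvacFailureObjectsSet objs(hr->hrm_index(), hr->bottom());

  // During evacuation, each object that failed to move is recorded as a uint
  // word offset from the region bottom (this fits because LogOfHRGrainBytes < 32).
  objs.record(failed_obj);

  // In the remove-self-forwards phase the offsets are joined, sorted and the
  // objects are visited in increasing address order; currently iterate() also
  // drops the recorded entries afterwards.
  RemoveSelfForwardPtrObjClosure rspc(hr, during_concurrent_start, worker_id);
  objs.iterate(&rspc);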
@@ -607,6 +607,9 @@ oop G1ParScanThreadState::handle_evacuation_failure_par(oop old, markWord m, siz
if (forward_ptr == NULL) {
// Forward-to-self succeeded. We are the "owner" of the object.
HeapRegion* r = _g1h->heap_region_containing(old);
// Records evac failure objs, this will help speed up iteration
// of these objs later in *remove self forward* phase of post evacuation.
r->record_evac_failure_obj(old);

if (_evac_failure_regions->record(r->hrm_index())) {
_g1h->hr_printer()->evac_failure(r);
@@ -73,6 +73,17 @@ class G1SegmentedArrayBuffer : public CHeapObj<flag> {

size_t mem_size() const { return sizeof(*this) + (size_t)_num_elems * _elem_size; }

uint length() const {
// _next_allocate might grow larger than _num_elems in multi-thread environments
// due to races.
return MIN2(_next_allocate, _num_elems);
}

// Copies the (valid) contents of this buffer into the destination.
void copy_to(void* dest) const {
::memcpy(dest, _buffer, length() * _elem_size);
}

bool is_full() const { return _next_allocate >= _num_elems; }
};

@@ -190,19 +201,23 @@ class G1SegmentedArray {
private:
inline G1SegmentedArrayBuffer<flag>* create_new_buffer(G1SegmentedArrayBuffer<flag>* const prev);

DEBUG_ONLY(uint calculate_length() const;)

public:
const G1SegmentedArrayBuffer<flag>* first_array_buffer() const { return Atomic::load(&_first); }

uint num_available_nodes() const { return Atomic::load(&_num_available_nodes); }
uint num_allocated_nodes() const { return Atomic::load(&_num_allocated_nodes); }
uint num_allocated_nodes() const {
uint allocated = Atomic::load(&_num_allocated_nodes);
assert(calculate_length() == allocated, "Must be");
return allocated;
}

inline uint elem_size() const;

G1SegmentedArray(const G1SegmentedArrayAllocOptions* buffer_options,
G1SegmentedArrayBufferList<flag>* free_buffer_list);
~G1SegmentedArray() {
drop_all();
}
~G1SegmentedArray();

// Deallocate all buffers to the free buffer list and reset this allocator. Must
// be called in a globally synchronized area.
@@ -211,6 +226,9 @@ class G1SegmentedArray {
inline Elem* allocate();

inline uint num_buffers() const;

template<typename BufferClosure>
void iterate_nodes(BufferClosure& closure) const;
};

#endif //SHARE_GC_G1_G1SEGMENTEDARRAY_HPP