Skip to content

Commit

Permalink
8295066: Folding of loads is broken in C2 after JDK-8242115
Browse files Browse the repository at this point in the history
Backport-of: 58a7141a0dea5d1b4bfe6d56a95d860c854b3461
  • Loading branch information
GoeLin committed Jan 2, 2023
1 parent abfa08f commit 0ac93eb
Show file tree
Hide file tree
Showing 9 changed files with 214 additions and 82 deletions.
184 changes: 111 additions & 73 deletions src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
Expand Up @@ -676,85 +676,123 @@ bool G1BarrierSetC2::is_gc_barrier_node(Node* node) const {
return strcmp(call->_name, "write_ref_field_pre_entry") == 0 || strcmp(call->_name, "write_ref_field_post_entry") == 0;
}

void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
// It could be only one user, URShift node, in Object.clone() intrinsic
// but the new allocation is passed to arraycopy stub and it could not
// be scalar replaced. So we don't check the case.

// An other case of only one user (Xor) is when the value check for NULL
// in G1 post barrier is folded after CCP so the code which used URShift
// is removed.

// Take Region node before eliminating post barrier since it also
// eliminates CastP2X node when it has only one user.
Node* this_region = node->in(0);
assert(this_region != NULL, "");

// Remove G1 post barrier.

// Search for CastP2X->Xor->URShift->Cmp path which
// checks if the store done to a different from the value's region.
// And replace Cmp with #0 (false) to collapse G1 post barrier.
Node* xorx = node->find_out_with(Op_XorX);
if (xorx != NULL) {
Node* shift = xorx->unique_out();
Node* cmpx = shift->unique_out();
assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
"missing region check in G1 post barrier");
macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));

// Remove G1 pre barrier.

// Search "if (marking != 0)" check and set it to "false".
// There is no G1 pre barrier if previous stored value is NULL
// (for example, after initialization).
if (this_region->is_Region() && this_region->req() == 3) {
int ind = 1;
if (!this_region->in(ind)->is_IfFalse()) {
ind = 2;
}
if (this_region->in(ind)->is_IfFalse() &&
this_region->in(ind)->in(0)->Opcode() == Op_If) {
Node* bol = this_region->in(ind)->in(0)->in(1);
assert(bol->is_Bool(), "");
cmpx = bol->in(1);
if (bol->as_Bool()->_test._test == BoolTest::ne &&
cmpx->is_Cmp() && cmpx->in(2) == macro->intcon(0) &&
cmpx->in(1)->is_Load()) {
Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
if (adr->is_AddP() && adr->in(AddPNode::Base) == macro->top() &&
adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
adr->in(AddPNode::Offset) == macro->MakeConX(marking_offset)) {
macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
// Recognize the load of a field's previous value emitted for a G1 SATB
// pre-barrier: a control-pinned load whose only users (besides an optional
// DecodeN) are the CmpP null check, the StoreP into the SATB queue, and the
// runtime call to write_ref_field_pre_entry. Such loads exist solely for the
// pre-barrier and may be dropped together with it.
bool G1BarrierSetC2::is_g1_pre_val_load(Node* n) {
  if (!n->is_Load() || !n->as_Load()->has_pinned_control_dependency()) {
    return false;
  }
  // A compressed-oops pre-value may be consumed through a single DecodeN;
  // look at the decode's users instead.
  if (n->outcnt() == 1 && n->unique_out()->is_DecodeN()) {
    n = n->unique_out();
  }
  if (n->outcnt() != 3) {
    return false;
  }
  // All three users must belong to the pre-barrier pattern:
  // CmpP, StoreP, and the write_ref_field_pre_entry leaf call.
  int matched = 0;
  for (SimpleDUIterator it(n); it.has_next(); it.next()) {
    Node* use = it.get();
    if (use->is_Cmp() || use->is_Store()) {
      matched++;
    } else if (use->is_CallLeaf() &&
               strcmp(use->as_CallLeaf()->_name, "write_ref_field_pre_entry") == 0) {
      matched++;
    }
  }
  return matched == 3;
}

// A node belongs to a G1 pre-barrier exactly when it is the control-pinned
// load of the field's previous value (see is_g1_pre_val_load). Callers such
// as PhaseMacroExpand::can_eliminate_allocation use this to ignore
// pre-barrier-only users when deciding whether an allocation can be
// scalar replaced.
bool G1BarrierSetC2::is_gc_pre_barrier_node(Node *node) const {
  return is_g1_pre_val_load(node);
}

// Remove the G1 barriers guarding a store to an allocation that is being
// scalar replaced. 'node' is either the pre-barrier's previous-value load
// (replaced by a zero constant) or the CastP2X anchoring a post barrier
// (its region/card checks are collapsed to false and the cast removed).
// NOTE(review): the captured diff interleaved old and new code here — the
// clone-intrinsic URShiftX path was duplicated ahead of the xorx check and
// the CastP2X removal epilogue appeared twice; restructured to the intended
// post-JDK-8295066 shape.
void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
  if (is_g1_pre_val_load(node)) {
    // Load of the field's previous value, alive only for the pre-barrier:
    // replace it with a zero constant of the matching basic type.
    macro->replace_node(node, macro->zerocon(node->as_Load()->bottom_type()->basic_type()));
  } else {
    assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
    assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
    // It could be only one user, URShift node, in Object.clone() intrinsic
    // but the new allocation is passed to arraycopy stub and it could not
    // be scalar replaced. So we don't check the case.

    // Another case of only one user (Xor) is when the value check for NULL
    // in G1 post barrier is folded after CCP so the code which used URShift
    // is removed.

    // Take Region node before eliminating post barrier since it also
    // eliminates CastP2X node when it has only one user.
    Node* this_region = node->in(0);
    assert(this_region != NULL, "");

    // Remove G1 post barrier.

    // Search for CastP2X->Xor->URShift->Cmp path which
    // checks if the store done to a different from the value's region.
    // And replace Cmp with #0 (false) to collapse G1 post barrier.
    Node* xorx = node->find_out_with(Op_XorX);
    if (xorx != NULL) {
      Node* shift = xorx->unique_out();
      Node* cmpx = shift->unique_out();
      assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
             cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
             "missing region check in G1 post barrier");
      macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));

      // Remove G1 pre barrier.

      // Search "if (marking != 0)" check and set it to "false".
      // There is no G1 pre barrier if previous stored value is NULL
      // (for example, after initialization).
      if (this_region->is_Region() && this_region->req() == 3) {
        int ind = 1;
        if (!this_region->in(ind)->is_IfFalse()) {
          ind = 2;
        }
        if (this_region->in(ind)->is_IfFalse() &&
            this_region->in(ind)->in(0)->Opcode() == Op_If) {
          Node* bol = this_region->in(ind)->in(0)->in(1);
          assert(bol->is_Bool(), "");
          cmpx = bol->in(1);
          if (bol->as_Bool()->_test._test == BoolTest::ne &&
              cmpx->is_Cmp() && cmpx->in(2) == macro->intcon(0) &&
              cmpx->in(1)->is_Load()) {
            Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
            const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
            if (adr->is_AddP() && adr->in(AddPNode::Base) == macro->top() &&
                adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
                adr->in(AddPNode::Offset) == macro->MakeConX(marking_offset)) {
              macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
            }
          }
        }
      }
    } else {
      assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
      // This is a G1 post barrier emitted by the Object.clone() intrinsic.
      // Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card
      // is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier.
      Node* shift = node->find_out_with(Op_URShiftX);
      assert(shift != NULL, "missing G1 post barrier");
      Node* addp = shift->unique_out();
      Node* load = addp->find_out_with(Op_LoadB);
      assert(load != NULL, "missing G1 post barrier");
      Node* cmpx = load->unique_out();
      assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
             cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
             "missing card value check in G1 post barrier");
      macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
      // There is no G1 pre barrier in this case
    }
    // Now CastP2X can be removed since it is used only on dead path
    // which currently still alive until igvn optimize it.
    assert(node->outcnt() == 0 || node->unique_out()->Opcode() == Op_URShiftX, "");
    macro->replace_node(node, macro->top());
  }
}

Node* G1BarrierSetC2::step_over_gc_barrier(Node* c) const {
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
Expand Up @@ -90,7 +90,9 @@ class G1BarrierSetC2: public CardTableBarrierSetC2 {
void verify_no_safepoints(Compile* compile, Node* marking_load, const Unique_Node_List& loads) const;
#endif

static bool is_g1_pre_val_load(Node* n);
public:
virtual bool is_gc_pre_barrier_node(Node* node) const;
virtual bool is_gc_barrier_node(Node* node) const;
virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
virtual Node* step_over_gc_barrier(Node* c) const;
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
Expand Up @@ -259,6 +259,7 @@ class BarrierSetC2: public CHeapObj<mtGC> {

// Support for GC barriers emitted during parsing
virtual bool has_load_barrier_nodes() const { return false; }
virtual bool is_gc_pre_barrier_node(Node* node) const { return false; }
virtual bool is_gc_barrier_node(Node* node) const { return false; }
virtual Node* step_over_gc_barrier(Node* c) const { return c; }

Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
Expand Up @@ -716,6 +716,11 @@ Node* ShenandoahBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& acces
return result;
}


// For Shenandoah the pre-barrier node is the runtime call performing the
// SATB write-barrier-pre work; delegate the pattern check to
// is_shenandoah_wb_pre_call. Lets PhaseMacroExpand treat such calls as
// barrier-only users of an otherwise scalar-replaceable allocation.
bool ShenandoahBarrierSetC2::is_gc_pre_barrier_node(Node* node) const {
  return is_shenandoah_wb_pre_call(node);
}

// Support for GC barriers emitted during parsing
bool ShenandoahBarrierSetC2::is_gc_barrier_node(Node* node) const {
if (node->Opcode() == Op_ShenandoahLoadReferenceBarrier) return true;
Expand Down
Expand Up @@ -112,6 +112,7 @@ class ShenandoahBarrierSetC2 : public BarrierSetC2 {
virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, bool is_clone_instance, ArrayCopyPhase phase) const;

// Support for GC barriers emitted during parsing
virtual bool is_gc_pre_barrier_node(Node* node) const;
virtual bool is_gc_barrier_node(Node* node) const;
virtual Node* step_over_gc_barrier(Node* c) const;
virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const;
Expand Down
7 changes: 2 additions & 5 deletions src/hotspot/share/opto/macro.cpp
Expand Up @@ -57,9 +57,6 @@
#if INCLUDE_G1GC
#include "gc/g1/g1ThreadLocalData.hpp"
#endif // INCLUDE_G1GC
#if INCLUDE_SHENANDOAHGC
#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
#endif


//
Expand Down Expand Up @@ -576,6 +573,7 @@ bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArr
}

if (can_eliminate && res != NULL) {
BarrierSetC2 *bs = BarrierSet::barrier_set()->barrier_set_c2();
for (DUIterator_Fast jmax, j = res->fast_outs(jmax);
j < jmax && can_eliminate; j++) {
Node* use = res->fast_out(j);
Expand All @@ -592,8 +590,7 @@ bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArr
for (DUIterator_Fast kmax, k = use->fast_outs(kmax);
k < kmax && can_eliminate; k++) {
Node* n = use->fast_out(k);
if (!n->is_Store() && n->Opcode() != Op_CastP2X
SHENANDOAHGC_ONLY(&& (!UseShenandoahGC || !ShenandoahBarrierSetC2::is_shenandoah_wb_pre_call(n))) ) {
if (!n->is_Store() && n->Opcode() != Op_CastP2X && !bs->is_gc_pre_barrier_node(n)) {
DEBUG_ONLY(disq_node = n;)
if (n->is_Load() || n->is_LoadStore()) {
NOT_PRODUCT(fail_eliminate = "Field load";)
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/share/opto/macro.hpp
Expand Up @@ -213,6 +213,7 @@ class PhaseMacroExpand : public Phase {
Node* intcon(jint con) const { return _igvn.intcon(con); }
Node* longcon(jlong con) const { return _igvn.longcon(con); }
Node* makecon(const Type *t) const { return _igvn.makecon(t); }
Node* zerocon(BasicType bt) const { return _igvn.zerocon(bt); }
Node* top() const { return C->top(); }

Node* prefetch_allocation(Node* i_o,
Expand Down
12 changes: 8 additions & 4 deletions src/hotspot/share/opto/memnode.cpp
Expand Up @@ -1196,9 +1196,6 @@ bool LoadNode::is_instance_field_load_with_local_phi(Node* ctrl) {
//------------------------------Identity---------------------------------------
// Loads are identity if previous store is to same address
Node* LoadNode::Identity(PhaseGVN* phase) {
if (has_pinned_control_dependency()) {
return this;
}
// If the previous store-maker is the right kind of Store, and the store is
// to the same address, then we are equal to the value stored.
Node* mem = in(Memory);
Expand All @@ -1216,9 +1213,16 @@ Node* LoadNode::Identity(PhaseGVN* phase) {
}
// (This works even when value is a Con, but LoadNode::Value
// usually runs first, producing the singleton type of the Con.)
return value;
if (!has_pinned_control_dependency() || value->is_Con()) {
return value;
} else {
return this;
}
}

if (has_pinned_control_dependency()) {
return this;
}
// Search for an existing data phi which was generated before for the same
// instance's field to avoid infinite generation of phis in a loop.
Node *region = mem->in(0);
Expand Down
83 changes: 83 additions & 0 deletions test/hotspot/jtreg/compiler/c2/irTests/TestScalarReplacement.java
@@ -0,0 +1,83 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package compiler.c2.irTests;

import compiler.lib.ir_framework.*;

/*
* @test
* @bug 8295066
* @summary Test various scalarization scenarios
* @library /test/lib /
* @run driver compiler.c2.irTests.TestScalarReplacement
*/
/*
 * IR regression test for JDK-8295066: after JDK-8242115, loads carrying a
 * pinned control dependency were no longer folded by C2, which could block
 * scalar replacement of allocations. Each @Test asserts via the IR framework
 * that no allocation node survives compilation.
 * NOTE: do not "simplify" the bodies — the exact shapes (array store,
 * self-assignment loop) are what reproduce the original failure.
 */
public class TestScalarReplacement {

    public static void main(String[] args) {
        TestFramework.run();
    }

    // Minimal one-field payload; both it and the holding array below
    // are expected to be scalar replaced.
    static class X1 {
        int x = 42;
    }

    // Allocation of the array and of the X1 element must both disappear
    // from the compiled IR (failOn = ALLOC).
    @Test
    @IR(failOn = { IRNode.ALLOC })
    public static int test1() {
        X1[] array = new X1[1];
        array[0] = new X1();
        return array[0].x;
    }

    static class X2 {
        int x = 42;
        int y = 43;
        public int hash() { return x + y; }
    }

    // Wrapper with a mutable Object field; stores to 'obj' are reference
    // stores and so go through the GC barrier machinery.
    static final class ObjectWrapper {
        public Object obj;

        public ObjectWrapper(Object obj) {
            this.obj = obj;
        }
    }

    // The self-assignment 'val.obj = val.obj' is deliberate: each iteration
    // re-stores the field, exercising the load of the previous value
    // (presumably the pre-barrier pre-val load — see the C2 barrier changes
    // in this commit). The wrapper must still be scalar replaced.
    @Test
    @IR(failOn = { IRNode.ALLOC })
    public static int test2(X2 obj) {
        ObjectWrapper val = new ObjectWrapper(obj);
        for (int i = 0; i < 10; ++i) {
            for (int j = 0; j < 10; ++j) {
                val.obj = val.obj;
            }
        }
        return ((X2)val.obj).hash();
    }

    // @Run entry point supplies a non-null warmed-up argument for test2.
    @Run(test = "test2")
    public static void test2_runner() {
        X2 obj = new X2();
        test2(obj);
    }
}

1 comment on commit 0ac93eb

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.