Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
ee8eea1
collapse lshift with constants
marc-chevalier Feb 18, 2025
1b1a84d
more tests
marc-chevalier Feb 18, 2025
cd8a90f
register for igvn
marc-chevalier Feb 18, 2025
40e4f9a
clang-format
marc-chevalier Feb 18, 2025
4b40c66
fix type bug
marc-chevalier Feb 19, 2025
7102dce
format
marc-chevalier Feb 19, 2025
07a499c
actually return a new node
marc-chevalier Feb 19, 2025
bb867b6
improve simplification of double shifts in stores
marc-chevalier Feb 21, 2025
932a32d
Add test suggested by @dean-long exhibiting the difference between (x…
marc-chevalier Feb 25, 2025
7a1bb1b
rename
marc-chevalier Feb 25, 2025
7e33134
Remove useless local, with especially helpful name
marc-chevalier Feb 25, 2025
84d18df
Fix style in the few lines I haven't touched yet
marc-chevalier Feb 25, 2025
c792090
comment
marc-chevalier Feb 25, 2025
b8c3d74
+ comment on why not zerocon
marc-chevalier Feb 27, 2025
e71cb56
Merge branch 'master' into fix/missing-transformation-for-chain-of-sh…
marc-chevalier Mar 7, 2025
8e93a12
use a random number in testing
marc-chevalier Mar 7, 2025
14ee25d
Rework on the comment
marc-chevalier Mar 7, 2025
f882409
reworked another comment
marc-chevalier Mar 7, 2025
b07d0a2
Random testing, trying...
marc-chevalier Mar 7, 2025
042c4dd
Copied bits_per_java_integer, hoping it will merge nicely
marc-chevalier Mar 11, 2025
230af28
nit
marc-chevalier Mar 11, 2025
abe137a
char -> byte
marc-chevalier Mar 11, 2025
7421ea5
rephrase
marc-chevalier Mar 11, 2025
ad563ba
rephrased corner case
marc-chevalier Mar 11, 2025
be2f7b7
s
marc-chevalier Mar 11, 2025
f244e17
correct
marc-chevalier Mar 11, 2025
a96a219
rephrase
marc-chevalier Mar 11, 2025
5afbe37
order
marc-chevalier Mar 11, 2025
e3ecf35
more checks
marc-chevalier Mar 11, 2025
124d938
Rephrase comment
marc-chevalier Mar 20, 2025
cd0b0c0
Merge branch 'master' into fix/missing-transformation-for-chain-of-sh…
marc-chevalier Mar 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 202 additions & 14 deletions src/hotspot/share/opto/memnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3606,20 +3606,208 @@ Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) {

//------------------------------Ideal_sign_extended_input----------------------
// Check for useless sign-extension before a partial-word store
// (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) )
// If (conIL == conIR && conIR <= num_bits) this simplifies to
// (StoreB ... (valIn) )
Node *StoreNode::Ideal_sign_extended_input(PhaseGVN *phase, int num_bits) {
Node *val = in(MemNode::ValueIn);
if( val->Opcode() == Op_RShiftI ) {
const TypeInt *t = phase->type( val->in(2) )->isa_int();
if( t && t->is_con() && (t->get_con() <= num_bits) ) {
Node *shl = val->in(1);
if( shl->Opcode() == Op_LShiftI ) {
const TypeInt *t2 = phase->type( shl->in(2) )->isa_int();
if( t2 && t2->is_con() && (t2->get_con() == t->get_con()) ) {
set_req_X(MemNode::ValueIn, shl->in(1), phase);
return this;
// (StoreB ... (RShiftI _ (LShiftI _ v conIL) conIR))
// If (conIL == conIR && conIR <= num_rejected_bits) this simplifies to
// (StoreB ... (v))
// If (conIL > conIR) under some conditions, it can be simplified into
// (StoreB ... (LShiftI _ v (conIL - conIR)))
// This case happens when the value of the store was itself a left shift, that
// gets merged into the inner left shift of the sign-extension. For instance,
// if we have
// array_of_shorts[0] = (short)(v << 2)
// We get a structure such as:
// (StoreB ... (RShiftI _ (LShiftI _ (LShiftI _ v 2) 16) 16))
// that is simplified into
// (StoreB ... (RShiftI _ (LShiftI _ v 18) 16)).
// It is thus useful to handle cases where conIL > conIR. But this simplification
// does not always hold. Let's see in which cases it's valid.
//
// Let's assume we have the following 32 bits integer v:
// +----------------------------------+
// | v[0..31] |
// +----------------------------------+
// 31 0
// that will be stuffed in 8 bits byte after a shift left and a shift right of
// potentially different magnitudes.
// We denote num_rejected_bits the number of bits of the discarded part. In this
// case, num_rejected_bits == 24.
//
// Statement (proved further below in case analysis):
// Given:
// - 0 <= conIL < BitsPerJavaInteger (no wrap in shift, enforced by maskShiftAmount)
// - 0 <= conIR < BitsPerJavaInteger (no wrap in shift, enforced by maskShiftAmount)
// - conIL >= conIR
// - num_rejected_bits >= conIR
// Then this form:
// (RShiftI _ (LShiftI _ v conIL) conIR)
// can be replaced with this form:
// (LShiftI _ v (conIL-conIR))
//
// Note: We only have to show that the non-rejected lowest bits (8 bits for byte)
// have to be correct, as the higher bits are rejected / truncated by the store.
//
// The hypotheses
// 0 <= conIL < BitsPerJavaInteger
// 0 <= conIR < BitsPerJavaInteger
// are ensured by maskShiftAmount (called from ::Ideal of shift nodes). Indeed,
// (v << 31) << 2 must be simplified into 0, not into v << 33 (which is equivalent
// to v << 1).
//
//
// If you don't like case analysis, jump after the conclusion.
// ### Case 1 : conIL == conIR
// ###### Case 1.1: conIL == conIR == num_rejected_bits
// If we do the shift left then right by 24 bits, we get:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be nice if you explicitly denoted the 3 cases, maybe even using some indentation for emphasis:

Case 1: conIL == conIR
Case 2: conIL > conIR
Case 3: conIL < conIR

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could also be nice to say what you are trying to show / prove here.

// after: v << 24
// +---------+------------------------+
// | v[0..7] | 0 |
// +---------+------------------------+
// 31 24 23 0
// after: (v << 24) >> 24
// +------------------------+---------+
// | sign bit | v[0..7] |
// +------------------------+---------+
// 31 8 7 0
// The non-rejected bits (bits kept by the store, that is the 8 lower bits of the
// result) are the same before and after, so, indeed, simplifying is correct.

// ###### Case 1.2: conIL == conIR < num_rejected_bits
// If we do the shift left then right by 22 bits, we get:
// after: v << 22
// +---------+------------------------+
// | v[0..9] | 0 |
// +---------+------------------------+
// 31 22 21 0
// after: (v << 22) >> 22
// +------------------------+---------+
// | sign bit | v[0..9] |
// +------------------------+---------+
// 31 10 9 0
// The non-rejected bits are the 8 lower bits of v. The bits 8 and 9 of v are still
// present in (v << 22) >> 22 but will be dropped by the store. The simplification is
// still correct.

// ###### But! Case 1.3: conIL == conIR > num_rejected_bits
// If we do the shift left then right by 26 bits, we get:
// after: v << 26
// +---------+------------------------+
// | v[0..5] | 0 |
// +---------+------------------------+
// 31 26 25 0
// after: (v << 26) >> 26
// +------------------------+---------+
// | sign bit | v[0..5] |
// +------------------------+---------+
// 31 6 5 0
// The non-rejected bits are made of
// - 0-5 => the bits 0 to 5 of v
// - 6-7 => the sign bit of v[0..5] (that is v[5])
// Simplifying this as v is not correct.
// The condition conIR <= num_rejected_bits is indeed necessary in Case 1
//
// ### Case 2: conIL > conIR
// ###### Case 2.1: num_rejected_bits == conIR
// We take conIL == 26 for this example.
// after: v << 26
// +---------+------------------------+
// | v[0..5] | 0 |
// +---------+------------------------+
// 31 26 25 0
// after: (v << 26) >> 24
// +------------------+---------+-----+
// | sign bit | v[0..5] | 0 |
// +------------------+---------+-----+
// 31 8 7 2 1 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you make a statement if this is ok or not?

// The non-rejected bits are the 8 lower ones of (v << conIL - conIR).
// The bits 6 and 7 of v have been thrown away after the shift left.
// The simplification is still correct.
//
// ###### Case 2.2: num_rejected_bits > conIR.
// Let's say conIL == 26 and conIR == 22.
// after: v << 26
// +---------+------------------------+
// | v[0..5] | 0 |
// +---------+------------------------+
// 31 26 25 0
// after: (v << 26) >> 22
// +------------------+---------+-----+
// | sign bit | v[0..5] | 0 |
// +------------------+---------+-----+
// 31 10 9 4 3 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, this is basically a second example of conIL > conIR.
Can you say if this case is ok?

// The bits non-rejected by the store are exactly the 8 lower ones of (v << (conIL - conIR)):
// - 0-3 => 0
// - 4-7 => bits 0 to 3 of v
// The simplification is still correct.
// The bits 4 and 5 of v are still present in (v << (conIL - conIR)) but they don't
// matter as they are not in the 8 lower bits: they will be cut out by the store.
//
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we also have a case where conIL < conIR? What happens then?

// ###### But! Case 2.3: num_rejected_bits < conIR.
// Let's see that this case is not as easy to simplify.
// Let's say conIL == 28 and conIR == 26.
// after: v << 28
// +---------+------------------------+
// | v[0..3] | 0 |
// +---------+------------------------+
// 31 28 27 0
// after: (v << 28) >> 26
// +------------------+---------+-----+
// | sign bit | v[0..3] | 0 |
// +------------------+---------+-----+
// 31 6 5 2 1 0
// The non-rejected bits are made of
// - 0-1 => 0
// - 2-5 => the bits 0 to 3 of v
// - 6-7 => the sign bit of v[0..3] (that is v[3])
// Simplifying this as (v << 2) is not correct.
// The condition conIR <= num_rejected_bits is indeed necessary in Case 2.
//
// ### Conclusion:
// Our hypotheses are indeed sufficient:
// - 0 <= conIL < BitsPerJavaInteger
// - 0 <= conIR < BitsPerJavaInteger
// - conIL >= conIR
// - num_rejected_bits >= conIR
//
// ### A rationale without case analysis:
// After the shift left, conIL upper bits of v are discarded and conIL lower bit
// zeroes are added. After the shift right, conIR lower bits of the previous result
// are discarded. If conIL >= conIR, we discard only the zeroes we made up during
// the shift left, but if conIL < conIR, then we discard also lower bits of v. But
// the point of the simplification is to get an expression of the form
// (v << (conIL - conIR)). This expression discard only higher bits of v, thus the
// simplification is not correct if conIL < conIR.
//
// Moreover, after the shift right, the higher bit of (v << conIL) is repeated on the
// conIR higher bits of ((v << conIL) >> conIR), it's the sign-extension. If
// conIR > num_rejected_bits, then at least one artificial copy of this sign bit will
// be in the window of the store. Thus ((v << conIL) >> conIR) is not equivalent to
// (v << (conIL-conIR)) if conIR > num_rejected_bits.
//
// We do not treat the case conIL < conIR here since the point of this function is
// to skip sign-extensions (that is conIL == conIR == num_rejected_bits). The need
// of treating conIL > conIR comes from the cases where the sign-extended value is
// also left-shift expression. Computing the sign-extension of a right-shift expression
// doesn't yield a situation such as
// (StoreB ... (RShiftI _ (LShiftI _ v conIL) conIR))
// where conIL < conIR.
Node* StoreNode::Ideal_sign_extended_input(PhaseGVN* phase, int num_rejected_bits) {
Node* shr = in(MemNode::ValueIn);
if (shr->Opcode() == Op_RShiftI) {
const TypeInt* conIR = phase->type(shr->in(2))->isa_int();
if (conIR != nullptr && conIR->is_con() && conIR->get_con() >= 0 && conIR->get_con() < BitsPerJavaInteger && conIR->get_con() <= num_rejected_bits) {
Node* shl = shr->in(1);
if (shl->Opcode() == Op_LShiftI) {
const TypeInt* conIL = phase->type(shl->in(2))->isa_int();
if (conIL != nullptr && conIL->is_con() && conIL->get_con() >= 0 && conIL->get_con() < BitsPerJavaInteger) {
if (conIL->get_con() == conIR->get_con()) {
set_req_X(MemNode::ValueIn, shl->in(1), phase);
return this;
}
if (conIL->get_con() > conIR->get_con()) {
Node* new_shl = phase->transform(new LShiftINode(shl->in(1), phase->intcon(conIL->get_con() - conIR->get_con())));
set_req_X(MemNode::ValueIn, new_shl, phase);
return this;
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/memnode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ class StoreNode : public MemNode {
virtual bool depends_only_on_test() const { return false; }

Node *Ideal_masked_input (PhaseGVN *phase, uint mask);
Node *Ideal_sign_extended_input(PhaseGVN *phase, int num_bits);
Node* Ideal_sign_extended_input(PhaseGVN* phase, int num_rejected_bits);

public:
// We must ensure that stores of object references will be visible
Expand Down
57 changes: 57 additions & 0 deletions src/hotspot/share/opto/mulnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,43 @@ static int maskShiftAmount(PhaseGVN* phase, Node* shiftNode, int nBits) {
return 0;
}

// Called with
// outer_shift = (_ << con0)
// We are looking for the pattern:
// outer_shift = ((X << con1) << con0)
// we denote inner_shift the nested expression (X << con1)
//
// con0 and con1 are both in [0..nbits), as they are computed by maskShiftAmount.
//
// There are 2 cases:
// if con0 + con1 >= nbits => 0
// if con0 + con1 < nbits => X << (con1 + con0)
static Node* collapse_nested_shift_left(PhaseGVN* phase, Node* outer_shift, int con0, BasicType bt) {
assert(bt == T_LONG || bt == T_INT, "Unexpected type");
int nbits = static_cast<int>(bits_per_java_integer(bt));
Node* inner_shift = outer_shift->in(1);
if (inner_shift->Opcode() != Op_LShift(bt)) {
return nullptr;
}

int con1 = maskShiftAmount(phase, inner_shift, nbits);
if (con1 == 0) { // Either non-const, or actually 0 (up to mask) and then delegated to Identity()
return nullptr;
}

if (con0 + con1 >= nbits) {
// While it might be tempting to use
// phase->zerocon(bt);
// it would be incorrect: zerocon caches nodes, while Ideal is only allowed
// to return a new node, this or nullptr, but not an old (cached) node.
return ConNode::make(TypeInteger::zero(bt));
}

// con0 + con1 < nbits ==> actual shift happens now
Node* con0_plus_con1 = phase->intcon(con0 + con1);
return LShiftNode::make(inner_shift->in(1), con0_plus_con1, bt);
}

//------------------------------Identity---------------------------------------
Node* LShiftINode::Identity(PhaseGVN* phase) {
int count = 0;
Expand All @@ -983,6 +1020,9 @@ Node* LShiftINode::Identity(PhaseGVN* phase) {
//------------------------------Ideal------------------------------------------
// If the right input is a constant, and the left input is an add of a
// constant, flatten the tree: (X+con1)<<con0 ==> X<<con0 + con1<<con0
//
// Also collapse nested left-shifts with constant rhs:
// (X << con1) << con2 ==> X << (con1 + con2)
Node *LShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
int con = maskShiftAmount(phase, this, BitsPerJavaInteger);
if (con == 0) {
Expand Down Expand Up @@ -1095,6 +1135,13 @@ Node *LShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
phase->type(add1->in(2)) == TypeInt::make( bits_mask ) )
return new LShiftINode( add1->in(1), in(2) );

// Performs:
// (X << con1) << con2 ==> X << (con1 + con2)
Node* doubleShift = collapse_nested_shift_left(phase, this, con, T_INT);
if (doubleShift != nullptr) {
return doubleShift;
}

return nullptr;
}

Expand Down Expand Up @@ -1159,6 +1206,9 @@ Node* LShiftLNode::Identity(PhaseGVN* phase) {
//------------------------------Ideal------------------------------------------
// If the right input is a constant, and the left input is an add of a
// constant, flatten the tree: (X+con1)<<con0 ==> X<<con0 + con1<<con0
//
// Also collapse nested left-shifts with constant rhs:
// (X << con1) << con2 ==> X << (con1 + con2)
Node *LShiftLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
int con = maskShiftAmount(phase, this, BitsPerJavaLong);
if (con == 0) {
Expand Down Expand Up @@ -1271,6 +1321,13 @@ Node *LShiftLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
phase->type(add1->in(2)) == TypeLong::make( bits_mask ) )
return new LShiftLNode( add1->in(1), in(2) );

// Performs:
// (X << con1) << con2 ==> X << (con1 + con2)
Node* doubleShift = collapse_nested_shift_left(phase, this, con, T_LONG);
if (doubleShift != nullptr) {
return doubleShift;
}

return nullptr;
}

Expand Down
8 changes: 8 additions & 0 deletions src/hotspot/share/utilities/globalDefinitions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,14 @@ inline jlong min_signed_integer(BasicType bt) {
return min_jlong;
}

inline uint bits_per_java_integer(BasicType bt) {
if (bt == T_INT) {
return BitsPerJavaInteger;
}
assert(bt == T_LONG, "int or long only");
return BitsPerJavaLong;
}

// Auxiliary math routines
// least common multiple
extern size_t lcm(size_t a, size_t b);
Expand Down
Loading