Skip to content

Commit bdcac98

Browse files
marc-chevalierTobiHartmann
authored andcommitted
8347459: C2: missing transformation for chain of shifts/multiplications by constants
Reviewed-by: dfenacci, epeter
1 parent 3d3b782 commit bdcac98

File tree

6 files changed

+526
-17
lines changed

6 files changed

+526
-17
lines changed

src/hotspot/share/opto/memnode.cpp

Lines changed: 202 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3606,20 +3606,208 @@ Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) {
36063606

36073607
//------------------------------Ideal_sign_extended_input----------------------
36083608
// Check for useless sign-extension before a partial-word store
3609-
// (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) )
3610-
// If (conIL == conIR && conIR <= num_bits) this simplifies to
3611-
// (StoreB ... (valIn) )
3612-
Node *StoreNode::Ideal_sign_extended_input(PhaseGVN *phase, int num_bits) {
3613-
Node *val = in(MemNode::ValueIn);
3614-
if( val->Opcode() == Op_RShiftI ) {
3615-
const TypeInt *t = phase->type( val->in(2) )->isa_int();
3616-
if( t && t->is_con() && (t->get_con() <= num_bits) ) {
3617-
Node *shl = val->in(1);
3618-
if( shl->Opcode() == Op_LShiftI ) {
3619-
const TypeInt *t2 = phase->type( shl->in(2) )->isa_int();
3620-
if( t2 && t2->is_con() && (t2->get_con() == t->get_con()) ) {
3621-
set_req_X(MemNode::ValueIn, shl->in(1), phase);
3622-
return this;
3609+
// (StoreB ... (RShiftI _ (LShiftI _ v conIL) conIR))
3610+
// If (conIL == conIR && conIR <= num_rejected_bits) this simplifies to
3611+
// (StoreB ... (v))
3612+
// If (conIL > conIR) under some conditions, it can be simplified into
3613+
// (StoreB ... (LShiftI _ v (conIL - conIR)))
3614+
// This case happens when the value of the store was itself a left shift, that
3615+
// gets merged into the inner left shift of the sign-extension. For instance,
3616+
// if we have
3617+
// array_of_shorts[0] = (short)(v << 2)
3618+
// We get a structure such as:
3619+
// (StoreB ... (RShiftI _ (LShiftI _ (LShiftI _ v 2) 16) 16))
3620+
// that is simplified into
3621+
// (StoreB ... (RShiftI _ (LShiftI _ v 18) 16)).
3622+
// It is thus useful to handle cases where conIL > conIR. But this simplification
3623+
// does not always hold. Let's see in which cases it's valid.
3624+
//
3625+
// Let's assume we have the following 32 bits integer v:
3626+
// +----------------------------------+
3627+
// | v[0..31] |
3628+
// +----------------------------------+
3629+
// 31 0
3630+
// that will be stuffed in 8 bits byte after a shift left and a shift right of
3631+
// potentially different magnitudes.
3632+
// We denote num_rejected_bits the number of bits of the discarded part. In this
3633+
// case, num_rejected_bits == 24.
3634+
//
3635+
// Statement (proved further below in case analysis):
3636+
// Given:
3637+
// - 0 <= conIL < BitsPerJavaInteger (no wrap in shift, enforced by maskShiftAmount)
3638+
// - 0 <= conIR < BitsPerJavaInteger (no wrap in shift, enforced by maskShiftAmount)
3639+
// - conIL >= conIR
3640+
// - num_rejected_bits >= conIR
3641+
// Then this form:
3642+
// (RShiftI _ (LShiftI _ v conIL) conIR)
3643+
// can be replaced with this form:
3644+
// (LShiftI _ v (conIL-conIR))
3645+
//
3646+
// Note: We only have to show that the non-rejected lowest bits (8 bits for byte)
3647+
// have to be correct, as the higher bits are rejected / truncated by the store.
3648+
//
3649+
// The hypotheses
3650+
// 0 <= conIL < BitsPerJavaInteger
3651+
// 0 <= conIR < BitsPerJavaInteger
3652+
// are ensured by maskShiftAmount (called from ::Ideal of shift nodes). Indeed,
3653+
// (v << 31) << 2 must be simplified into 0, not into v << 33 (which is equivalent
3654+
// to v << 1).
3655+
//
3656+
//
3657+
// If you don't like case analysis, jump after the conclusion.
3658+
// ### Case 1 : conIL == conIR
3659+
// ###### Case 1.1: conIL == conIR == num_rejected_bits
3660+
// If we do the shift left then right by 24 bits, we get:
3661+
// after: v << 24
3662+
// +---------+------------------------+
3663+
// | v[0..7] | 0 |
3664+
// +---------+------------------------+
3665+
// 31 24 23 0
3666+
// after: (v << 24) >> 24
3667+
// +------------------------+---------+
3668+
// | sign bit | v[0..7] |
3669+
// +------------------------+---------+
3670+
// 31 8 7 0
3671+
// The non-rejected bits (bits kept by the store, that is the 8 lower bits of the
3672+
// result) are the same before and after, so, indeed, simplifying is correct.
3673+
3674+
// ###### Case 1.2: conIL == conIR < num_rejected_bits
3675+
// If we do the shift left then right by 22 bits, we get:
3676+
// after: v << 22
3677+
// +---------+------------------------+
3678+
// | v[0..9] | 0 |
3679+
// +---------+------------------------+
3680+
// 31 22 21 0
3681+
// after: (v << 22) >> 22
3682+
// +------------------------+---------+
3683+
// | sign bit | v[0..9] |
3684+
// +------------------------+---------+
3685+
// 31 10 9 0
3686+
// The non-rejected bits are the 8 lower bits of v. The bits 8 and 9 of v are still
3687+
// present in (v << 22) >> 22 but will be dropped by the store. The simplification is
3688+
// still correct.
3689+
3690+
// ###### But! Case 1.3: conIL == conIR > num_rejected_bits
3691+
// If we do the shift left then right by 26 bits, we get:
3692+
// after: v << 26
3693+
// +---------+------------------------+
3694+
// | v[0..5] | 0 |
3695+
// +---------+------------------------+
3696+
// 31 26 25 0
3697+
// after: (v << 26) >> 26
3698+
// +------------------------+---------+
3699+
// | sign bit | v[0..5] |
3700+
// +------------------------+---------+
3701+
// 31 6 5 0
3702+
// The non-rejected bits are made of
3703+
// - 0-5 => the bits 0 to 5 of v
3704+
// - 6-7 => the sign bit of v[0..5] (that is v[5])
3705+
// Simplifying this as v is not correct.
3706+
// The condition conIR <= num_rejected_bits is indeed necessary in Case 1
3707+
//
3708+
// ### Case 2: conIL > conIR
3709+
// ###### Case 2.1: num_rejected_bits == conIR
3710+
// We take conIL == 26 for this example.
3711+
// after: v << 26
3712+
// +---------+------------------------+
3713+
// | v[0..5] | 0 |
3714+
// +---------+------------------------+
3715+
// 31 26 25 0
3716+
// after: (v << 26) >> 24
3717+
// +------------------+---------+-----+
3718+
// | sign bit | v[0..5] | 0 |
3719+
// +------------------+---------+-----+
3720+
// 31 8 7 2 1 0
3721+
// The non-rejected bits are the 8 lower ones of (v << conIL - conIR).
3722+
// The bits 6 and 7 of v have been thrown away after the shift left.
3723+
// The simplification is still correct.
3724+
//
3725+
// ###### Case 2.2: num_rejected_bits > conIR.
3726+
// Let's say conIL == 26 and conIR == 22.
3727+
// after: v << 26
3728+
// +---------+------------------------+
3729+
// | v[0..5] | 0 |
3730+
// +---------+------------------------+
3731+
// 31 26 25 0
3732+
// after: (v << 26) >> 22
3733+
// +------------------+---------+-----+
3734+
// | sign bit | v[0..5] | 0 |
3735+
// +------------------+---------+-----+
3736+
// 31 10 9 4 3 0
3737+
// The bits non-rejected by the store are exactly the 8 lower ones of (v << (conIL - conIR)):
3738+
// - 0-3 => 0
3739+
// - 4-7 => bits 0 to 3 of v
3740+
// The simplification is still correct.
3741+
// The bits 4 and 5 of v are still present in (v << (conIL - conIR)) but they don't
3742+
// matter as they are not in the 8 lower bits: they will be cut out by the store.
3743+
//
3744+
// ###### But! Case 2.3: num_rejected_bits < conIR.
3745+
// Let's see that this case is not as easy to simplify.
3746+
// Let's say conIL == 28 and conIR == 26.
3747+
// after: v << 28
3748+
// +---------+------------------------+
3749+
// | v[0..3] | 0 |
3750+
// +---------+------------------------+
3751+
// 31 28 27 0
3752+
// after: (v << 28) >> 26
3753+
// +------------------+---------+-----+
3754+
// | sign bit | v[0..3] | 0 |
3755+
// +------------------+---------+-----+
3756+
// 31 6 5 2 1 0
3757+
// The non-rejected bits are made of
3758+
// - 0-1 => 0
3759+
// - 2-5 => the bits 0 to 3 of v
3760+
// - 6-7 => the sign bit of v[0..3] (that is v[3])
3761+
// Simplifying this as (v << 2) is not correct.
3762+
// The condition conIR <= num_rejected_bits is indeed necessary in Case 2.
3763+
//
3764+
// ### Conclusion:
3765+
// Our hypotheses are indeed sufficient:
3766+
// - 0 <= conIL < BitsPerJavaInteger
3767+
// - 0 <= conIR < BitsPerJavaInteger
3768+
// - conIL >= conIR
3769+
// - num_rejected_bits >= conIR
3770+
//
3771+
// ### A rationale without case analysis:
3772+
// After the shift left, conIL upper bits of v are discarded and conIL lower bit
3773+
// zeroes are added. After the shift right, conIR lower bits of the previous result
3774+
// are discarded. If conIL >= conIR, we discard only the zeroes we made up during
3775+
// the shift left, but if conIL < conIR, then we discard also lower bits of v. But
3776+
// the point of the simplification is to get an expression of the form
3777+
// (v << (conIL - conIR)). This expression discard only higher bits of v, thus the
3778+
// simplification is not correct if conIL < conIR.
3779+
//
3780+
// Moreover, after the shift right, the higher bit of (v << conIL) is repeated on the
3781+
// conIR higher bits of ((v << conIL) >> conIR), it's the sign-extension. If
3782+
// conIR > num_rejected_bits, then at least one artificial copy of this sign bit will
3783+
// be in the window of the store. Thus ((v << conIL) >> conIR) is not equivalent to
3784+
// (v << (conIL-conIR)) if conIR > num_rejected_bits.
3785+
//
3786+
// We do not treat the case conIL < conIR here since the point of this function is
3787+
// to skip sign-extensions (that is conIL == conIR == num_rejected_bits). The need
3788+
// of treating conIL > conIR comes from the cases where the sign-extended value is
3789+
// also left-shift expression. Computing the sign-extension of a right-shift expression
3790+
// doesn't yield a situation such as
3791+
// (StoreB ... (RShiftI _ (LShiftI _ v conIL) conIR))
3792+
// where conIL < conIR.
3793+
Node* StoreNode::Ideal_sign_extended_input(PhaseGVN* phase, int num_rejected_bits) {
3794+
Node* shr = in(MemNode::ValueIn);
3795+
if (shr->Opcode() == Op_RShiftI) {
3796+
const TypeInt* conIR = phase->type(shr->in(2))->isa_int();
3797+
if (conIR != nullptr && conIR->is_con() && conIR->get_con() >= 0 && conIR->get_con() < BitsPerJavaInteger && conIR->get_con() <= num_rejected_bits) {
3798+
Node* shl = shr->in(1);
3799+
if (shl->Opcode() == Op_LShiftI) {
3800+
const TypeInt* conIL = phase->type(shl->in(2))->isa_int();
3801+
if (conIL != nullptr && conIL->is_con() && conIL->get_con() >= 0 && conIL->get_con() < BitsPerJavaInteger) {
3802+
if (conIL->get_con() == conIR->get_con()) {
3803+
set_req_X(MemNode::ValueIn, shl->in(1), phase);
3804+
return this;
3805+
}
3806+
if (conIL->get_con() > conIR->get_con()) {
3807+
Node* new_shl = phase->transform(new LShiftINode(shl->in(1), phase->intcon(conIL->get_con() - conIR->get_con())));
3808+
set_req_X(MemNode::ValueIn, new_shl, phase);
3809+
return this;
3810+
}
36233811
}
36243812
}
36253813
}

src/hotspot/share/opto/memnode.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ class StoreNode : public MemNode {
579579
virtual bool depends_only_on_test() const { return false; }
580580

581581
Node *Ideal_masked_input (PhaseGVN *phase, uint mask);
582-
Node *Ideal_sign_extended_input(PhaseGVN *phase, int num_bits);
582+
Node* Ideal_sign_extended_input(PhaseGVN* phase, int num_rejected_bits);
583583

584584
public:
585585
// We must ensure that stores of object references will be visible

src/hotspot/share/opto/mulnode.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,43 @@ static int maskShiftAmount(PhaseGVN* phase, Node* shiftNode, int nBits) {
970970
return 0;
971971
}
972972

973+
// Called with
974+
// outer_shift = (_ << con0)
975+
// We are looking for the pattern:
976+
// outer_shift = ((X << con1) << con0)
977+
// we denote inner_shift the nested expression (X << con1)
978+
//
979+
// con0 and con1 are both in [0..nbits), as they are computed by maskShiftAmount.
980+
//
981+
// There are 2 cases:
982+
// if con0 + con1 >= nbits => 0
983+
// if con0 + con1 < nbits => X << (con1 + con0)
984+
static Node* collapse_nested_shift_left(PhaseGVN* phase, Node* outer_shift, int con0, BasicType bt) {
985+
assert(bt == T_LONG || bt == T_INT, "Unexpected type");
986+
int nbits = static_cast<int>(bits_per_java_integer(bt));
987+
Node* inner_shift = outer_shift->in(1);
988+
if (inner_shift->Opcode() != Op_LShift(bt)) {
989+
return nullptr;
990+
}
991+
992+
int con1 = maskShiftAmount(phase, inner_shift, nbits);
993+
if (con1 == 0) { // Either non-const, or actually 0 (up to mask) and then delegated to Identity()
994+
return nullptr;
995+
}
996+
997+
if (con0 + con1 >= nbits) {
998+
// While it might be tempting to use
999+
// phase->zerocon(bt);
1000+
// it would be incorrect: zerocon caches nodes, while Ideal is only allowed
1001+
// to return a new node, this or nullptr, but not an old (cached) node.
1002+
return ConNode::make(TypeInteger::zero(bt));
1003+
}
1004+
1005+
// con0 + con1 < nbits ==> actual shift happens now
1006+
Node* con0_plus_con1 = phase->intcon(con0 + con1);
1007+
return LShiftNode::make(inner_shift->in(1), con0_plus_con1, bt);
1008+
}
1009+
9731010
//------------------------------Identity---------------------------------------
9741011
Node* LShiftINode::Identity(PhaseGVN* phase) {
9751012
int count = 0;
@@ -983,6 +1020,9 @@ Node* LShiftINode::Identity(PhaseGVN* phase) {
9831020
//------------------------------Ideal------------------------------------------
9841021
// If the right input is a constant, and the left input is an add of a
9851022
// constant, flatten the tree: (X+con1)<<con0 ==> X<<con0 + con1<<con0
1023+
//
1024+
// Also collapse nested left-shifts with constant rhs:
1025+
// (X << con1) << con2 ==> X << (con1 + con2)
9861026
Node *LShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
9871027
int con = maskShiftAmount(phase, this, BitsPerJavaInteger);
9881028
if (con == 0) {
@@ -1095,6 +1135,13 @@ Node *LShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
10951135
phase->type(add1->in(2)) == TypeInt::make( bits_mask ) )
10961136
return new LShiftINode( add1->in(1), in(2) );
10971137

1138+
// Performs:
1139+
// (X << con1) << con2 ==> X << (con1 + con2)
1140+
Node* doubleShift = collapse_nested_shift_left(phase, this, con, T_INT);
1141+
if (doubleShift != nullptr) {
1142+
return doubleShift;
1143+
}
1144+
10981145
return nullptr;
10991146
}
11001147

@@ -1159,6 +1206,9 @@ Node* LShiftLNode::Identity(PhaseGVN* phase) {
11591206
//------------------------------Ideal------------------------------------------
11601207
// If the right input is a constant, and the left input is an add of a
11611208
// constant, flatten the tree: (X+con1)<<con0 ==> X<<con0 + con1<<con0
1209+
//
1210+
// Also collapse nested left-shifts with constant rhs:
1211+
// (X << con1) << con2 ==> X << (con1 + con2)
11621212
Node *LShiftLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
11631213
int con = maskShiftAmount(phase, this, BitsPerJavaLong);
11641214
if (con == 0) {
@@ -1271,6 +1321,13 @@ Node *LShiftLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
12711321
phase->type(add1->in(2)) == TypeLong::make( bits_mask ) )
12721322
return new LShiftLNode( add1->in(1), in(2) );
12731323

1324+
// Performs:
1325+
// (X << con1) << con2 ==> X << (con1 + con2)
1326+
Node* doubleShift = collapse_nested_shift_left(phase, this, con, T_LONG);
1327+
if (doubleShift != nullptr) {
1328+
return doubleShift;
1329+
}
1330+
12741331
return nullptr;
12751332
}
12761333

src/hotspot/share/utilities/globalDefinitions.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,14 @@ inline jlong min_signed_integer(BasicType bt) {
797797
return min_jlong;
798798
}
799799

800+
inline uint bits_per_java_integer(BasicType bt) {
801+
if (bt == T_INT) {
802+
return BitsPerJavaInteger;
803+
}
804+
assert(bt == T_LONG, "int or long only");
805+
return BitsPerJavaLong;
806+
}
807+
800808
// Auxiliary math routines
801809
// least common multiple
802810
extern size_t lcm(size_t a, size_t b);

0 commit comments

Comments
 (0)