@@ -3606,20 +3606,208 @@ Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) {
36063606
36073607// ------------------------------Ideal_sign_extended_input----------------------
36083608// Check for useless sign-extension before a partial-word store
3609- // (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) )
3610- // If (conIL == conIR && conIR <= num_bits) this simplifies to
3611- // (StoreB ... (valIn) )
3612- Node *StoreNode::Ideal_sign_extended_input (PhaseGVN *phase, int num_bits) {
3613- Node *val = in (MemNode::ValueIn);
3614- if ( val->Opcode () == Op_RShiftI ) {
3615- const TypeInt *t = phase->type ( val->in (2 ) )->isa_int ();
3616- if ( t && t->is_con () && (t->get_con () <= num_bits) ) {
3617- Node *shl = val->in (1 );
3618- if ( shl->Opcode () == Op_LShiftI ) {
3619- const TypeInt *t2 = phase->type ( shl->in (2 ) )->isa_int ();
3620- if ( t2 && t2->is_con () && (t2->get_con () == t->get_con ()) ) {
3621- set_req_X (MemNode::ValueIn, shl->in (1 ), phase);
3622- return this ;
3609+ // (StoreB ... (RShiftI _ (LShiftI _ v conIL) conIR))
3610+ // If (conIL == conIR && conIR <= num_rejected_bits) this simplifies to
3611+ // (StoreB ... (v))
3612+ // If (conIL > conIR) under some conditions, it can be simplified into
3613+ // (StoreB ... (LShiftI _ v (conIL - conIR)))
3614+ // This case happens when the value of the store was itself a left shift, that
3615+ // gets merged into the inner left shift of the sign-extension. For instance,
3616+ // if we have
3617+ // array_of_shorts[0] = (short)(v << 2)
3618+ // We get a structure such as:
3619+ // (StoreB ... (RShiftI _ (LShiftI _ (LShiftI _ v 2) 16) 16))
3620+ // that is simplified into
3621+ // (StoreB ... (RShiftI _ (LShiftI _ v 18) 16)).
3622+ // It is thus useful to handle cases where conIL > conIR. But this simplification
3623+ // does not always hold. Let's see in which cases it's valid.
3624+ //
3625+ // Let's assume we have the following 32 bits integer v:
3626+ // +----------------------------------+
3627+ // | v[0..31] |
3628+ // +----------------------------------+
3629+ // 31 0
3630+ // that will be stuffed in 8 bits byte after a shift left and a shift right of
3631+ // potentially different magnitudes.
3632+ // We denote num_rejected_bits the number of bits of the discarded part. In this
3633+ // case, num_rejected_bits == 24.
3634+ //
3635+ // Statement (proved further below in case analysis):
3636+ // Given:
3637+ // - 0 <= conIL < BitsPerJavaInteger (no wrap in shift, enforced by maskShiftAmount)
3638+ // - 0 <= conIR < BitsPerJavaInteger (no wrap in shift, enforced by maskShiftAmount)
3639+ // - conIL >= conIR
3640+ // - num_rejected_bits >= conIR
3641+ // Then this form:
3642+ // (RShiftI _ (LShiftI _ v conIL) conIR)
3643+ // can be replaced with this form:
3644+ // (LShiftI _ v (conIL-conIR))
3645+ //
3646+ // Note: We only have to show that the non-rejected lowest bits (8 bits for byte)
3647+ // have to be correct, as the higher bits are rejected / truncated by the store.
3648+ //
3649+ // The hypotheses
3650+ // 0 <= conIL < BitsPerJavaInteger
3651+ // 0 <= conIR < BitsPerJavaInteger
3652+ // are ensured by maskShiftAmount (called from ::Ideal of shift nodes). Indeed,
3653+ // (v << 31) << 2 must be simplified into 0, not into v << 33 (which is equivalent
3654+ // to v << 1).
3655+ //
3656+ //
3657+ // If you don't like case analysis, jump after the conclusion.
3658+ // ### Case 1 : conIL == conIR
3659+ // ###### Case 1.1: conIL == conIR == num_rejected_bits
3660+ // If we do the shift left then right by 24 bits, we get:
3661+ // after: v << 24
3662+ // +---------+------------------------+
3663+ // | v[0..7] | 0 |
3664+ // +---------+------------------------+
3665+ // 31 24 23 0
3666+ // after: (v << 24) >> 24
3667+ // +------------------------+---------+
3668+ // | sign bit | v[0..7] |
3669+ // +------------------------+---------+
3670+ // 31 8 7 0
3671+ // The non-rejected bits (bits kept by the store, that is the 8 lower bits of the
3672+ // result) are the same before and after, so, indeed, simplifying is correct.
3673+
3674+ // ###### Case 1.2: conIL == conIR < num_rejected_bits
3675+ // If we do the shift left then right by 22 bits, we get:
3676+ // after: v << 22
3677+ // +---------+------------------------+
3678+ // | v[0..9] | 0 |
3679+ // +---------+------------------------+
3680+ // 31 22 21 0
3681+ // after: (v << 22) >> 22
3682+ // +------------------------+---------+
3683+ // | sign bit | v[0..9] |
3684+ // +------------------------+---------+
3685+ // 31 10 9 0
3686+ // The non-rejected bits are the 8 lower bits of v. The bits 8 and 9 of v are still
3687+ // present in (v << 22) >> 22 but will be dropped by the store. The simplification is
3688+ // still correct.
3689+
3690+ // ###### But! Case 1.3: conIL == conIR > num_rejected_bits
3691+ // If we do the shift left then right by 26 bits, we get:
3692+ // after: v << 26
3693+ // +---------+------------------------+
3694+ // | v[0..5] | 0 |
3695+ // +---------+------------------------+
3696+ // 31 26 25 0
3697+ // after: (v << 26) >> 26
3698+ // +------------------------+---------+
3699+ // | sign bit | v[0..5] |
3700+ // +------------------------+---------+
3701+ // 31 6 5 0
3702+ // The non-rejected bits are made of
3703+ // - 0-5 => the bits 0 to 5 of v
3704+ // - 6-7 => the sign bit of v[0..5] (that is v[5])
3705+ // Simplifying this as v is not correct.
3706+ // The condition conIR <= num_rejected_bits is indeed necessary in Case 1
3707+ //
3708+ // ### Case 2: conIL > conIR
3709+ // ###### Case 2.1: num_rejected_bits == conIR
3710+ // We take conIL == 26 for this example.
3711+ // after: v << 26
3712+ // +---------+------------------------+
3713+ // | v[0..5] | 0 |
3714+ // +---------+------------------------+
3715+ // 31 26 25 0
3716+ // after: (v << 26) >> 24
3717+ // +------------------+---------+-----+
3718+ // | sign bit | v[0..5] | 0 |
3719+ // +------------------+---------+-----+
3720+ // 31 8 7 2 1 0
3721+ // The non-rejected bits are the 8 lower ones of (v << conIL - conIR).
3722+ // The bits 6 and 7 of v have been thrown away after the shift left.
3723+ // The simplification is still correct.
3724+ //
3725+ // ###### Case 2.2: num_rejected_bits > conIR.
3726+ // Let's say conIL == 26 and conIR == 22.
3727+ // after: v << 26
3728+ // +---------+------------------------+
3729+ // | v[0..5] | 0 |
3730+ // +---------+------------------------+
3731+ // 31 26 25 0
3732+ // after: (v << 26) >> 22
3733+ // +------------------+---------+-----+
3734+ // | sign bit | v[0..5] | 0 |
3735+ // +------------------+---------+-----+
3736+ // 31 10 9 4 3 0
3737+ // The bits non-rejected by the store are exactly the 8 lower ones of (v << (conIL - conIR)):
3738+ // - 0-3 => 0
3739+ // - 4-7 => bits 0 to 3 of v
3740+ // The simplification is still correct.
3741+ // The bits 4 and 5 of v are still present in (v << (conIL - conIR)) but they don't
3742+ // matter as they are not in the 8 lower bits: they will be cut out by the store.
3743+ //
3744+ // ###### But! Case 2.3: num_rejected_bits < conIR.
3745+ // Let's see that this case is not as easy to simplify.
3746+ // Let's say conIL == 28 and conIR == 26.
3747+ // after: v << 28
3748+ // +---------+------------------------+
3749+ // | v[0..3] | 0 |
3750+ // +---------+------------------------+
3751+ // 31 28 27 0
3752+ // after: (v << 28) >> 26
3753+ // +------------------+---------+-----+
3754+ // | sign bit | v[0..3] | 0 |
3755+ // +------------------+---------+-----+
3756+ // 31 6 5 2 1 0
3757+ // The non-rejected bits are made of
3758+ // - 0-1 => 0
3759+ // - 2-5 => the bits 0 to 3 of v
3760+ // - 6-7 => the sign bit of v[0..3] (that is v[3])
3761+ // Simplifying this as (v << 2) is not correct.
3762+ // The condition conIR <= num_rejected_bits is indeed necessary in Case 2.
3763+ //
3764+ // ### Conclusion:
3765+ // Our hypotheses are indeed sufficient:
3766+ // - 0 <= conIL < BitsPerJavaInteger
3767+ // - 0 <= conIR < BitsPerJavaInteger
3768+ // - conIL >= conIR
3769+ // - num_rejected_bits >= conIR
3770+ //
3771+ // ### A rationale without case analysis:
3772+ // After the shift left, conIL upper bits of v are discarded and conIL lower bit
3773+ // zeroes are added. After the shift right, conIR lower bits of the previous result
3774+ // are discarded. If conIL >= conIR, we discard only the zeroes we made up during
3775+ // the shift left, but if conIL < conIR, then we discard also lower bits of v. But
3776+ // the point of the simplification is to get an expression of the form
3777+ // (v << (conIL - conIR)). This expression discard only higher bits of v, thus the
3778+ // simplification is not correct if conIL < conIR.
3779+ //
3780+ // Moreover, after the shift right, the higher bit of (v << conIL) is repeated on the
3781+ // conIR higher bits of ((v << conIL) >> conIR), it's the sign-extension. If
3782+ // conIR > num_rejected_bits, then at least one artificial copy of this sign bit will
3783+ // be in the window of the store. Thus ((v << conIL) >> conIR) is not equivalent to
3784+ // (v << (conIL-conIR)) if conIR > num_rejected_bits.
3785+ //
3786+ // We do not treat the case conIL < conIR here since the point of this function is
3787+ // to skip sign-extensions (that is conIL == conIR == num_rejected_bits). The need
3788+ // of treating conIL > conIR comes from the cases where the sign-extended value is
3789+ // also left-shift expression. Computing the sign-extension of a right-shift expression
3790+ // doesn't yield a situation such as
3791+ // (StoreB ... (RShiftI _ (LShiftI _ v conIL) conIR))
3792+ // where conIL < conIR.
3793+ Node* StoreNode::Ideal_sign_extended_input (PhaseGVN* phase, int num_rejected_bits) {
3794+ Node* shr = in (MemNode::ValueIn);
3795+ if (shr->Opcode () == Op_RShiftI) {
3796+ const TypeInt* conIR = phase->type (shr->in (2 ))->isa_int ();
3797+ if (conIR != nullptr && conIR->is_con () && conIR->get_con () >= 0 && conIR->get_con () < BitsPerJavaInteger && conIR->get_con () <= num_rejected_bits) {
3798+ Node* shl = shr->in (1 );
3799+ if (shl->Opcode () == Op_LShiftI) {
3800+ const TypeInt* conIL = phase->type (shl->in (2 ))->isa_int ();
3801+ if (conIL != nullptr && conIL->is_con () && conIL->get_con () >= 0 && conIL->get_con () < BitsPerJavaInteger) {
3802+ if (conIL->get_con () == conIR->get_con ()) {
3803+ set_req_X (MemNode::ValueIn, shl->in (1 ), phase);
3804+ return this ;
3805+ }
3806+ if (conIL->get_con () > conIR->get_con ()) {
3807+ Node* new_shl = phase->transform (new LShiftINode (shl->in (1 ), phase->intcon (conIL->get_con () - conIR->get_con ())));
3808+ set_req_X (MemNode::ValueIn, new_shl, phase);
3809+ return this ;
3810+ }
36233811 }
36243812 }
36253813 }
0 commit comments