-
Notifications
You must be signed in to change notification settings - Fork 6.2k
8347459: C2: missing transformation for chain of shifts/multiplications by constants #23728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ee8eea1
1b1a84d
cd8a90f
40e4f9a
4b40c66
7102dce
07a499c
bb867b6
932a32d
7a1bb1b
7e33134
84d18df
c792090
b8c3d74
e71cb56
8e93a12
14ee25d
f882409
b07d0a2
042c4dd
230af28
abe137a
7421ea5
ad563ba
be2f7b7
f244e17
a96a219
5afbe37
e3ecf35
124d938
cd0b0c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3606,20 +3606,208 @@ Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) { | |
|
|
||
| //------------------------------Ideal_sign_extended_input---------------------- | ||
| // Check for useless sign-extension before a partial-word store | ||
| // (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) ) | ||
| // If (conIL == conIR && conIR <= num_bits) this simplifies to | ||
| // (StoreB ... (valIn) ) | ||
| Node *StoreNode::Ideal_sign_extended_input(PhaseGVN *phase, int num_bits) { | ||
| Node *val = in(MemNode::ValueIn); | ||
| if( val->Opcode() == Op_RShiftI ) { | ||
| const TypeInt *t = phase->type( val->in(2) )->isa_int(); | ||
| if( t && t->is_con() && (t->get_con() <= num_bits) ) { | ||
| Node *shl = val->in(1); | ||
| if( shl->Opcode() == Op_LShiftI ) { | ||
| const TypeInt *t2 = phase->type( shl->in(2) )->isa_int(); | ||
| if( t2 && t2->is_con() && (t2->get_con() == t->get_con()) ) { | ||
| set_req_X(MemNode::ValueIn, shl->in(1), phase); | ||
| return this; | ||
| // (StoreB ... (RShiftI _ (LShiftI _ v conIL) conIR)) | ||
| // If (conIL == conIR && conIR <= num_rejected_bits) this simplifies to | ||
| // (StoreB ... (v)) | ||
| // If (conIL > conIR) under some conditions, it can be simplified into | ||
| // (StoreB ... (LShiftI _ v (conIL - conIR))) | ||
| // This case happens when the value of the store was itself a left shift, that | ||
| // gets merged into the inner left shift of the sign-extension. For instance, | ||
| // if we have | ||
| // array_of_shorts[0] = (short)(v << 2) | ||
| // We get a structure such as: | ||
| // (StoreB ... (RShiftI _ (LShiftI _ (LShiftI _ v 2) 16) 16)) | ||
| // that is simplified into | ||
| // (StoreB ... (RShiftI _ (LShiftI _ v 18) 16)). | ||
| // It is thus useful to handle cases where conIL > conIR. But this simplification | ||
| // does not always hold. Let's see in which cases it's valid. | ||
| // | ||
| // Let's assume we have the following 32 bits integer v: | ||
| // +----------------------------------+ | ||
| // | v[0..31] | | ||
| // +----------------------------------+ | ||
| // 31 0 | ||
| // that will be stuffed in 8 bits byte after a shift left and a shift right of | ||
| // potentially different magnitudes. | ||
| // We denote num_rejected_bits the number of bits of the discarded part. In this | ||
| // case, num_rejected_bits == 24. | ||
| // | ||
| // Statement (proved further below in case analysis): | ||
| // Given: | ||
| // - 0 <= conIL < BitsPerJavaInteger (no wrap in shift, enforced by maskShiftAmount) | ||
| // - 0 <= conIR < BitsPerJavaInteger (no wrap in shift, enforced by maskShiftAmount) | ||
| // - conIL >= conIR | ||
| // - num_rejected_bits >= conIR | ||
| // Then this form: | ||
| // (RShiftI _ (LShiftI _ v conIL) conIR) | ||
| // can be replaced with this form: | ||
| // (LShiftI _ v (conIL-conIR)) | ||
| // | ||
| // Note: We only have to show that the non-rejected lowest bits (8 bits for byte) | ||
| // have to be correct, as the higher bits are rejected / truncated by the store. | ||
| // | ||
| // The hypotheses | ||
| // 0 <= conIL < BitsPerJavaInteger | ||
| // 0 <= conIR < BitsPerJavaInteger | ||
| // are ensured by maskShiftAmount (called from ::Ideal of shift nodes). Indeed, | ||
| // (v << 31) << 2 must be simplified into 0, not into v << 33 (which is equivalent | ||
| // to v << 1). | ||
| // | ||
| // | ||
| // If you don't like case analysis, jump after the conclusion. | ||
| // ### Case 1 : conIL == conIR | ||
| // ###### Case 1.1: conIL == conIR == num_rejected_bits | ||
| // If we do the shift left then right by 24 bits, we get: | ||
| // after: v << 24 | ||
| // +---------+------------------------+ | ||
| // | v[0..7] | 0 | | ||
| // +---------+------------------------+ | ||
| // 31 24 23 0 | ||
| // after: (v << 24) >> 24 | ||
| // +------------------------+---------+ | ||
| // | sign bit | v[0..7] | | ||
| // +------------------------+---------+ | ||
| // 31 8 7 0 | ||
| // The non-rejected bits (bits kept by the store, that is the 8 lower bits of the | ||
| // result) are the same before and after, so, indeed, simplifying is correct. | ||
|
|
||
| // ###### Case 1.2: conIL == conIR < num_rejected_bits | ||
| // If we do the shift left then right by 22 bits, we get: | ||
| // after: v << 22 | ||
| // +---------+------------------------+ | ||
| // | v[0..9] | 0 | | ||
| // +---------+------------------------+ | ||
| // 31 22 21 0 | ||
| // after: (v << 22) >> 22 | ||
| // +------------------------+---------+ | ||
| // | sign bit | v[0..9] | | ||
| // +------------------------+---------+ | ||
| // 31 10 9 0 | ||
| // The non-rejected bits are the 8 lower bits of v. The bits 8 and 9 of v are still | ||
| // present in (v << 22) >> 22 but will be dropped by the store. The simplification is | ||
| // still correct. | ||
|
|
||
| // ###### But! Case 1.3: conIL == conIR > num_rejected_bits | ||
| // If we do the shift left then right by 26 bits, we get: | ||
| // after: v << 26 | ||
| // +---------+------------------------+ | ||
| // | v[0..5] | 0 | | ||
| // +---------+------------------------+ | ||
| // 31 26 25 0 | ||
| // after: (v << 26) >> 26 | ||
| // +------------------------+---------+ | ||
| // | sign bit | v[0..5] | | ||
| // +------------------------+---------+ | ||
| // 31 6 5 0 | ||
| // The non-rejected bits are made of | ||
| // - 0-5 => the bits 0 to 5 of v | ||
| // - 6-7 => the sign bit of v[0..5] (that is v[5]) | ||
| // Simplifying this as v is not correct. | ||
| // The condition conIR <= num_rejected_bits is indeed necessary in Case 1 | ||
| // | ||
| // ### Case 2: conIL > conIR | ||
| // ###### Case 2.1: num_rejected_bits == conIR | ||
| // We take conIL == 26 for this example. | ||
| // after: v << 26 | ||
| // +---------+------------------------+ | ||
| // | v[0..5] | 0 | | ||
| // +---------+------------------------+ | ||
| // 31 26 25 0 | ||
| // after: (v << 26) >> 24 | ||
| // +------------------+---------+-----+ | ||
| // | sign bit | v[0..5] | 0 | | ||
| // +------------------+---------+-----+ | ||
| // 31 8 7 2 1 0 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you make a statement if this is ok or not? |
||
| // The non-rejected bits are the 8 lower ones of (v << conIL - conIR). | ||
| // The bits 6 and 7 of v have been thrown away after the shift left. | ||
| // The simplification is still correct. | ||
| // | ||
| // ###### Case 2.2: num_rejected_bits > conIR. | ||
| // Let's say conIL == 26 and conIR == 22. | ||
| // after: v << 26 | ||
| // +---------+------------------------+ | ||
| // | v[0..5] | 0 | | ||
| // +---------+------------------------+ | ||
| // 31 26 25 0 | ||
| // after: (v << 26) >> 22 | ||
| // +------------------+---------+-----+ | ||
| // | sign bit | v[0..5] | 0 | | ||
| // +------------------+---------+-----+ | ||
| // 31 10 9 4 3 0 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, this is basically a second example of |
||
| // The bits non-rejected by the store are exactly the 8 lower ones of (v << (conIL - conIR)): | ||
| // - 0-3 => 0 | ||
| // - 4-7 => bits 0 to 3 of v | ||
| // The simplification is still correct. | ||
| // The bits 4 and 5 of v are still present in (v << (conIL - conIR)) but they don't | ||
| // matter as they are not in the 8 lower bits: they will be cut out by the store. | ||
| // | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we also have a case where |
||
| // ###### But! Case 2.3: num_rejected_bits < conIR. | ||
| // Let's see that this case is not as easy to simplify. | ||
| // Let's say conIL == 28 and conIR == 26. | ||
| // after: v << 28 | ||
| // +---------+------------------------+ | ||
| // | v[0..3] | 0 | | ||
| // +---------+------------------------+ | ||
| // 31 28 27 0 | ||
| // after: (v << 28) >> 26 | ||
| // +------------------+---------+-----+ | ||
| // | sign bit | v[0..3] | 0 | | ||
| // +------------------+---------+-----+ | ||
| // 31 6 5 2 1 0 | ||
| // The non-rejected bits are made of | ||
| // - 0-1 => 0 | ||
| // - 2-5 => the bits 0 to 3 of v | ||
| // - 6-7 => the sign bit of v[0..3] (that is v[3]) | ||
| // Simplifying this as (v << 2) is not correct. | ||
| // The condition conIR <= num_rejected_bits is indeed necessary in Case 2. | ||
| // | ||
| // ### Conclusion: | ||
| // Our hypotheses are indeed sufficient: | ||
| // - 0 <= conIL < BitsPerJavaInteger | ||
| // - 0 <= conIR < BitsPerJavaInteger | ||
| // - conIL >= conIR | ||
| // - num_rejected_bits >= conIR | ||
| // | ||
| // ### A rationale without case analysis: | ||
| // After the shift left, conIL upper bits of v are discarded and conIL lower bit | ||
| // zeroes are added. After the shift right, conIR lower bits of the previous result | ||
| // are discarded. If conIL >= conIR, we discard only the zeroes we made up during | ||
| // the shift left, but if conIL < conIR, then we discard also lower bits of v. But | ||
| // the point of the simplification is to get an expression of the form | ||
| // (v << (conIL - conIR)). This expression discard only higher bits of v, thus the | ||
| // simplification is not correct if conIL < conIR. | ||
| // | ||
| // Moreover, after the shift right, the higher bit of (v << conIL) is repeated on the | ||
| // conIR higher bits of ((v << conIL) >> conIR), it's the sign-extension. If | ||
| // conIR > num_rejected_bits, then at least one artificial copy of this sign bit will | ||
| // be in the window of the store. Thus ((v << conIL) >> conIR) is not equivalent to | ||
| // (v << (conIL-conIR)) if conIR > num_rejected_bits. | ||
| // | ||
| // We do not treat the case conIL < conIR here since the point of this function is | ||
| // to skip sign-extensions (that is conIL == conIR == num_rejected_bits). The need | ||
| // of treating conIL > conIR comes from the cases where the sign-extended value is | ||
| // also left-shift expression. Computing the sign-extension of a right-shift expression | ||
| // doesn't yield a situation such as | ||
| // (StoreB ... (RShiftI _ (LShiftI _ v conIL) conIR)) | ||
| // where conIL < conIR. | ||
| Node* StoreNode::Ideal_sign_extended_input(PhaseGVN* phase, int num_rejected_bits) { | ||
| Node* shr = in(MemNode::ValueIn); | ||
| if (shr->Opcode() == Op_RShiftI) { | ||
| const TypeInt* conIR = phase->type(shr->in(2))->isa_int(); | ||
| if (conIR != nullptr && conIR->is_con() && conIR->get_con() >= 0 && conIR->get_con() < BitsPerJavaInteger && conIR->get_con() <= num_rejected_bits) { | ||
| Node* shl = shr->in(1); | ||
| if (shl->Opcode() == Op_LShiftI) { | ||
| const TypeInt* conIL = phase->type(shl->in(2))->isa_int(); | ||
| if (conIL != nullptr && conIL->is_con() && conIL->get_con() >= 0 && conIL->get_con() < BitsPerJavaInteger) { | ||
| if (conIL->get_con() == conIR->get_con()) { | ||
| set_req_X(MemNode::ValueIn, shl->in(1), phase); | ||
| return this; | ||
| } | ||
| if (conIL->get_con() > conIR->get_con()) { | ||
| Node* new_shl = phase->transform(new LShiftINode(shl->in(1), phase->intcon(conIL->get_con() - conIR->get_con()))); | ||
| set_req_X(MemNode::ValueIn, new_shl, phase); | ||
| return this; | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It could be nice if you explicitly denoted the 3 cases, maybe even using some indentation for emphasis:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It could also be nice to say what you are trying to show / prove here.