Skip to content

Commit ef101f1

Browse files
committed
8332920: C2: Partial Peeling is wrongly applied for CmpU with negative limit
Reviewed-by: kvn, thartmann, epeter
1 parent 2843745 commit ef101f1

File tree

2 files changed

+493
-38
lines changed

2 files changed

+493
-38
lines changed

src/hotspot/share/opto/loopopts.cpp

Lines changed: 166 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2987,52 +2987,101 @@ RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
29872987
return reg;
29882988
}
29892989

2990-
//------------------------------ insert_cmpi_loop_exit -------------------------------------
2991-
// Clone a signed compare loop exit from an unsigned compare and
2992-
// insert it before the unsigned cmp on the stay-in-loop path.
2993-
// All new nodes inserted in the dominator tree between the original
2994-
// if and it's projections. The original if test is replaced with
2995-
// a constant to force the stay-in-loop path.
2990+
// Idea
2991+
// ----
2992+
// Partial Peeling tries to rotate the loop in such a way that it can later be turned into a counted loop. Counted loops
2993+
// require a signed loop exit test. When calling this method, we've only found a suitable unsigned test to partial peel
2994+
// with. Therefore, we try to split off a signed loop exit test from the unsigned test such that it can be used as new
2995+
// loop exit while keeping the unsigned test unchanged and preserving the same behavior as if we've used the unsigned
2996+
// test alone instead:
29962997
//
2997-
// This is done to make sure that the original if and it's projections
2998-
// still dominate the same set of control nodes, that the ctrl() relation
2999-
// from data nodes to them is preserved, and that their loop nesting is
3000-
// preserved.
2998+
// Before Partial Peeling:
2999+
// Loop:
3000+
// <peeled section>
3001+
// Split off signed loop exit test
3002+
// <-- CUT HERE -->
3003+
// Unchanged unsigned loop exit test
3004+
// <rest of unpeeled section>
3005+
// goto Loop
30013006
//
3002-
// before
3003-
// if(i <u limit) unsigned compare loop exit
3007+
// After Partial Peeling:
3008+
// <cloned peeled section>
3009+
// Cloned split off signed loop exit test
3010+
// Loop:
3011+
// Unchanged unsigned loop exit test
3012+
// <rest of unpeeled section>
3013+
// <peeled section>
3014+
// Split off signed loop exit test
3015+
// goto Loop
3016+
//
3017+
// Details
3018+
// -------
3019+
// Before:
3020+
// if (i <u limit) Unsigned loop exit condition
30043021
// / |
30053022
// v v
30063023
// exit-proj stay-in-loop-proj
30073024
//
3008-
// after
3009-
// if(stay-in-loop-const) original if
3010-
// / |
3011-
// / v
3012-
// / if(i < limit) new signed test
3025+
// Split off a signed loop exit test (i.e. with CmpI) from an unsigned loop exit test (i.e. with CmpU) and insert it
3026+
// before the CmpU on the stay-in-loop path and keep both tests:
3027+
//
3028+
// if (i < limit) Signed loop exit test
3029+
// / |
3030+
// / if (i <u limit) Unsigned loop exit test
30133031
// / / |
3014-
// / / v
3015-
// / / if(i <u limit) new cloned unsigned test
3016-
// / / / |
3017-
// v v v |
3018-
// region |
3019-
// | |
3020-
// dum-if |
3021-
// / | |
3022-
// ether | |
3023-
// v v
3032+
// v v v
3033+
// exit-region stay-in-loop-proj
3034+
//
3035+
// Implementation
3036+
// --------------
3037+
// We need to make sure that the new signed loop exit test is properly inserted into the graph such that the unsigned
3038+
// loop exit test still dominates the same set of control nodes, the ctrl() relation from data nodes to both loop
3039+
// exit tests is preserved, and their loop nesting is correct.
3040+
//
3041+
// To achieve that, we clone the unsigned loop exit test completely (leave it unchanged), insert the signed loop exit
3042+
// test above it and kill the original unsigned loop exit test by setting its condition to a constant
3043+
// (i.e. stay-in-loop-const in graph below) such that IGVN can fold it later:
3044+
//
3045+
// if (stay-in-loop-const) Killed original unsigned loop exit test
3046+
// / |
3047+
// / v
3048+
// / if (i < limit) Split off signed loop exit test
3049+
// / / |
3050+
// / / v
3051+
// / / if (i <u limit) Cloned unsigned loop exit test
3052+
// / / / |
3053+
// v v v |
3054+
// exit-region |
3055+
// | |
3056+
// dummy-if |
3057+
// / | |
3058+
// dead | |
3059+
// v v
30243060
// exit-proj stay-in-loop-proj
30253061
//
3026-
IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
3062+
// Note: The dummy-if is inserted to create a region to merge the loop exits between the original to be killed unsigned
3063+
// loop exit test and its exit projection while keeping the exit projection (also see insert_region_before_proj()).
3064+
//
3065+
// Requirements
3066+
// ------------
3067+
// Note that we can only split off a signed loop exit test from the unsigned loop exit test when the behavior is exactly
3068+
// the same as before with only a single unsigned test. This is only possible if certain requirements are met.
3069+
// Otherwise, we need to bail out (see comments in the code below).
3070+
IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree* loop) {
30273071
const bool Signed = true;
30283072
const bool Unsigned = false;
30293073

30303074
BoolNode* bol = if_cmpu->in(1)->as_Bool();
3031-
if (bol->_test._test != BoolTest::lt) return nullptr;
3075+
if (bol->_test._test != BoolTest::lt) {
3076+
return nullptr;
3077+
}
30323078
CmpNode* cmpu = bol->in(1)->as_Cmp();
3033-
if (cmpu->Opcode() != Op_CmpU) return nullptr;
3079+
assert(cmpu->Opcode() == Op_CmpU, "must be unsigned comparison");
3080+
30343081
int stride = stride_of_possible_iv(if_cmpu);
3035-
if (stride == 0) return nullptr;
3082+
if (stride == 0) {
3083+
return nullptr;
3084+
}
30363085

30373086
Node* lp_proj = stay_in_loop(if_cmpu, loop);
30383087
guarantee(lp_proj != nullptr, "null loop node");
@@ -3044,22 +3093,101 @@ IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *lo
30443093
// We therefore can't add a single exit condition.
30453094
return nullptr;
30463095
}
3047-
// The loop exit condition is !(i <u limit) ==> (i < 0 || i >= limit).
3048-
// Split out the exit condition (i < 0) for stride < 0 or (i >= limit) for stride > 0.
3049-
Node* limit = nullptr;
3096+
// The unsigned loop exit condition is
3097+
// !(i <u limit)
3098+
// = i >=u limit
3099+
//
3100+
// First, we note that for any x for which
3101+
// 0 <= x <= INT_MAX
3102+
// we can convert x to an unsigned int and still get the same guarantee:
3103+
// 0 <= (uint) x <= INT_MAX = (uint) INT_MAX
3104+
// 0 <=u (uint) x <=u INT_MAX = (uint) INT_MAX (LEMMA)
3105+
//
3106+
// With that in mind, if
3107+
// limit >= 0 (COND)
3108+
// then the unsigned loop exit condition
3109+
// i >=u limit (ULE)
3110+
// is equivalent to
3111+
// i < 0 || i >= limit (SLE-full)
3112+
// because either i is negative and therefore always greater than INT_MAX when converting to unsigned
3113+
// (uint) i >=u INT_MAX >= limit >= 0
3114+
// or otherwise
3115+
// i >= limit >= 0
3116+
// holds due to (LEMMA).
3117+
//
3118+
// For completeness, a counterexample with limit < 0:
3119+
// Assume i = -3 and limit = -2:
3120+
// i < 0
3121+
// -3 < 0
3122+
// is true and thus also "i < 0 || i >= limit". But
3123+
// i >=u limit
3124+
// -3 >=u -2
3125+
// is false.
3126+
Node* limit = cmpu->in(2);
3127+
const TypeInt* type_limit = _igvn.type(limit)->is_int();
3128+
if (type_limit->_lo < 0) {
3129+
return nullptr;
3130+
}
3131+
3132+
// We prove below that we can extract a single signed loop exit condition from (SLE-full), depending on the stride:
3133+
// stride < 0:
3134+
// i < 0 (SLE = SLE-negative)
3135+
// stride > 0:
3136+
// i >= limit (SLE = SLE-positive)
3137+
// such that we have the following graph before Partial Peeling with stride > 0 (similar for stride < 0):
3138+
//
3139+
// Loop:
3140+
// <peeled section>
3141+
// i >= limit (SLE-positive)
3142+
// <-- CUT HERE -->
3143+
// i >=u limit (ULE)
3144+
// <rest of unpeeled section>
3145+
// goto Loop
3146+
//
3147+
// We exit the loop if:
3148+
// (SLE) is true OR (ULE) is true
3149+
// However, if (SLE) is true then (ULE) also needs to be true to ensure the exact same behavior. Otherwise, we wrongly
3150+
// exit a loop that should not have been exited if we did not apply Partial Peeling. More formally, we need to ensure:
3151+
// (SLE) IMPLIES (ULE)
3152+
// This indeed holds when (COND) is given:
3153+
// - stride > 0:
3154+
// i >= limit // (SLE = SLE-positive)
3155+
// i >= limit >= 0 // (COND)
3156+
// i >=u limit >= 0 // (LEMMA)
3157+
// which is the unsigned loop exit condition (ULE).
3158+
// - stride < 0:
3159+
// i < 0 // (SLE = SLE-negative)
3160+
// (uint) i >u INT_MAX // (NEG) all negative values are greater than INT_MAX when converted to unsigned
3161+
// INT_MAX >= limit >= 0 // (COND)
3162+
// INT_MAX >=u limit >= 0 // (LEMMA)
3163+
// and thus from (NEG) and (LEMMA):
3164+
// i >=u limit
3165+
// which is the unsigned loop exit condition (ULE).
3166+
//
3167+
//
3168+
// After Partial Peeling, we have the following structure for stride > 0 (similar for stride < 0):
3169+
// <cloned peeled section>
3170+
// i >= limit (SLE-positive)
3171+
// Loop:
3172+
// i >=u limit (ULE)
3173+
// <rest of unpeeled section>
3174+
// <peeled section>
3175+
// i >= limit (SLE-positive)
3176+
// goto Loop
3177+
Node* rhs_cmpi;
30503178
if (stride > 0) {
3051-
limit = cmpu->in(2);
3179+
rhs_cmpi = limit; // For i >= limit
30523180
} else {
3053-
limit = _igvn.makecon(TypeInt::ZERO);
3054-
set_ctrl(limit, C->root());
3181+
rhs_cmpi = _igvn.makecon(TypeInt::ZERO); // For i < 0
3182+
set_ctrl(rhs_cmpi, C->root());
30553183
}
30563184
// Create a new region on the exit path
30573185
RegionNode* reg = insert_region_before_proj(lp_exit);
30583186
guarantee(reg != nullptr, "null region node");
30593187

30603188
// Clone the if-cmpu-true-false using a signed compare
30613189
BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
3062-
ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
3190+
ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, rhs_cmpi, lp_continue);
30633191
reg->add_req(cmpi_exit);
30643192

30653193
// Clone the if-cmpu-true-false

0 commit comments

Comments
 (0)