Skip to content

Commit 7328df4

Browse files
committed
8332920: C2: Partial Peeling is wrongly applied for CmpU with negative limit
Reviewed-by: chagedorn Backport-of: ef101f1bf20f2813f855af4bc4eb317565175208
1 parent ca23974 commit 7328df4

File tree

2 files changed

+498
-38
lines changed

2 files changed

+498
-38
lines changed

src/hotspot/share/opto/loopopts.cpp

Lines changed: 166 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2926,52 +2926,101 @@ RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
29262926
return reg;
29272927
}
29282928

2929-
//------------------------------ insert_cmpi_loop_exit -------------------------------------
2930-
// Clone a signed compare loop exit from an unsigned compare and
2931-
// insert it before the unsigned cmp on the stay-in-loop path.
2932-
// All new nodes inserted in the dominator tree between the original
2933-
// if and it's projections. The original if test is replaced with
2934-
// a constant to force the stay-in-loop path.
2929+
// Idea
2930+
// ----
2931+
// Partial Peeling tries to rotate the loop in such a way that it can later be turned into a counted loop. Counted loops
2932+
// require a signed loop exit test. When calling this method, we've only found a suitable unsigned test to partial peel
2933+
// with. Therefore, we try to split off a signed loop exit test from the unsigned test such that it can be used as new
2934+
// loop exit while keeping the unsigned test unchanged and preserving the same behavior as if we've used the unsigned
2935+
// test alone instead:
29352936
//
2936-
// This is done to make sure that the original if and it's projections
2937-
// still dominate the same set of control nodes, that the ctrl() relation
2938-
// from data nodes to them is preserved, and that their loop nesting is
2939-
// preserved.
2937+
// Before Partial Peeling:
2938+
// Loop:
2939+
// <peeled section>
2940+
// Split off signed loop exit test
2941+
// <-- CUT HERE -->
2942+
// Unchanged unsigned loop exit test
2943+
// <rest of unpeeled section>
2944+
// goto Loop
29402945
//
2941-
// before
2942-
// if(i <u limit) unsigned compare loop exit
2946+
// After Partial Peeling:
2947+
// <cloned peeled section>
2948+
// Cloned split off signed loop exit test
2949+
// Loop:
2950+
// Unchanged unsigned loop exit test
2951+
// <rest of unpeeled section>
2952+
// <peeled section>
2953+
// Split off signed loop exit test
2954+
// goto Loop
2955+
//
2956+
// Details
2957+
// -------
2958+
// Before:
2959+
// if (i <u limit) Unsigned loop exit condition
29432960
// / |
29442961
// v v
29452962
// exit-proj stay-in-loop-proj
29462963
//
2947-
// after
2948-
// if(stay-in-loop-const) original if
2949-
// / |
2950-
// / v
2951-
// / if(i < limit) new signed test
2964+
// Split off a signed loop exit test (i.e. with CmpI) from an unsigned loop exit test (i.e. with CmpU) and insert it
2965+
// before the CmpU on the stay-in-loop path and keep both tests:
2966+
//
2967+
// if (i <u limit) Signed loop exit test
2968+
// / |
2969+
// / if (i <u limit) Unsigned loop exit test
29522970
// / / |
2953-
// / / v
2954-
// / / if(i <u limit) new cloned unsigned test
2955-
// / / / |
2956-
// v v v |
2957-
// region |
2958-
// | |
2959-
// dum-if |
2960-
// / | |
2961-
// ether | |
2962-
// v v
2971+
// v v v
2972+
// exit-region stay-in-loop-proj
2973+
//
2974+
// Implementation
2975+
// --------------
2976+
// We need to make sure that the new signed loop exit test is properly inserted into the graph such that the unsigned
2977+
// loop exit test still dominates the same set of control nodes, the ctrl() relation from data nodes to both loop
2978+
// exit tests is preserved, and their loop nesting is correct.
2979+
//
2980+
// To achieve that, we clone the unsigned loop exit test completely (leave it unchanged), insert the signed loop exit
2981+
// test above it and kill the original unsigned loop exit test by setting it's condition to a constant
2982+
// (i.e. stay-in-loop-const in graph below) such that IGVN can fold it later:
2983+
//
2984+
// if (stay-in-loop-const) Killed original unsigned loop exit test
2985+
// / |
2986+
// / v
2987+
// / if (i < limit) Split off signed loop exit test
2988+
// / / |
2989+
// / / v
2990+
// / / if (i <u limit) Cloned unsigned loop exit test
2991+
// / / / |
2992+
// v v v |
2993+
// exit-region |
2994+
// | |
2995+
// dummy-if |
2996+
// / | |
2997+
// dead | |
2998+
// v v
29632999
// exit-proj stay-in-loop-proj
29643000
//
2965-
IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
3001+
// Note: The dummy-if is inserted to create a region to merge the loop exits between the original to be killed unsigned
3002+
// loop exit test and its exit projection while keeping the exit projection (also see insert_region_before_proj()).
3003+
//
3004+
// Requirements
3005+
// ------------
3006+
// Note that we can only split off a signed loop exit test from the unsigned loop exit test when the behavior is exactly
3007+
// the same as before with only a single unsigned test. This is only possible if certain requirements are met.
3008+
// Otherwise, we need to bail out (see comments in the code below).
3009+
IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree* loop) {
29663010
const bool Signed = true;
29673011
const bool Unsigned = false;
29683012

29693013
BoolNode* bol = if_cmpu->in(1)->as_Bool();
2970-
if (bol->_test._test != BoolTest::lt) return nullptr;
3014+
if (bol->_test._test != BoolTest::lt) {
3015+
return nullptr;
3016+
}
29713017
CmpNode* cmpu = bol->in(1)->as_Cmp();
2972-
if (cmpu->Opcode() != Op_CmpU) return nullptr;
3018+
assert(cmpu->Opcode() == Op_CmpU, "must be unsigned comparison");
3019+
29733020
int stride = stride_of_possible_iv(if_cmpu);
2974-
if (stride == 0) return nullptr;
3021+
if (stride == 0) {
3022+
return nullptr;
3023+
}
29753024

29763025
Node* lp_proj = stay_in_loop(if_cmpu, loop);
29773026
guarantee(lp_proj != nullptr, "null loop node");
@@ -2983,22 +3032,101 @@ IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *lo
29833032
// We therefore can't add a single exit condition.
29843033
return nullptr;
29853034
}
2986-
// The loop exit condition is !(i <u limit) ==> (i < 0 || i >= limit).
2987-
// Split out the exit condition (i < 0) for stride < 0 or (i >= limit) for stride > 0.
2988-
Node* limit = nullptr;
3035+
// The unsigned loop exit condition is
3036+
// !(i <u limit)
3037+
// = i >=u limit
3038+
//
3039+
// First, we note that for any x for which
3040+
// 0 <= x <= INT_MAX
3041+
// we can convert x to an unsigned int and still get the same guarantee:
3042+
// 0 <= (uint) x <= INT_MAX = (uint) INT_MAX
3043+
// 0 <=u (uint) x <=u INT_MAX = (uint) INT_MAX (LEMMA)
3044+
//
3045+
// With that in mind, if
3046+
// limit >= 0 (COND)
3047+
// then the unsigned loop exit condition
3048+
// i >=u limit (ULE)
3049+
// is equivalent to
3050+
// i < 0 || i >= limit (SLE-full)
3051+
// because either i is negative and therefore always greater than MAX_INT when converting to unsigned
3052+
// (uint) i >=u MAX_INT >= limit >= 0
3053+
// or otherwise
3054+
// i >= limit >= 0
3055+
// holds due to (LEMMA).
3056+
//
3057+
// For completeness, a counterexample with limit < 0:
3058+
// Assume i = -3 and limit = -2:
3059+
// i < 0
3060+
// -2 < 0
3061+
// is true and thus also "i < 0 || i >= limit". But
3062+
// i >=u limit
3063+
// -3 >=u -2
3064+
// is false.
3065+
Node* limit = cmpu->in(2);
3066+
const TypeInt* type_limit = _igvn.type(limit)->is_int();
3067+
if (type_limit->_lo < 0) {
3068+
return nullptr;
3069+
}
3070+
3071+
// We prove below that we can extract a single signed loop exit condition from (SLE-full), depending on the stride:
3072+
// stride < 0:
3073+
// i < 0 (SLE = SLE-negative)
3074+
// stride > 0:
3075+
// i >= limit (SLE = SLE-positive)
3076+
// such that we have the following graph before Partial Peeling with stride > 0 (similar for stride < 0):
3077+
//
3078+
// Loop:
3079+
// <peeled section>
3080+
// i >= limit (SLE-positive)
3081+
// <-- CUT HERE -->
3082+
// i >=u limit (ULE)
3083+
// <rest of unpeeled section>
3084+
// goto Loop
3085+
//
3086+
// We exit the loop if:
3087+
// (SLE) is true OR (ULE) is true
3088+
// However, if (SLE) is true then (ULE) also needs to be true to ensure the exact same behavior. Otherwise, we wrongly
3089+
// exit a loop that should not have been exited if we did not apply Partial Peeling. More formally, we need to ensure:
3090+
// (SLE) IMPLIES (ULE)
3091+
// This indeed holds when (COND) is given:
3092+
// - stride > 0:
3093+
// i >= limit // (SLE = SLE-positive)
3094+
// i >= limit >= 0 // (COND)
3095+
// i >=u limit >= 0 // (LEMMA)
3096+
// which is the unsigned loop exit condition (ULE).
3097+
// - stride < 0:
3098+
// i < 0 // (SLE = SLE-negative)
3099+
// (uint) i >u MAX_INT // (NEG) all negative values are greater than MAX_INT when converted to unsigned
3100+
// MAX_INT >= limit >= 0 // (COND)
3101+
// MAX_INT >=u limit >= 0 // (LEMMA)
3102+
// and thus from (NEG) and (LEMMA):
3103+
// i >=u limit
3104+
// which is the unsigned loop exit condition (ULE).
3105+
//
3106+
//
3107+
// After Partial Peeling, we have the following structure for stride > 0 (similar for stride < 0):
3108+
// <cloned peeled section>
3109+
// i >= limit (SLE-positive)
3110+
// Loop:
3111+
// i >=u limit (ULE)
3112+
// <rest of unpeeled section>
3113+
// <peeled section>
3114+
// i >= limit (SLE-positive)
3115+
// goto Loop
3116+
Node* rhs_cmpi;
29893117
if (stride > 0) {
2990-
limit = cmpu->in(2);
3118+
rhs_cmpi = limit; // For i >= limit
29913119
} else {
2992-
limit = _igvn.makecon(TypeInt::ZERO);
2993-
set_ctrl(limit, C->root());
3120+
rhs_cmpi = _igvn.makecon(TypeInt::ZERO); // For i < 0
3121+
set_ctrl(rhs_cmpi, C->root());
29943122
}
29953123
// Create a new region on the exit path
29963124
RegionNode* reg = insert_region_before_proj(lp_exit);
29973125
guarantee(reg != nullptr, "null region node");
29983126

29993127
// Clone the if-cmpu-true-false using a signed compare
30003128
BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
3001-
ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
3129+
ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, rhs_cmpi, lp_continue);
30023130
reg->add_req(cmpi_exit);
30033131

30043132
// Clone the if-cmpu-true-false

0 commit comments

Comments
 (0)