Skip to content

Commit afd852c

Browse files
TobiHartmann authored and slowhog committed
8241114: Better range handling
Reviewed-by: kvn, vlivanov, rhalade, ahgross
1 parent acacae5 commit afd852c

File tree

2 files changed

+97
-147
lines changed

2 files changed

+97
-147
lines changed

src/hotspot/share/opto/loopTransform.cpp

Lines changed: 95 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -2300,105 +2300,92 @@ void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
23002300
}
23012301

23022302
//------------------------------adjust_limit-----------------------------------
2303-
// Helper function for add_constraint().
2304-
Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl, bool round_up) {
2305-
// Compute "I :: (limit-offset)/scale"
2306-
Node *con = new SubINode(rc_limit, offset);
2307-
register_new_node(con, pre_ctrl);
2308-
Node *X = new DivINode(0, con, scale);
2309-
register_new_node(X, pre_ctrl);
2310-
2311-
// When the absolute value of scale is greater than one, the integer
2312-
// division may round limit down so add one to the limit.
2313-
if (round_up) {
2314-
X = new AddINode(X, _igvn.intcon(1));
2315-
register_new_node(X, pre_ctrl);
2316-
}
2317-
2318-
// Adjust loop limit
2319-
loop_limit = (stride_con > 0)
2320-
? (Node*)(new MinINode(loop_limit, X))
2321-
: (Node*)(new MaxINode(loop_limit, X));
2322-
register_new_node(loop_limit, pre_ctrl);
2323-
return loop_limit;
2303+
// Helper function that computes new loop limit as (rc_limit-offset)/scale
2304+
Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round) {
2305+
Node* sub = new SubLNode(rc_limit, offset);
2306+
register_new_node(sub, pre_ctrl);
2307+
Node* limit = new DivLNode(NULL, sub, scale);
2308+
register_new_node(limit, pre_ctrl);
2309+
2310+
// When the absolute value of scale is greater than one, the division
2311+
// may round limit down/up, so add/sub one to/from the limit.
2312+
if (round) {
2313+
limit = new AddLNode(limit, _igvn.longcon(is_positive_stride ? -1 : 1));
2314+
register_new_node(limit, pre_ctrl);
2315+
}
2316+
2317+
// Clamp the limit to handle integer under-/overflows.
2318+
// When reducing the limit, clamp to [min_jint, old_limit]:
2319+
// MIN(old_limit, MAX(limit, min_jint))
2320+
// When increasing the limit, clamp to [old_limit, max_jint]:
2321+
// MAX(old_limit, MIN(limit, max_jint))
2322+
Node* cmp = new CmpLNode(limit, _igvn.longcon(is_positive_stride ? min_jint : max_jint));
2323+
register_new_node(cmp, pre_ctrl);
2324+
Node* bol = new BoolNode(cmp, is_positive_stride ? BoolTest::lt : BoolTest::gt);
2325+
register_new_node(bol, pre_ctrl);
2326+
limit = new ConvL2INode(limit);
2327+
register_new_node(limit, pre_ctrl);
2328+
limit = new CMoveINode(bol, limit, _igvn.intcon(is_positive_stride ? min_jint : max_jint), TypeInt::INT);
2329+
register_new_node(limit, pre_ctrl);
2330+
2331+
limit = is_positive_stride ? (Node*)(new MinINode(old_limit, limit))
2332+
: (Node*)(new MaxINode(old_limit, limit));
2333+
register_new_node(limit, pre_ctrl);
2334+
return limit;
23242335
}
23252336

23262337
//------------------------------add_constraint---------------------------------
23272338
// Constrain the main loop iterations so the conditions:
2328-
// low_limit <= scale_con * I + offset < upper_limit
2329-
// always holds true. That is, either increase the number of iterations in
2330-
// the pre-loop or the post-loop until the condition holds true in the main
2331-
// loop. Stride, scale, offset and limit are all loop invariant. Further,
2332-
// stride and scale are constants (offset and limit often are).
2333-
void PhaseIdealLoop::add_constraint(int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit) {
2334-
// For positive stride, the pre-loop limit always uses a MAX function
2335-
// and the main loop a MIN function. For negative stride these are
2336-
// reversed.
2337-
2338-
// Also for positive stride*scale the affine function is increasing, so the
2339-
// pre-loop must check for underflow and the post-loop for overflow.
2340-
// Negative stride*scale reverses this; pre-loop checks for overflow and
2341-
// post-loop for underflow.
2342-
2343-
Node *scale = _igvn.intcon(scale_con);
2339+
// low_limit <= scale_con*I + offset < upper_limit
2340+
// always hold true. That is, either increase the number of iterations in the
2341+
// pre-loop or reduce the number of iterations in the main-loop until the condition
2342+
// holds true in the main-loop. Stride, scale, offset and limit are all loop
2343+
// invariant. Further, stride and scale are constants (offset and limit often are).
2344+
void PhaseIdealLoop::add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit) {
2345+
assert(_igvn.type(offset)->isa_long() != NULL && _igvn.type(low_limit)->isa_long() != NULL &&
2346+
_igvn.type(upper_limit)->isa_long() != NULL, "arguments should be long values");
2347+
2348+
// For a positive stride, we need to reduce the main-loop limit and
2349+
// increase the pre-loop limit. This is reversed for a negative stride.
2350+
bool is_positive_stride = (stride_con > 0);
2351+
2352+
// If the absolute scale value is greater than one, division in 'adjust_limit' may require
2353+
// rounding. Make sure the ABS method correctly handles min_jint.
2354+
// Only do this for the pre-loop, one less iteration of the main loop doesn't hurt.
2355+
bool round = ABS(scale_con) > 1;
2356+
2357+
Node* scale = _igvn.longcon(scale_con);
23442358
set_ctrl(scale, C->root());
23452359

23462360
if ((stride_con^scale_con) >= 0) { // Use XOR to avoid overflow
2361+
// Positive stride*scale: the affine function is increasing,
2362+
// the pre-loop checks for underflow and the post-loop for overflow.
2363+
23472364
// The overflow limit: scale*I+offset < upper_limit
2348-
// For main-loop compute
2365+
// For the main-loop limit compute:
23492366
// ( if (scale > 0) /* and stride > 0 */
23502367
// I < (upper_limit-offset)/scale
23512368
// else /* scale < 0 and stride < 0 */
23522369
// I > (upper_limit-offset)/scale
23532370
// )
2354-
//
2355-
// (upper_limit-offset) may overflow or underflow.
2356-
// But it is fine since main loop will either have
2357-
// less iterations or will be skipped in such case.
2358-
*main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl, false);
2359-
2360-
// The underflow limit: low_limit <= scale*I+offset.
2361-
// For pre-loop compute
2371+
*main_limit = adjust_limit(is_positive_stride, scale, offset, upper_limit, *main_limit, pre_ctrl, false);
2372+
2373+
// The underflow limit: low_limit <= scale*I+offset
2374+
// For the pre-loop limit compute:
23622375
// NOT(scale*I+offset >= low_limit)
23632376
// scale*I+offset < low_limit
23642377
// ( if (scale > 0) /* and stride > 0 */
23652378
// I < (low_limit-offset)/scale
23662379
// else /* scale < 0 and stride < 0 */
23672380
// I > (low_limit-offset)/scale
23682381
// )
2382+
*pre_limit = adjust_limit(!is_positive_stride, scale, offset, low_limit, *pre_limit, pre_ctrl, round);
2383+
} else {
2384+
// Negative stride*scale: the affine function is decreasing,
2385+
// the pre-loop checks for overflow and the post-loop for underflow.
23692386

2370-
if (low_limit->get_int() == -max_jint) {
2371-
// We need this guard when scale*pre_limit+offset >= limit
2372-
// due to underflow. So we need execute pre-loop until
2373-
// scale*I+offset >= min_int. But (min_int-offset) will
2374-
// underflow when offset > 0 and X will be > original_limit
2375-
// when stride > 0. To avoid it we replace positive offset with 0.
2376-
//
2377-
// Also (min_int+1 == -max_int) is used instead of min_int here
2378-
// to avoid problem with scale == -1 (min_int/(-1) == min_int).
2379-
Node* shift = _igvn.intcon(31);
2380-
set_ctrl(shift, C->root());
2381-
Node* sign = new RShiftINode(offset, shift);
2382-
register_new_node(sign, pre_ctrl);
2383-
offset = new AndINode(offset, sign);
2384-
register_new_node(offset, pre_ctrl);
2385-
} else {
2386-
assert(low_limit->get_int() == 0, "wrong low limit for range check");
2387-
// The only problem we have here when offset == min_int
2388-
// since (0-min_int) == min_int. It may be fine for stride > 0
2389-
// but for stride < 0 X will be < original_limit. To avoid it
2390-
// max(pre_limit, original_limit) is used in do_range_check().
2391-
}
2392-
// Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
2393-
*pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl,
2394-
scale_con > 1 && stride_con > 0);
2395-
2396-
} else { // stride_con*scale_con < 0
2397-
// For negative stride*scale pre-loop checks for overflow and
2398-
// post-loop for underflow.
2399-
//
24002387
// The overflow limit: scale*I+offset < upper_limit
2401-
// For pre-loop compute
2388+
// For the pre-loop limit compute:
24022389
// NOT(scale*I+offset < upper_limit)
24032390
// scale*I+offset >= upper_limit
24042391
// scale*I+offset+1 > upper_limit
@@ -2407,57 +2394,24 @@ void PhaseIdealLoop::add_constraint(int stride_con, int scale_con, Node *offset,
24072394
// else /* scale > 0 and stride < 0 */
24082395
// I > (upper_limit-(offset+1))/scale
24092396
// )
2410-
//
2411-
// (upper_limit-offset-1) may underflow or overflow.
2412-
// To avoid it min(pre_limit, original_limit) is used
2413-
// in do_range_check() for stride > 0 and max() for < 0.
2414-
Node *one = _igvn.intcon(1);
2397+
Node* one = _igvn.longcon(1);
24152398
set_ctrl(one, C->root());
2416-
2417-
Node *plus_one = new AddINode(offset, one);
2399+
Node* plus_one = new AddLNode(offset, one);
24182400
register_new_node(plus_one, pre_ctrl);
2419-
// Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
2420-
*pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl,
2421-
scale_con < -1 && stride_con > 0);
2422-
2423-
if (low_limit->get_int() == -max_jint) {
2424-
// We need this guard when scale*main_limit+offset >= limit
2425-
// due to underflow. So we need execute main-loop while
2426-
// scale*I+offset+1 > min_int. But (min_int-offset-1) will
2427-
// underflow when (offset+1) > 0 and X will be < main_limit
2428-
// when scale < 0 (and stride > 0). To avoid it we replace
2429-
// positive (offset+1) with 0.
2430-
//
2431-
// Also (min_int+1 == -max_int) is used instead of min_int here
2432-
// to avoid problem with scale == -1 (min_int/(-1) == min_int).
2433-
Node* shift = _igvn.intcon(31);
2434-
set_ctrl(shift, C->root());
2435-
Node* sign = new RShiftINode(plus_one, shift);
2436-
register_new_node(sign, pre_ctrl);
2437-
plus_one = new AndINode(plus_one, sign);
2438-
register_new_node(plus_one, pre_ctrl);
2439-
} else {
2440-
assert(low_limit->get_int() == 0, "wrong low limit for range check");
2441-
// The only problem we have here when offset == max_int
2442-
// since (max_int+1) == min_int and (0-min_int) == min_int.
2443-
// But it is fine since main loop will either have
2444-
// less iterations or will be skipped in such case.
2445-
}
2446-
// The underflow limit: low_limit <= scale*I+offset.
2447-
// For main-loop compute
2401+
*pre_limit = adjust_limit(!is_positive_stride, scale, plus_one, upper_limit, *pre_limit, pre_ctrl, round);
2402+
2403+
// The underflow limit: low_limit <= scale*I+offset
2404+
// For the main-loop limit compute:
24482405
// scale*I+offset+1 > low_limit
24492406
// ( if (scale < 0) /* and stride > 0 */
24502407
// I < (low_limit-(offset+1))/scale
24512408
// else /* scale > 0 and stride < 0 */
24522409
// I > (low_limit-(offset+1))/scale
24532410
// )
2454-
2455-
*main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl,
2456-
false);
2411+
*main_limit = adjust_limit(is_positive_stride, scale, plus_one, low_limit, *main_limit, pre_ctrl, false);
24572412
}
24582413
}
24592414

2460-
24612415
//------------------------------is_scaled_iv---------------------------------
24622416
// Return true if exp is a constant times an induction var
24632417
bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
@@ -2654,22 +2608,14 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
26542608
// Must know if it's a count-up or count-down loop
26552609

26562610
int stride_con = cl->stride_con();
2657-
Node *zero = _igvn.intcon(0);
2658-
Node *one = _igvn.intcon(1);
2611+
Node* zero = _igvn.longcon(0);
2612+
Node* one = _igvn.longcon(1);
26592613
// Use symmetrical int range [-max_jint,max_jint]
2660-
Node *mini = _igvn.intcon(-max_jint);
2614+
Node* mini = _igvn.longcon(-max_jint);
26612615
set_ctrl(zero, C->root());
26622616
set_ctrl(one, C->root());
26632617
set_ctrl(mini, C->root());
26642618

2665-
// Range checks that do not dominate the loop backedge (ie.
2666-
// conditionally executed) can lengthen the pre loop limit beyond
2667-
// the original loop limit. To prevent this, the pre limit is
2668-
// (for stride > 0) MINed with the original loop limit (MAXed
2669-
// stride < 0) when some range_check (rc) is conditionally
2670-
// executed.
2671-
bool conditional_rc = false;
2672-
26732619
// Count number of range checks and reduce by load range limits, if zero,
26742620
// the loop is in canonical form to multiversion.
26752621
closed_range_checks = 0;
@@ -2757,23 +2703,30 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
27572703
// stride_con and scale_con can be negative which will flip about the
27582704
// sense of the test.
27592705

2706+
// Perform the limit computations in jlong to avoid overflow
2707+
jlong lscale_con = scale_con;
2708+
Node* int_offset = offset;
2709+
offset = new ConvI2LNode(offset);
2710+
register_new_node(offset, pre_ctrl);
2711+
Node* int_limit = limit;
2712+
limit = new ConvI2LNode(limit);
2713+
register_new_node(limit, pre_ctrl);
2714+
27602715
// Adjust pre and main loop limits to guard the correct iteration set
27612716
if (cmp->Opcode() == Op_CmpU) { // Unsigned compare is really 2 tests
27622717
if (b_test._test == BoolTest::lt) { // Range checks always use lt
27632718
// The underflow and overflow limits: 0 <= scale*I+offset < limit
2764-
add_constraint(stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit);
2765-
// (0-offset)/scale could be outside of loop iterations range.
2766-
conditional_rc = true;
2719+
add_constraint(stride_con, lscale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit);
27672720
Node* init = cl->init_trip();
27682721
Node* opaque_init = new OpaqueLoopInitNode(C, init);
27692722
register_new_node(opaque_init, predicate_proj);
27702723

27712724
// predicate on first value of first iteration
2772-
predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, init);
2725+
predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, int_offset, int_limit, stride_con, init);
27732726
assert(!skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected");
27742727

27752728
// template predicate so it can be updated on next unrolling
2776-
predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, opaque_init);
2729+
predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, int_offset, int_limit, stride_con, opaque_init);
27772730
assert(skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected");
27782731

27792732
Node* opaque_stride = new OpaqueLoopStrideNode(C, cl->stride());
@@ -2782,7 +2735,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
27822735
register_new_node(max_value, predicate_proj);
27832736
max_value = new AddINode(opaque_init, max_value);
27842737
register_new_node(max_value, predicate_proj);
2785-
predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, max_value);
2738+
predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, int_offset, int_limit, stride_con, max_value);
27862739
assert(skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected");
27872740

27882741
} else {
@@ -2797,28 +2750,24 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
27972750
// Fall into GE case
27982751
case BoolTest::ge:
27992752
// Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2800-
scale_con = -scale_con;
2801-
offset = new SubINode(zero, offset);
2753+
lscale_con = -lscale_con;
2754+
offset = new SubLNode(zero, offset);
28022755
register_new_node(offset, pre_ctrl);
2803-
limit = new SubINode(zero, limit);
2756+
limit = new SubLNode(zero, limit);
28042757
register_new_node(limit, pre_ctrl);
28052758
// Fall into LE case
28062759
case BoolTest::le:
28072760
if (b_test._test != BoolTest::gt) {
28082761
// Convert X <= Y to X < Y+1
2809-
limit = new AddINode(limit, one);
2762+
limit = new AddLNode(limit, one);
28102763
register_new_node(limit, pre_ctrl);
28112764
}
28122765
// Fall into LT case
28132766
case BoolTest::lt:
28142767
// The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
28152768
// Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
28162769
// to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2817-
add_constraint(stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
2818-
// ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
2819-
// Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
2820-
// still be outside of loop range.
2821-
conditional_rc = true;
2770+
add_constraint(stride_con, lscale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
28222771
break;
28232772
default:
28242773
if (PrintOpto) {
@@ -2847,7 +2796,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
28472796
--imax;
28482797
}
28492798
}
2850-
if (limit->Opcode() == Op_LoadRange) {
2799+
if (int_limit->Opcode() == Op_LoadRange) {
28512800
closed_range_checks--;
28522801
}
28532802
} // End of is IF
@@ -2858,7 +2807,8 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
28582807
}
28592808

28602809
// Update loop limits
2861-
if (conditional_rc) {
2810+
if (pre_limit != orig_limit) {
2811+
// Computed pre-loop limit can be outside of loop iterations range.
28622812
pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
28632813
: (Node*)new MaxINode(pre_limit, orig_limit);
28642814
register_new_node(pre_limit, pre_ctrl);

src/hotspot/share/opto/loopnode.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1256,9 +1256,9 @@ class PhaseIdealLoop : public PhaseTransform {
12561256
// always holds true. That is, either increase the number of iterations in
12571257
// the pre-loop or the post-loop until the condition holds true in the main
12581258
// loop. Scale_con, offset and limit are all loop invariant.
1259-
void add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit );
1259+
void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
12601260
// Helper function for add_constraint().
1261-
Node* adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl, bool round_up);
1261+
Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
12621262

12631263
// Partially peel loop up through last_peel node.
12641264
bool partial_peel( IdealLoopTree *loop, Node_List &old_new );

0 commit comments

Comments
 (0)