@@ -2300,105 +2300,92 @@ void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
2300
2300
}
2301
2301
2302
2302
// ------------------------------adjust_limit-----------------------------------
2303
- // Helper function for add_constraint().
2304
- Node* PhaseIdealLoop::adjust_limit (int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl, bool round_up) {
2305
- // Compute "I :: (limit-offset)/scale"
2306
- Node *con = new SubINode (rc_limit, offset);
2307
- register_new_node (con, pre_ctrl);
2308
- Node *X = new DivINode (0 , con, scale);
2309
- register_new_node (X, pre_ctrl);
2310
-
2311
- // When the absolute value of scale is greater than one, the integer
2312
- // division may round limit down so add one to the limit.
2313
- if (round_up) {
2314
- X = new AddINode (X, _igvn.intcon (1 ));
2315
- register_new_node (X, pre_ctrl);
2316
- }
2317
-
2318
- // Adjust loop limit
2319
- loop_limit = (stride_con > 0 )
2320
- ? (Node*)(new MinINode (loop_limit, X))
2321
- : (Node*)(new MaxINode (loop_limit, X));
2322
- register_new_node (loop_limit, pre_ctrl);
2323
- return loop_limit;
2303
+ // Helper function that computes new loop limit as (rc_limit-offset)/scale
2304
+ Node* PhaseIdealLoop::adjust_limit (bool is_positive_stride, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round) {
2305
+ Node* sub = new SubLNode (rc_limit, offset);
2306
+ register_new_node (sub, pre_ctrl);
2307
+ Node* limit = new DivLNode (NULL , sub, scale);
2308
+ register_new_node (limit, pre_ctrl);
2309
+
2310
+ // When the absolute value of scale is greater than one, the division
2311
+ // may round limit down/up, so add/sub one to/from the limit.
2312
+ if (round) {
2313
+ limit = new AddLNode (limit, _igvn.longcon (is_positive_stride ? -1 : 1 ));
2314
+ register_new_node (limit, pre_ctrl);
2315
+ }
2316
+
2317
+ // Clamp the limit to handle integer under-/overflows.
2318
+ // When reducing the limit, clamp to [min_jint, old_limit]:
2319
+ // MIN(old_limit, MAX(limit, min_jint))
2320
+ // When increasing the limit, clamp to [old_limit, max_jint]:
2321
+ // MAX(old_limit, MIN(limit, max_jint))
2322
+ Node* cmp = new CmpLNode (limit, _igvn.longcon (is_positive_stride ? min_jint : max_jint));
2323
+ register_new_node (cmp, pre_ctrl);
2324
+ Node* bol = new BoolNode (cmp, is_positive_stride ? BoolTest::lt : BoolTest::gt);
2325
+ register_new_node (bol, pre_ctrl);
2326
+ limit = new ConvL2INode (limit);
2327
+ register_new_node (limit, pre_ctrl);
2328
+ limit = new CMoveINode (bol, limit, _igvn.intcon (is_positive_stride ? min_jint : max_jint), TypeInt::INT);
2329
+ register_new_node (limit, pre_ctrl);
2330
+
2331
+ limit = is_positive_stride ? (Node*)(new MinINode (old_limit, limit))
2332
+ : (Node*)(new MaxINode (old_limit, limit));
2333
+ register_new_node (limit, pre_ctrl);
2334
+ return limit;
2324
2335
}
2325
2336
2326
2337
// ------------------------------add_constraint---------------------------------
2327
2338
// Constrain the main loop iterations so the conditions:
2328
- // low_limit <= scale_con * I + offset < upper_limit
2329
- // always holds true. That is, either increase the number of iterations in
2330
- // the pre-loop or the post-loop until the condition holds true in the main
2331
- // loop. Stride, scale, offset and limit are all loop invariant. Further,
2332
- // stride and scale are constants (offset and limit often are).
2333
- void PhaseIdealLoop::add_constraint (int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit) {
2334
- // For positive stride, the pre-loop limit always uses a MAX function
2335
- // and the main loop a MIN function. For negative stride these are
2336
- // reversed.
2337
-
2338
- // Also for positive stride*scale the affine function is increasing, so the
2339
- // pre-loop must check for underflow and the post-loop for overflow.
2340
- // Negative stride*scale reverses this; pre-loop checks for overflow and
2341
- // post-loop for underflow.
2342
-
2343
- Node *scale = _igvn.intcon (scale_con);
2339
+ // low_limit <= scale_con*I + offset < upper_limit
2340
+ // always hold true. That is, either increase the number of iterations in the
2341
+ // pre-loop or reduce the number of iterations in the main-loop until the condition
2342
+ // holds true in the main-loop. Stride, scale, offset and limit are all loop
2343
+ // invariant. Further, stride and scale are constants (offset and limit often are).
2344
+ void PhaseIdealLoop::add_constraint (jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit) {
2345
+ assert (_igvn.type (offset)->isa_long () != NULL && _igvn.type (low_limit)->isa_long () != NULL &&
2346
+ _igvn.type (upper_limit)->isa_long () != NULL , " arguments should be long values" );
2347
+
2348
+ // For a positive stride, we need to reduce the main-loop limit and
2349
+ // increase the pre-loop limit. This is reversed for a negative stride.
2350
+ bool is_positive_stride = (stride_con > 0 );
2351
+
2352
+ // If the absolute scale value is greater than one, division in 'adjust_limit' may require
2353
+ // rounding. Make sure the ABS method correctly handles min_jint.
2354
+ // Only do this for the pre-loop; one fewer iteration of the main loop doesn't hurt.
2355
+ bool round = ABS (scale_con) > 1 ;
2356
+
2357
+ Node* scale = _igvn.longcon (scale_con);
2344
2358
set_ctrl (scale, C->root ());
2345
2359
2346
2360
if ((stride_con^scale_con) >= 0 ) { // Use XOR to avoid overflow
2361
+ // Positive stride*scale: the affine function is increasing,
2362
+ // the pre-loop checks for underflow and the post-loop for overflow.
2363
+
2347
2364
// The overflow limit: scale*I+offset < upper_limit
2348
- // For main-loop compute
2365
+ // For the main-loop limit compute:
2349
2366
// ( if (scale > 0) /* and stride > 0 */
2350
2367
// I < (upper_limit-offset)/scale
2351
2368
// else /* scale < 0 and stride < 0 */
2352
2369
// I > (upper_limit-offset)/scale
2353
2370
// )
2354
- //
2355
- // (upper_limit-offset) may overflow or underflow.
2356
- // But it is fine since main loop will either have
2357
- // less iterations or will be skipped in such case.
2358
- *main_limit = adjust_limit (stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl, false );
2359
-
2360
- // The underflow limit: low_limit <= scale*I+offset.
2361
- // For pre-loop compute
2371
+ *main_limit = adjust_limit (is_positive_stride, scale, offset, upper_limit, *main_limit, pre_ctrl, false );
2372
+
2373
+ // The underflow limit: low_limit <= scale*I+offset
2374
+ // For the pre-loop limit compute:
2362
2375
// NOT(scale*I+offset >= low_limit)
2363
2376
// scale*I+offset < low_limit
2364
2377
// ( if (scale > 0) /* and stride > 0 */
2365
2378
// I < (low_limit-offset)/scale
2366
2379
// else /* scale < 0 and stride < 0 */
2367
2380
// I > (low_limit-offset)/scale
2368
2381
// )
2382
+ *pre_limit = adjust_limit (!is_positive_stride, scale, offset, low_limit, *pre_limit, pre_ctrl, round);
2383
+ } else {
2384
+ // Negative stride*scale: the affine function is decreasing,
2385
+ // the pre-loop checks for overflow and the post-loop for underflow.
2369
2386
2370
- if (low_limit->get_int () == -max_jint) {
2371
- // We need this guard when scale*pre_limit+offset >= limit
2372
- // due to underflow. So we need execute pre-loop until
2373
- // scale*I+offset >= min_int. But (min_int-offset) will
2374
- // underflow when offset > 0 and X will be > original_limit
2375
- // when stride > 0. To avoid it we replace positive offset with 0.
2376
- //
2377
- // Also (min_int+1 == -max_int) is used instead of min_int here
2378
- // to avoid problem with scale == -1 (min_int/(-1) == min_int).
2379
- Node* shift = _igvn.intcon (31 );
2380
- set_ctrl (shift, C->root ());
2381
- Node* sign = new RShiftINode (offset, shift);
2382
- register_new_node (sign, pre_ctrl);
2383
- offset = new AndINode (offset, sign);
2384
- register_new_node (offset, pre_ctrl);
2385
- } else {
2386
- assert (low_limit->get_int () == 0 , " wrong low limit for range check" );
2387
- // The only problem we have here when offset == min_int
2388
- // since (0-min_int) == min_int. It may be fine for stride > 0
2389
- // but for stride < 0 X will be < original_limit. To avoid it
2390
- // max(pre_limit, original_limit) is used in do_range_check().
2391
- }
2392
- // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
2393
- *pre_limit = adjust_limit ((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl,
2394
- scale_con > 1 && stride_con > 0 );
2395
-
2396
- } else { // stride_con*scale_con < 0
2397
- // For negative stride*scale pre-loop checks for overflow and
2398
- // post-loop for underflow.
2399
- //
2400
2387
// The overflow limit: scale*I+offset < upper_limit
2401
- // For pre-loop compute
2388
+ // For the pre-loop limit compute:
2402
2389
// NOT(scale*I+offset < upper_limit)
2403
2390
// scale*I+offset >= upper_limit
2404
2391
// scale*I+offset+1 > upper_limit
@@ -2407,57 +2394,24 @@ void PhaseIdealLoop::add_constraint(int stride_con, int scale_con, Node *offset,
2407
2394
// else /* scale > 0 and stride < 0 */
2408
2395
// I > (upper_limit-(offset+1))/scale
2409
2396
// )
2410
- //
2411
- // (upper_limit-offset-1) may underflow or overflow.
2412
- // To avoid it min(pre_limit, original_limit) is used
2413
- // in do_range_check() for stride > 0 and max() for < 0.
2414
- Node *one = _igvn.intcon (1 );
2397
+ Node* one = _igvn.longcon (1 );
2415
2398
set_ctrl (one, C->root ());
2416
-
2417
- Node *plus_one = new AddINode (offset, one);
2399
+ Node* plus_one = new AddLNode (offset, one);
2418
2400
register_new_node (plus_one, pre_ctrl);
2419
- // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
2420
- *pre_limit = adjust_limit ((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl,
2421
- scale_con < -1 && stride_con > 0 );
2422
-
2423
- if (low_limit->get_int () == -max_jint) {
2424
- // We need this guard when scale*main_limit+offset >= limit
2425
- // due to underflow. So we need execute main-loop while
2426
- // scale*I+offset+1 > min_int. But (min_int-offset-1) will
2427
- // underflow when (offset+1) > 0 and X will be < main_limit
2428
- // when scale < 0 (and stride > 0). To avoid it we replace
2429
- // positive (offset+1) with 0.
2430
- //
2431
- // Also (min_int+1 == -max_int) is used instead of min_int here
2432
- // to avoid problem with scale == -1 (min_int/(-1) == min_int).
2433
- Node* shift = _igvn.intcon (31 );
2434
- set_ctrl (shift, C->root ());
2435
- Node* sign = new RShiftINode (plus_one, shift);
2436
- register_new_node (sign, pre_ctrl);
2437
- plus_one = new AndINode (plus_one, sign);
2438
- register_new_node (plus_one, pre_ctrl);
2439
- } else {
2440
- assert (low_limit->get_int () == 0 , " wrong low limit for range check" );
2441
- // The only problem we have here when offset == max_int
2442
- // since (max_int+1) == min_int and (0-min_int) == min_int.
2443
- // But it is fine since main loop will either have
2444
- // less iterations or will be skipped in such case.
2445
- }
2446
- // The underflow limit: low_limit <= scale*I+offset.
2447
- // For main-loop compute
2401
+ *pre_limit = adjust_limit (!is_positive_stride, scale, plus_one, upper_limit, *pre_limit, pre_ctrl, round);
2402
+
2403
+ // The underflow limit: low_limit <= scale*I+offset
2404
+ // For the main-loop limit compute:
2448
2405
// scale*I+offset+1 > low_limit
2449
2406
// ( if (scale < 0) /* and stride > 0 */
2450
2407
// I < (low_limit-(offset+1))/scale
2451
2408
// else /* scale > 0 and stride < 0 */
2452
2409
// I > (low_limit-(offset+1))/scale
2453
2410
// )
2454
-
2455
- *main_limit = adjust_limit (stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl,
2456
- false );
2411
+ *main_limit = adjust_limit (is_positive_stride, scale, plus_one, low_limit, *main_limit, pre_ctrl, false );
2457
2412
}
2458
2413
}
2459
2414
2460
-
2461
2415
// ------------------------------is_scaled_iv---------------------------------
2462
2416
// Return true if exp is a constant times an induction var
2463
2417
bool PhaseIdealLoop::is_scaled_iv (Node* exp, Node* iv, int * p_scale) {
@@ -2654,22 +2608,14 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
2654
2608
// Must know if its a count-up or count-down loop
2655
2609
2656
2610
int stride_con = cl->stride_con ();
2657
- Node * zero = _igvn.intcon (0 );
2658
- Node * one = _igvn.intcon (1 );
2611
+ Node* zero = _igvn.longcon (0 );
2612
+ Node* one = _igvn.longcon (1 );
2659
2613
// Use symmetrical int range [-max_jint,max_jint]
2660
- Node * mini = _igvn.intcon (-max_jint);
2614
+ Node* mini = _igvn.longcon (-max_jint);
2661
2615
set_ctrl (zero, C->root ());
2662
2616
set_ctrl (one, C->root ());
2663
2617
set_ctrl (mini, C->root ());
2664
2618
2665
- // Range checks that do not dominate the loop backedge (ie.
2666
- // conditionally executed) can lengthen the pre loop limit beyond
2667
- // the original loop limit. To prevent this, the pre limit is
2668
- // (for stride > 0) MINed with the original loop limit (MAXed
2669
- // stride < 0) when some range_check (rc) is conditionally
2670
- // executed.
2671
- bool conditional_rc = false ;
2672
-
2673
2619
// Count number of range checks and reduce by load range limits, if zero,
2674
2620
// the loop is in canonical form to multiversion.
2675
2621
closed_range_checks = 0 ;
@@ -2757,23 +2703,30 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
2757
2703
// stride_con and scale_con can be negative, which will flip the
2758
2704
// sense of the test.
2759
2705
2706
+ // Perform the limit computations in jlong to avoid overflow
2707
+ jlong lscale_con = scale_con;
2708
+ Node* int_offset = offset;
2709
+ offset = new ConvI2LNode (offset);
2710
+ register_new_node (offset, pre_ctrl);
2711
+ Node* int_limit = limit;
2712
+ limit = new ConvI2LNode (limit);
2713
+ register_new_node (limit, pre_ctrl);
2714
+
2760
2715
// Adjust pre and main loop limits to guard the correct iteration set
2761
2716
if (cmp->Opcode () == Op_CmpU) { // Unsigned compare is really 2 tests
2762
2717
if (b_test._test == BoolTest::lt) { // Range checks always use lt
2763
2718
// The underflow and overflow limits: 0 <= scale*I+offset < limit
2764
- add_constraint (stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit);
2765
- // (0-offset)/scale could be outside of loop iterations range.
2766
- conditional_rc = true ;
2719
+ add_constraint (stride_con, lscale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit);
2767
2720
Node* init = cl->init_trip ();
2768
2721
Node* opaque_init = new OpaqueLoopInitNode (C, init);
2769
2722
register_new_node (opaque_init, predicate_proj);
2770
2723
2771
2724
// predicate on first value of first iteration
2772
- predicate_proj = add_range_check_predicate (loop, cl, predicate_proj, scale_con, offset, limit , stride_con, init);
2725
+ predicate_proj = add_range_check_predicate (loop, cl, predicate_proj, scale_con, int_offset, int_limit , stride_con, init);
2773
2726
assert (!skeleton_predicate_has_opaque (predicate_proj->in (0 )->as_If ()), " unexpected" );
2774
2727
2775
2728
// template predicate so it can be updated on next unrolling
2776
- predicate_proj = add_range_check_predicate (loop, cl, predicate_proj, scale_con, offset, limit , stride_con, opaque_init);
2729
+ predicate_proj = add_range_check_predicate (loop, cl, predicate_proj, scale_con, int_offset, int_limit , stride_con, opaque_init);
2777
2730
assert (skeleton_predicate_has_opaque (predicate_proj->in (0 )->as_If ()), " unexpected" );
2778
2731
2779
2732
Node* opaque_stride = new OpaqueLoopStrideNode (C, cl->stride ());
@@ -2782,7 +2735,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
2782
2735
register_new_node (max_value, predicate_proj);
2783
2736
max_value = new AddINode (opaque_init, max_value);
2784
2737
register_new_node (max_value, predicate_proj);
2785
- predicate_proj = add_range_check_predicate (loop, cl, predicate_proj, scale_con, offset, limit , stride_con, max_value);
2738
+ predicate_proj = add_range_check_predicate (loop, cl, predicate_proj, scale_con, int_offset, int_limit , stride_con, max_value);
2786
2739
assert (skeleton_predicate_has_opaque (predicate_proj->in (0 )->as_If ()), " unexpected" );
2787
2740
2788
2741
} else {
@@ -2797,28 +2750,24 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
2797
2750
// Fall into GE case
2798
2751
case BoolTest::ge:
2799
2752
// Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2800
- scale_con = -scale_con ;
2801
- offset = new SubINode (zero, offset);
2753
+ lscale_con = -lscale_con ;
2754
+ offset = new SubLNode (zero, offset);
2802
2755
register_new_node (offset, pre_ctrl);
2803
- limit = new SubINode (zero, limit);
2756
+ limit = new SubLNode (zero, limit);
2804
2757
register_new_node (limit, pre_ctrl);
2805
2758
// Fall into LE case
2806
2759
case BoolTest::le:
2807
2760
if (b_test._test != BoolTest::gt) {
2808
2761
// Convert X <= Y to X < Y+1
2809
- limit = new AddINode (limit, one);
2762
+ limit = new AddLNode (limit, one);
2810
2763
register_new_node (limit, pre_ctrl);
2811
2764
}
2812
2765
// Fall into LT case
2813
2766
case BoolTest::lt:
2814
2767
// The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
2815
2768
// Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
2816
2769
// to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2817
- add_constraint (stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
2818
- // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
2819
- // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
2820
- // still be outside of loop range.
2821
- conditional_rc = true ;
2770
+ add_constraint (stride_con, lscale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
2822
2771
break ;
2823
2772
default :
2824
2773
if (PrintOpto) {
@@ -2847,7 +2796,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
2847
2796
--imax;
2848
2797
}
2849
2798
}
2850
- if (limit ->Opcode () == Op_LoadRange) {
2799
+ if (int_limit ->Opcode () == Op_LoadRange) {
2851
2800
closed_range_checks--;
2852
2801
}
2853
2802
} // End of is IF
@@ -2858,7 +2807,8 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
2858
2807
}
2859
2808
2860
2809
// Update loop limits
2861
- if (conditional_rc) {
2810
+ if (pre_limit != orig_limit) {
2811
+ // Computed pre-loop limit can be outside of loop iterations range.
2862
2812
pre_limit = (stride_con > 0 ) ? (Node*)new MinINode (pre_limit, orig_limit)
2863
2813
: (Node*)new MaxINode (pre_limit, orig_limit);
2864
2814
register_new_node (pre_limit, pre_ctrl);
0 commit comments