Skip to content

Commit b3faecf

Browse files
committed
8276116: C2: optimize long range checks in int counted loops
Reviewed-by: kvn
1 parent fe2ae8e commit b3faecf

File tree

8 files changed

+610
-100
lines changed

8 files changed

+610
-100
lines changed

src/hotspot/cpu/x86/x86_32.ad

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13130,6 +13130,24 @@ instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_lo
1313013130
ins_pipe( pipe_cmov_reg_long );
1313113131
%}
1313213132

13133+
// CMoveL of src into dst (both eRegL) for a cmpOpU compare operand on
// flagsReg_ulong_LTGE flags (presumably the unsigned-compare variant of
// cmovLL_reg_LTGE -- confirm against the operand definitions).
// This rule emits nothing of its own: it expands to cmovLL_reg_LTGE with the
// same operands.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13134+
match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13135+
// Only applies when cmov is supported and the Bool test is lt or ge.
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13136+
ins_cost(400);
13137+
expand %{
13138+
cmovLL_reg_LTGE(cmp, flags, dst, src);
13139+
%}
13140+
%}
13141+
13142+
// CMoveL of a long loaded from memory (LoadL src) into dst (eRegL) for a
// cmpOpU compare operand on flagsReg_ulong_LTGE flags (presumably the
// unsigned-compare variant of cmovLL_mem_LTGE -- confirm against the operand
// definitions).
// This rule emits nothing of its own: it expands to cmovLL_mem_LTGE with the
// same operands.
instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13143+
match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13144+
// Only applies when cmov is supported and the Bool test is lt or ge.
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13145+
ins_cost(500);
13146+
expand %{
13147+
cmovLL_mem_LTGE(cmp, flags, dst, src);
13148+
%}
13149+
%}
13150+
1313313151
// Compare 2 longs and CMOVE ints.
1313413152
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
1313513153
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));

src/hotspot/share/opto/compile.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3463,7 +3463,7 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f
34633463
}
34643464
break;
34653465
case Op_Loop:
3466-
assert(!n->as_Loop()->is_transformed_long_inner_loop() || _loop_opts_cnt == 0, "should have been turned into a counted loop");
3466+
assert(!n->as_Loop()->is_loop_nest_inner_loop() || _loop_opts_cnt == 0, "should have been turned into a counted loop");
34673467
case Op_CountedLoop:
34683468
case Op_LongCountedLoop:
34693469
case Op_OuterStripMinedLoop:

src/hotspot/share/opto/loopTransform.cpp

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,7 +1065,7 @@ void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLo
10651065
// When TRUE, the estimated node budget is also requested.
10661066
//
10671067
// We will actually perform iteration-splitting, a more powerful form of RCE.
1068-
bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional) const {
1068+
bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional, BasicType bt) const {
10691069
if (!provisional && !RangeCheckElimination) return false;
10701070

10711071
// If nodes are depleted, some transform has miscalculated its needs.
@@ -1087,7 +1087,7 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional)
10871087

10881088
BaseCountedLoopNode* cl = _head->as_BaseCountedLoop();
10891089
Node *trip_counter = cl->phi();
1090-
BasicType bt = cl->bt();
1090+
assert(!cl->is_LongCountedLoop() || bt == T_LONG, "only long range checks in long counted loops");
10911091

10921092
// Check loop body for tests of trip-counter plus loop-invariant vs
10931093
// loop-invariant.
@@ -1135,7 +1135,7 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional)
11351135
}
11361136
}
11371137

1138-
if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
1138+
if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL, bt)) {
11391139
continue;
11401140
}
11411141
}
@@ -1145,7 +1145,9 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional)
11451145
if (is_loop_exit(iff)) {
11461146
// Found valid reason to split iterations (if there is room).
11471147
// NOTE: Usually a gross overestimate.
1148-
return provisional || phase->may_require_nodes(est_loop_clone_sz(2));
1148+
// Long range checks cause the loop to be transformed into a loop nest which only causes a fixed number of nodes
1149+
// to be added
1150+
return provisional || bt == T_LONG || phase->may_require_nodes(est_loop_clone_sz(2));
11491151
}
11501152
} // End of is IF
11511153
}
@@ -2508,34 +2510,52 @@ void PhaseIdealLoop::add_constraint(jlong stride_con, jlong scale_con, Node* off
25082510
}
25092511
}
25102512

2513+
//------------------------------is_iv------------------------------------------
// Return true if exp is the induction variable iv itself, or, when bt is
// T_LONG and iv has an int bottom type, a ConvI2L node whose input 1 is iv.
// Return false for any other expression.
bool PhaseIdealLoop::is_iv(Node* exp, Node* iv, BasicType bt) {
2514+
if (exp == iv) {
2515+
return true;
2516+
}
2517+
2518+
// An int iv widened to long still counts as the iv when matching long expressions.
if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L && exp->in(1) == iv) {
2519+
return true;
2520+
}
2521+
return false;
2522+
}
2523+
25112524
//------------------------------is_scaled_iv---------------------------------
25122525
// Return true if exp is a constant times an induction var
2513-
bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt) {
2526+
bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt, bool* converted) {
25142527
exp = exp->uncast();
25152528
assert(bt == T_INT || bt == T_LONG, "unexpected int type");
2516-
if (exp == iv) {
2529+
if (is_iv(exp, iv, bt)) {
25172530
if (p_scale != NULL) {
25182531
*p_scale = 1;
25192532
}
25202533
return true;
25212534
}
2535+
if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L) {
2536+
exp = exp->in(1);
2537+
bt = T_INT;
2538+
if (converted != NULL) {
2539+
*converted = true;
2540+
}
2541+
}
25222542
int opc = exp->Opcode();
25232543
// Can't use is_Mul() here as it's true for AndI and AndL
25242544
if (opc == Op_Mul(bt)) {
2525-
if (exp->in(1)->uncast() == iv && exp->in(2)->is_Con()) {
2545+
if (is_iv(exp->in(1)->uncast(), iv, bt) && exp->in(2)->is_Con()) {
25262546
if (p_scale != NULL) {
25272547
*p_scale = exp->in(2)->get_integer_as_long(bt);
25282548
}
25292549
return true;
25302550
}
2531-
if (exp->in(2)->uncast() == iv && exp->in(1)->is_Con()) {
2551+
if (is_iv(exp->in(2)->uncast(), iv, bt) && exp->in(1)->is_Con()) {
25322552
if (p_scale != NULL) {
25332553
*p_scale = exp->in(1)->get_integer_as_long(bt);
25342554
}
25352555
return true;
25362556
}
25372557
} else if (opc == Op_LShift(bt)) {
2538-
if (exp->in(1)->uncast() == iv && exp->in(2)->is_Con()) {
2558+
if (is_iv(exp->in(1)->uncast(), iv, bt) && exp->in(2)->is_Con()) {
25392559
if (p_scale != NULL) {
25402560
jint shift_amount = exp->in(2)->get_int();
25412561
if (bt == T_INT) {
@@ -2552,9 +2572,9 @@ bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType
25522572

25532573
//-----------------------------is_scaled_iv_plus_offset------------------------------
25542574
// Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
2555-
bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, int depth) {
2575+
bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, bool* converted, int depth) {
25562576
assert(bt == T_INT || bt == T_LONG, "unexpected int type");
2557-
if (is_scaled_iv(exp, iv, p_scale, bt)) {
2577+
if (is_scaled_iv(exp, iv, p_scale, bt, converted)) {
25582578
if (p_offset != NULL) {
25592579
Node *zero = _igvn.integercon(0, bt);
25602580
set_ctrl(zero, C->root());
@@ -2565,13 +2585,13 @@ bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scal
25652585
exp = exp->uncast();
25662586
int opc = exp->Opcode();
25672587
if (opc == Op_Add(bt)) {
2568-
if (is_scaled_iv(exp->in(1), iv, p_scale, bt)) {
2588+
if (is_scaled_iv(exp->in(1), iv, p_scale, bt, converted)) {
25692589
if (p_offset != NULL) {
25702590
*p_offset = exp->in(2);
25712591
}
25722592
return true;
25732593
}
2574-
if (is_scaled_iv(exp->in(2), iv, p_scale, bt)) {
2594+
if (is_scaled_iv(exp->in(2), iv, p_scale, bt, converted)) {
25752595
if (p_offset != NULL) {
25762596
*p_offset = exp->in(1);
25772597
}
@@ -2581,7 +2601,7 @@ bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scal
25812601
Node* offset2 = NULL;
25822602
if (depth < 2 &&
25832603
is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
2584-
p_offset != NULL ? &offset2 : NULL, bt, depth+1)) {
2604+
p_offset != NULL ? &offset2 : NULL, bt, converted, depth+1)) {
25852605
if (p_offset != NULL) {
25862606
Node *ctrl_off2 = get_ctrl(offset2);
25872607
Node* offset = AddNode::make(offset2, exp->in(2), bt);
@@ -2592,7 +2612,7 @@ bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scal
25922612
}
25932613
}
25942614
} else if (opc == Op_Sub(bt)) {
2595-
if (is_scaled_iv(exp->in(1), iv, p_scale, bt)) {
2615+
if (is_scaled_iv(exp->in(1), iv, p_scale, bt, converted)) {
25962616
if (p_offset != NULL) {
25972617
Node *zero = _igvn.integercon(0, bt);
25982618
set_ctrl(zero, C->root());
@@ -2603,7 +2623,7 @@ bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scal
26032623
}
26042624
return true;
26052625
}
2606-
if (is_scaled_iv(exp->in(2), iv, p_scale, bt)) {
2626+
if (is_scaled_iv(exp->in(2), iv, p_scale, bt, converted)) {
26072627
if (p_offset != NULL) {
26082628
// We can't handle a scale of min_jint (or min_jlong) here as -1 * min_jint = min_jint
26092629
if (*p_scale == min_signed_integer(bt)) {
@@ -3432,6 +3452,8 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
34323452
} else if (policy_unswitching(phase)) {
34333453
phase->do_unswitching(this, old_new);
34343454
return false; // need to recalculate idom data
3455+
} else if (_head->is_LongCountedLoop()) {
3456+
phase->create_loop_nest(this, old_new);
34353457
}
34363458
return true;
34373459
}
@@ -3475,7 +3497,8 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
34753497
// unrolling), plus any needed for RCE purposes.
34763498

34773499
bool should_unroll = policy_unroll(phase);
3478-
bool should_rce = policy_range_check(phase, false);
3500+
bool should_rce = policy_range_check(phase, false, T_INT);
3501+
bool should_rce_long = policy_range_check(phase, false, T_LONG);
34793502

34803503
// If not RCE'ing (iteration splitting), then we do not need a pre-loop.
34813504
// We may still need to peel an initial iteration but we will not
@@ -3490,6 +3513,9 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
34903513
// peeling.
34913514
if (should_rce || should_unroll) {
34923515
if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
3516+
if (should_rce_long && phase->create_loop_nest(this, old_new)) {
3517+
return true;
3518+
}
34933519
uint estimate = est_loop_clone_sz(3);
34943520
if (!phase->may_require_nodes(estimate)) {
34953521
return false;
@@ -3531,6 +3557,9 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
35313557
phase->do_peeling(this, old_new);
35323558
}
35333559
}
3560+
if (should_rce_long) {
3561+
phase->create_loop_nest(this, old_new);
3562+
}
35343563
}
35353564
return true;
35363565
}

0 commit comments

Comments
 (0)