Skip to content

Commit 2a8ae2f

Browse files
committed
8300256: C2: vectorization is sometimes skipped on loops where it would succeed
Reviewed-by: kvn, thartmann
1 parent ef0d0a7 commit 2a8ae2f

File tree

5 files changed

+115
-72
lines changed

5 files changed

+115
-72
lines changed

src/hotspot/share/opto/loopTransform.cpp

+16-45
Original file line numberDiff line numberDiff line change
@@ -1042,7 +1042,7 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
10421042
}
10431043

10441044
// Only attempt slp analysis when user controls do not prohibit it
1045-
if (!cl->range_checks_present() && (LoopMaxUnroll > _local_loop_unroll_factor)) {
1045+
if (!range_checks_present() && (LoopMaxUnroll > _local_loop_unroll_factor)) {
10461046
// Once policy_slp_analysis succeeds, mark the loop with the
10471047
// maximal unroll factor so that we minimize analysis passes
10481048
if (future_unroll_cnt >= _local_loop_unroll_factor) {
@@ -1916,7 +1916,7 @@ void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List
19161916
CountedLoopNode *cl = loop->_head->as_CountedLoop();
19171917

19181918
// only process RCE'd main loops
1919-
if (!cl->is_main_loop() || cl->range_checks_present()) return;
1919+
if (!cl->is_main_loop() || loop->range_checks_present()) return;
19201920

19211921
#ifndef PRODUCT
19221922
if (TraceLoopOpts) {
@@ -3003,7 +3003,7 @@ Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoop
30033003

30043004
//------------------------------do_range_check---------------------------------
30053005
// Eliminate range-checks and other trip-counter vs loop-invariant tests.
3006-
int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
3006+
void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30073007
#ifndef PRODUCT
30083008
if (PrintOpto && VerifyLoopOptimizations) {
30093009
tty->print("Range Check Elimination ");
@@ -3016,12 +3016,10 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30163016

30173017
assert(RangeCheckElimination, "");
30183018
CountedLoopNode *cl = loop->_head->as_CountedLoop();
3019-
// If we fail before trying to eliminate range checks, set multiversion state
3020-
int closed_range_checks = 1;
30213019

30223020
// protect against stride not being a constant
30233021
if (!cl->stride_is_con()) {
3024-
return closed_range_checks;
3022+
return;
30253023
}
30263024
// Find the trip counter; we are iteration splitting based on it
30273025
Node *trip_counter = cl->phi();
@@ -3033,7 +3031,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30333031
// Opaque1 node is optimized away and then another round
30343032
// of loop opts attempted.
30353033
if (cl->is_canonical_loop_entry() == NULL) {
3036-
return closed_range_checks;
3034+
return;
30373035
}
30383036

30393037
// Need to find the main-loop zero-trip guard
@@ -3047,7 +3045,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30473045
Node *p_f = iffm->in(0);
30483046
// pre loop may have been optimized out
30493047
if (p_f->Opcode() != Op_IfFalse) {
3050-
return closed_range_checks;
3048+
return;
30513049
}
30523050
CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
30533051
assert(pre_end->loopnode()->is_pre_loop(), "");
@@ -3056,7 +3054,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30563054
// optimized away and then another round of loop opts attempted.
30573055
// We can not optimize this particular loop in that case.
30583056
if (pre_opaq1->Opcode() != Op_Opaque1) {
3059-
return closed_range_checks;
3057+
return;
30603058
}
30613059
Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
30623060
Node *pre_limit = pre_opaq->in(1);
@@ -3068,7 +3066,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30683066
// pre-loop Opaque1 node.
30693067
Node *orig_limit = pre_opaq->original_loop_limit();
30703068
if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP) {
3071-
return closed_range_checks;
3069+
return;
30723070
}
30733071
// Must know if its a count-up or count-down loop
30743072

@@ -3081,10 +3079,6 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30813079
set_ctrl(one, C->root());
30823080
set_ctrl(mini, C->root());
30833081

3084-
// Count number of range checks and reduce by load range limits, if zero,
3085-
// the loop is in canonical form to multiversion.
3086-
closed_range_checks = 0;
3087-
30883082
Node* predicate_proj = cl->skip_strip_mined()->in(LoopNode::EntryControl);
30893083
assert(predicate_proj->is_Proj() && predicate_proj->in(0)->is_If(), "if projection only");
30903084

@@ -3095,7 +3089,6 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30953089
iff->Opcode() == Op_RangeCheck) { // Test?
30963090
// Test is an IfNode, has 2 projections. If BOTH are in the loop
30973091
// we need loop unswitching instead of iteration splitting.
3098-
closed_range_checks++;
30993092
Node *exit = loop->is_loop_exit(iff);
31003093
if (!exit) continue;
31013094
int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
@@ -3264,9 +3257,6 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
32643257
--imax;
32653258
}
32663259
}
3267-
if (int_limit->Opcode() == Op_LoadRange) {
3268-
closed_range_checks--;
3269-
}
32703260
} // End of is IF
32713261
}
32723262
if (predicate_proj != cl->skip_strip_mined()->in(LoopNode::EntryControl)) {
@@ -3316,32 +3306,19 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
33163306
assert(opqzm->outcnt() == 1, "cannot hack shared node");
33173307
_igvn.replace_input_of(opqzm, 1, main_limit);
33183308

3319-
return closed_range_checks;
3309+
return;
33203310
}
33213311

3322-
//------------------------------has_range_checks-------------------------------
3323-
// Check to see if RCE cleaned the current loop of range-checks.
3324-
void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
3325-
assert(RangeCheckElimination, "");
3326-
3327-
// skip if not a counted loop
3328-
if (!loop->is_counted()) return;
3329-
3330-
CountedLoopNode *cl = loop->_head->as_CountedLoop();
3331-
3332-
// skip this loop if it is already checked
3333-
if (cl->has_been_range_checked()) return;
3334-
3335-
// Now check for existence of range checks
3336-
for (uint i = 0; i < loop->_body.size(); i++) {
3337-
Node *iff = loop->_body[i];
3312+
// Scan this loop's body for conditional tests.
// Asserts that the loop head is a CountedLoop, then walks every node in _body
// and reports true as soon as one has opcode Op_If or Op_RangeCheck; reports
// false when no such node exists. The function is const and stores nothing.
// NOTE(review): in this diff rendering, a line that follows an "NNNN-" marker
// appears to be pre-commit text that the commit removes (the old flag-setting
// body); lines after "NNNN+" are the added replacement — confirm against the
// raw patch.
bool IdealLoopTree::compute_has_range_checks() const {
3313+
assert(_head->is_CountedLoop(), "");
3314+
for (uint i = 0; i < _body.size(); i++) {
3315+
Node *iff = _body[i];
33383316
int iff_opc = iff->Opcode();
33393317
// A plain If counts the same as a RangeCheck here.
if (iff_opc == Op_If || iff_opc == Op_RangeCheck) {
3340-
cl->mark_has_range_checks();
3341-
break;
3318+
return true;
33423319
}
33433320
}
3344-
cl->set_has_been_range_checked();
3321+
return false;
33453322
}
33463323

33473324
//-------------------------multi_version_post_loops----------------------------
@@ -4007,13 +3984,7 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
40073984
// with full checks, but the main-loop with no checks. Remove said checks
40083985
// from the main body.
40093986
if (should_rce) {
4010-
if (phase->do_range_check(this, old_new) != 0) {
4011-
cl->mark_has_range_checks();
4012-
} else {
4013-
cl->clear_has_range_checks();
4014-
}
4015-
} else if (PostLoopMultiversioning) {
4016-
phase->has_range_checks(this);
3987+
phase->do_range_check(this, old_new);
40173988
}
40183989

40193990
if (should_unroll && !should_peel && PostLoopMultiversioning &&

src/hotspot/share/opto/loopnode.cpp

+4-5
Original file line numberDiff line numberDiff line change
@@ -3942,7 +3942,7 @@ uint IdealLoopTree::est_loop_flow_merge_sz() const {
39423942
#ifndef PRODUCT
39433943
//------------------------------dump_head--------------------------------------
39443944
// Dump 1 liner for loop header info
3945-
void IdealLoopTree::dump_head() const {
3945+
void IdealLoopTree::dump_head() {
39463946
tty->sp(2 * _nest);
39473947
tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx);
39483948
if (_irreducible) tty->print(" IRREDUCIBLE");
@@ -3990,7 +3990,7 @@ void IdealLoopTree::dump_head() const {
39903990
if (cl->is_post_loop()) tty->print(" post");
39913991
if (cl->is_reduction_loop()) tty->print(" reduction");
39923992
if (cl->is_vectorized_loop()) tty->print(" vector");
3993-
if (cl->range_checks_present()) tty->print(" rc ");
3993+
if (range_checks_present()) tty->print(" rc ");
39943994
if (cl->is_multiversioned()) tty->print(" multi ");
39953995
}
39963996
if (_has_call) tty->print(" has_call");
@@ -4013,7 +4013,7 @@ void IdealLoopTree::dump_head() const {
40134013

40144014
//------------------------------dump-------------------------------------------
40154015
// Dump loops by loop tree
4016-
void IdealLoopTree::dump() const {
4016+
void IdealLoopTree::dump() {
40174017
dump_head();
40184018
if (_child) _child->dump();
40194019
if (_next) _next ->dump();
@@ -4600,8 +4600,7 @@ void PhaseIdealLoop::build_and_optimize() {
46004600
IdealLoopTree *lpt_next = lpt->_next;
46014601
if (lpt_next && lpt_next->is_counted()) {
46024602
CountedLoopNode *cl = lpt_next->_head->as_CountedLoop();
4603-
has_range_checks(lpt_next);
4604-
if (cl->is_post_loop() && cl->range_checks_present()) {
4603+
if (cl->is_post_loop() && lpt_next->range_checks_present()) {
46054604
if (!cl->is_multiversioned()) {
46064605
if (multi_version_post_loops(lpt, lpt_next) == false) {
46074606
// Cause the rce loop to be optimized away if we fail

src/hotspot/share/opto/loopnode.hpp

+25-20
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,16 @@ class LoopNode : public RegionNode {
7272
DoUnrollOnly = 1<<10,
7373
VectorizedLoop = 1<<11,
7474
HasAtomicPostLoop = 1<<12,
75-
HasRangeChecks = 1<<13,
76-
IsMultiversioned = 1<<14,
77-
StripMined = 1<<15,
78-
SubwordLoop = 1<<16,
79-
ProfileTripFailed = 1<<17,
80-
LoopNestInnerLoop = 1 << 18,
81-
LoopNestLongOuterLoop = 1 << 19};
75+
IsMultiversioned = 1<<13,
76+
StripMined = 1<<14,
77+
SubwordLoop = 1<<15,
78+
ProfileTripFailed = 1<<16,
79+
LoopNestInnerLoop = 1 << 17,
80+
LoopNestLongOuterLoop = 1 << 18};
8281
char _unswitch_count;
8382
enum { _unswitch_max=3 };
8483
char _postloop_flags;
85-
enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
84+
enum { RCEPostLoop = 1 };
8685

8786
// Expected trip count from profile data
8887
float _profile_trip_cnt;
@@ -94,7 +93,6 @@ class LoopNode : public RegionNode {
9493
bool is_inner_loop() const { return _loop_flags & InnerLoop; }
9594
void set_inner_loop() { _loop_flags |= InnerLoop; }
9695

97-
bool range_checks_present() const { return _loop_flags & HasRangeChecks; }
9896
bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
9997
bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
10098
bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
@@ -113,8 +111,6 @@ class LoopNode : public RegionNode {
113111
void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
114112
void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
115113
void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
116-
void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
117-
void clear_has_range_checks() { _loop_flags &= ~HasRangeChecks; }
118114
void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
119115
void mark_strip_mined() { _loop_flags |= StripMined; }
120116
void clear_strip_mined() { _loop_flags &= ~StripMined; }
@@ -126,8 +122,6 @@ class LoopNode : public RegionNode {
126122
int unswitch_max() { return _unswitch_max; }
127123
int unswitch_count() { return _unswitch_count; }
128124

129-
int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
130-
void set_has_been_range_checked() { _postloop_flags |= LoopRCEChecked; }
131125
int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
132126
void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
133127

@@ -621,7 +615,9 @@ class IdealLoopTree : public ResourceObj {
621615
uint8_t _irreducible:1, // True if irreducible
622616
_has_call:1, // True if has call safepoint
623617
_has_sfpt:1, // True if has non-call safepoint
624-
_rce_candidate:1; // True if candidate for range check elimination
618+
_rce_candidate:1, // True if candidate for range check elimination
619+
_has_range_checks:1,
620+
_has_range_checks_computed:1;
625621

626622
Node_List* _safepts; // List of safepoints in this loop
627623
Node_List* _required_safept; // A inner loop cannot delete these safepts;
@@ -633,6 +629,7 @@ class IdealLoopTree : public ResourceObj {
633629
_phase(phase),
634630
_local_loop_unroll_limit(0), _local_loop_unroll_factor(0),
635631
_nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0),
632+
_has_range_checks(0), _has_range_checks_computed(0),
636633
_safepts(NULL),
637634
_required_safept(NULL),
638635
_allow_optimizations(true)
@@ -780,9 +777,20 @@ class IdealLoopTree : public ResourceObj {
780777

781778
void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase);
782779

780+
bool compute_has_range_checks() const;
781+
// Report whether this loop's body contains range checks, computing the answer
// at most once per IdealLoopTree object.
// On the first call compute_has_range_checks() is run and its result is
// recorded in the _has_range_checks bit; _has_range_checks_computed is then
// set so that later calls return the stored bit without rescanning.
// Not const, because it writes those two bitfields.
// NOTE(review): nothing in this function resets the cached bits; whether they
// can go stale if the body changes afterwards is not visible here — confirm.
bool range_checks_present() {
782+
if (!_has_range_checks_computed) {
783+
// _has_range_checks is only ever raised to 1 here; a negative result
// leaves it at its current value.
if (compute_has_range_checks()) {
784+
_has_range_checks = 1;
785+
}
786+
_has_range_checks_computed = 1;
787+
}
788+
return _has_range_checks;
789+
}
790+
783791
#ifndef PRODUCT
784-
void dump_head() const; // Dump loop head only
785-
void dump() const; // Dump this loop recursively
792+
void dump_head(); // Dump loop head only
793+
void dump(); // Dump this loop recursively
786794
void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
787795
#endif
788796

@@ -1424,10 +1432,7 @@ class PhaseIdealLoop : public PhaseTransform {
14241432
}
14251433

14261434
// Eliminate range-checks and other trip-counter vs loop-invariant tests.
1427-
int do_range_check( IdealLoopTree *loop, Node_List &old_new );
1428-
1429-
// Check to see if do_range_check(...) cleaned the main loop of range-checks
1430-
void has_range_checks(IdealLoopTree *loop);
1435+
void do_range_check(IdealLoopTree *loop, Node_List &old_new);
14311436

14321437
// Process post loops which have range checks and try to build a multi-version
14331438
// guard to safely determine if we can execute the post loop which was RCE'd.

src/hotspot/share/opto/superword.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,10 @@ bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
180180
if (cl->is_vectorized_loop() && cl->is_main_loop() && !cl->is_reduction_loop()) {
181181
IdealLoopTree *lpt_next = cl->is_strip_mined() ? lpt->_parent->_next : lpt->_next;
182182
CountedLoopNode *cl_next = lpt_next->_head->as_CountedLoop();
183-
_phase->has_range_checks(lpt_next);
184183
// Main loop SLP works well for manually unrolled loops. But post loop
185184
// vectorization doesn't work for these. To bail out the optimization
186185
// earlier, we have range check and loop stride conditions below.
187-
if (cl_next->is_post_loop() && !cl_next->range_checks_present() &&
186+
if (cl_next->is_post_loop() && !lpt_next->range_checks_present() &&
188187
cl_next->stride_is_con() && abs(cl_next->stride_con()) == 1) {
189188
if (!cl_next->is_vectorized_loop()) {
190189
// Propagate some main loop attributes to its corresponding scalar
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package compiler.c2.irTests;
25+
26+
import compiler.lib.ir_framework.*;
27+
import jdk.test.lib.Utils;
28+
import jdk.internal.misc.Unsafe;
29+
import java.util.Objects;
30+
import java.util.Random;
31+
32+
/*
33+
* @test
34+
* @bug 8300256
35+
* @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64")
36+
* @modules java.base/jdk.internal.misc
37+
* @library /test/lib /
38+
* @run driver compiler.c2.irTests.TestVectorizationNotRun
39+
*/
40+
41+
// IR test: runs test() under the IR TestFramework and requires the compiled
// code to contain vector loads and vector stores.
public class TestVectorizationNotRun {
42+
private static final Unsafe UNSAFE = Unsafe.getUnsafe();
43+
44+
// Entry point: starts the framework with jdk.internal.misc exported to the
// unnamed module so that Unsafe can be used by the test.
public static void main(String[] args) {
45+
TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED");
46+
}
47+
48+
// Number of long elements in the source array.
static int size = 1024;
49+
// Byte length of the destination array: 8 bytes for each long element.
static int sizeBytes = 8 * size;
50+
static byte[] byteArray = new byte[sizeBytes];
51+
static long[] longArray = new long[size];
52+
53+
// Writes every element of src into dest as an unaligned 8-byte store at byte
// offset ARRAY_BYTE_BASE_OFFSET + i * 8. The @IR rule requires at least one
// LOAD_VECTOR and at least one STORE_VECTOR node.
@Test
54+
@IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
public static void test(byte[] dest, long[] src) {
56+
for (int i = 0; i < src.length; i++) {
57+
// Hand-written bound test against the static sizeBytes field, not an
// array-length check.
// NOTE(review): presumably this is the kind of in-loop test that
// previously made C2 skip vectorization (bug 8300256) — confirm.
if ((i < 0) || (8 > sizeBytes - i)) {
58+
throw new IndexOutOfBoundsException();
59+
}
60+
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + i * 8, src[i]);
61+
}
62+
}
63+
64+
// Driver for test(): passes the static byte and long arrays declared above.
@Run(test = "test")
65+
public static void test_runner() {
66+
test(byteArray, longArray);
67+
}
68+
69+
}

0 commit comments

Comments
 (0)