Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8283187: C2: loop candidate for superword not always unrolled fully if superword fails #7822

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/hotspot/share/opto/loopnode.cpp
Expand Up @@ -4532,7 +4532,14 @@ void PhaseIdealLoop::build_and_optimize() {
sw.transform_loop(lpt, true);
}
} else if (cl->is_main_loop()) {
sw.transform_loop(lpt, true);
if (!sw.transform_loop(lpt, true)) {
// Instigate more unrolling for optimization when vectorization fails.
if (cl->has_passed_slp()) {
C->set_major_progress();
cl->set_notpassed_slp();
cl->mark_do_unroll_only();
}
}
}
}
}
Expand Down
78 changes: 37 additions & 41 deletions src/hotspot/share/opto/superword.cpp
Expand Up @@ -95,38 +95,38 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
static const bool _do_vector_loop_experimental = false; // Experimental vectorization which uses data from loop unrolling.

//------------------------------transform_loop---------------------------
void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
assert(UseSuperWord, "should be");
// SuperWord only works with power of two vector sizes.
int vector_width = Matcher::vector_width_in_bytes(T_BYTE);
if (vector_width < 2 || !is_power_of_2(vector_width)) {
return;
return false;
}

assert(lpt->_head->is_CountedLoop(), "must be");
CountedLoopNode *cl = lpt->_head->as_CountedLoop();

if (!cl->is_valid_counted_loop(T_INT)) return; // skip malformed counted loop
if (!cl->is_valid_counted_loop(T_INT)) return false; // skip malformed counted loop

bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
if (post_loop_allowed) {
if (cl->is_reduction_loop()) return; // no predication mapping
if (cl->is_reduction_loop()) return false; // no predication mapping
Node *limit = cl->limit();
if (limit->is_Con()) return; // non constant limits only
if (limit->is_Con()) return false; // non constant limits only
// Now check the limit for expressions we do not handle
if (limit->is_Add()) {
Node *in2 = limit->in(2);
if (in2->is_Con()) {
int val = in2->get_int();
// should not try to program these cases
if (val < 0) return;
if (val < 0) return false;
}
}
}

// skip any loop that has not been assigned max unroll by analysis
if (do_optimization) {
if (SuperWordLoopUnrollAnalysis && cl->slp_max_unroll() == 0) return;
if (SuperWordLoopUnrollAnalysis && cl->slp_max_unroll() == 0) return false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While you are at it, you could also add braces to the one-line `if` statements.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be done in the new commit

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While you are at it, you could also add braces to the one-line `if` statements.

}

// Check for no control flow in body (other than exit)
Expand All @@ -141,28 +141,28 @@ void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
lpt->dump_head();
}
#endif
return;
return false;
}

// Make sure there are no extra control users of the loop backedge
if (cl->back_control()->outcnt() != 1) {
return;
return false;
}

// Skip any loops already optimized by slp
if (cl->is_vectorized_loop()) return;
if (cl->is_vectorized_loop()) return false;

if (cl->is_unroll_only()) return;
if (cl->is_unroll_only()) return false;

if (cl->is_main_loop()) {
// Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
CountedLoopEndNode* pre_end = find_pre_loop_end(cl);
if (pre_end == NULL) {
return;
return false;
}
Node* pre_opaq1 = pre_end->limit();
if (pre_opaq1->Opcode() != Op_Opaque1) {
return;
return false;
}
set_pre_loop_end(pre_end);
}
Expand All @@ -175,9 +175,10 @@ void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
// For now, define one block which is the entire loop body
set_bb(cl);

bool success = true;
if (do_optimization) {
assert(_packset.length() == 0, "packset must be empty");
SLP_extract();
success = SLP_extract();
if (PostLoopMultiversioning && Matcher::has_predicated_vectors()) {
if (cl->is_vectorized_loop() && cl->is_main_loop() && !cl->is_reduction_loop()) {
IdealLoopTree *lpt_next = lpt->_next;
Expand All @@ -192,6 +193,7 @@ void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
}
}
}
return success;
}

//------------------------------early unrolling analysis------------------------------
Expand Down Expand Up @@ -451,7 +453,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
// inserting scalar promotion, vector creation from multiple scalars, and
// extraction of scalar values from vectors.
//
void SuperWord::SLP_extract() {
bool SuperWord::SLP_extract() {

#ifndef PRODUCT
if (_do_vector_loop && TraceSuperWord) {
Expand All @@ -466,7 +468,7 @@ void SuperWord::SLP_extract() {
#endif
// Ready the block
if (!construct_bb()) {
return; // Exit if no interesting nodes or complex graph.
return false; // Exit if no interesting nodes or complex graph.
}

// build _dg, _disjoint_ptrs
Expand All @@ -483,7 +485,7 @@ void SuperWord::SLP_extract() {
hoist_loads_in_graph(); // this only rebuild the graph; all basic structs need rebuild explicitly

if (!construct_bb()) {
return; // Exit if no interesting nodes or complex graph.
return false; // Exit if no interesting nodes or complex graph.
}
dependence_graph();
compute_max_depth();
Expand Down Expand Up @@ -511,7 +513,7 @@ void SuperWord::SLP_extract() {
find_adjacent_refs();

if (align_to_ref() == NULL) {
return; // Did not find memory reference to align vectors
return false; // Did not find memory reference to align vectors
}

extend_packlist();
Expand Down Expand Up @@ -563,15 +565,15 @@ void SuperWord::SLP_extract() {
// map base types for vector usage
compute_vector_element_type();
} else {
return;
return false;
}
} else {
// for some reason we could not map the slp analysis state of the vectorized loop
return;
return false;
}
}

output();
return output();
}

//------------------------------find_adjacent_refs---------------------------
Expand Down Expand Up @@ -2385,17 +2387,11 @@ void SuperWord::print_loop(bool whole) {

//------------------------------output---------------------------
// Convert packs into vector node operations
void SuperWord::output() {
bool SuperWord::output() {
CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
Compile* C = _phase->C;
if (_packset.length() == 0) {
if (cl->is_main_loop()) {
// Instigate more unrolling for optimization when vectorization fails.
C->set_major_progress();
cl->set_notpassed_slp();
cl->mark_do_unroll_only();
}
return;
return false;
}

#ifndef PRODUCT
Expand Down Expand Up @@ -2429,7 +2425,7 @@ void SuperWord::output() {

if (do_reserve_copy() && !make_reversable.has_reserved()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");})
return;
return false;
}

for (int i = 0; i < _block.length(); i++) {
Expand Down Expand Up @@ -2474,7 +2470,7 @@ void SuperWord::output() {
if (val == NULL) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: val should not be NULL, exiting SuperWord");})
return; //and reverse to backup IG
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
Expand Down Expand Up @@ -2518,7 +2514,7 @@ void SuperWord::output() {
if (in1 == NULL) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in1 should not be NULL, exiting SuperWord");})
return; //and reverse to backup IG
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
Expand All @@ -2527,7 +2523,7 @@ void SuperWord::output() {
if (in2 == NULL) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in2 should not be NULL, exiting SuperWord");})
return; //and reverse to backup IG
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
Expand Down Expand Up @@ -2569,7 +2565,7 @@ void SuperWord::output() {
} else if (is_cmov_pack(p)) {
if (can_process_post_loop) {
// do not refactor of flow in post loop context
return;
return false;
}
if (!n->is_CMove()) {
continue;
Expand All @@ -2586,7 +2582,7 @@ void SuperWord::output() {
if (!bol->is_Bool()) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();})
return; //and reverse to backup IG
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
Expand All @@ -2602,15 +2598,15 @@ void SuperWord::output() {
if (src1 == NULL) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be NULL, exiting SuperWord");})
return; //and reverse to backup IG
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue
if (src2 == NULL) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be NULL, exiting SuperWord");})
return; //and reverse to backup IG
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
Expand All @@ -2634,7 +2630,7 @@ void SuperWord::output() {
} else {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: ShouldNotReachHere, exiting SuperWord");})
return; //and reverse to backup IG
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
Expand All @@ -2643,7 +2639,7 @@ void SuperWord::output() {
if (vn == NULL) {
if (do_reserve_copy()){
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: got NULL node, cannot proceed, exiting SuperWord");})
return; //and reverse to backup IG
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
Expand All @@ -2661,7 +2657,7 @@ void SuperWord::output() {
// First check that the vector size is the maximum vector size we can use on the machine;
// other vector sizes have reduced values for predicated data mapping.
if (vlen_in_bytes != (uint)MaxVectorSize) {
return;
return false;
}
}

Expand Down Expand Up @@ -2734,7 +2730,7 @@ void SuperWord::output() {
make_reversable.use_new();
}
NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);})
return;
return true;
}

//------------------------------vector_opd---------------------------
Expand Down
6 changes: 3 additions & 3 deletions src/hotspot/share/opto/superword.hpp
Expand Up @@ -286,7 +286,7 @@ class SuperWord : public ResourceObj {
public:
SuperWord(PhaseIdealLoop* phase);

void transform_loop(IdealLoopTree* lpt, bool do_optimization);
bool transform_loop(IdealLoopTree* lpt, bool do_optimization);

void unrolling_analysis(int &local_loop_unroll_factor);

Expand Down Expand Up @@ -422,7 +422,7 @@ class SuperWord : public ResourceObj {
// methods

// Extract the superword level parallelism
void SLP_extract();
bool SLP_extract();
// Find the adjacent memory references and create pack pairs for them.
void find_adjacent_refs();
// Tracing support
Expand Down Expand Up @@ -509,7 +509,7 @@ class SuperWord : public ResourceObj {
Node* find_last_mem_state(Node_List* pk, Node* first_mem);

// Convert packs into vector node operations
void output();
bool output();
// Create a vector operand for the nodes in pack p for operand: in(opd_idx)
Node* vector_opd(Node_List* p, int opd_idx);
// Can code be generated for pack p?
Expand Down
@@ -0,0 +1,66 @@
/*
* Copyright (c) 2022, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package compiler.c2.irTests;

import compiler.lib.ir_framework.*;
import sun.hotspot.WhiteBox;

/*
* @test
* @bug 8283187
* @summary C2: loop candidate for superword not always unrolled fully if superword fails
* @library /test/lib /
* @build sun.hotspot.WhiteBox
* @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
* @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI compiler.c2.irTests.TestSuperwordFailsUnrolling
*/

/**
 * IR-framework test for bug 8283187 ("C2: loop candidate for superword not
 * always unrolled fully if superword fails").
 *
 * The test method is a counted loop that sums {@code Long.bitCount} over a
 * {@code long[]} into a static {@code int} field; an {@code @IR} rule then
 * checks how many {@code POPCOUNT_L} nodes appear in the compiled method.
 *
 * NOTE(review): the loop shape, the static accumulator and the flag sequence
 * in {@code main} are what the IR rule depends on - do not restructure them
 * without re-checking the expected node count.
 */
public class TestSuperwordFailsUnrolling {
// Accumulator for the loop in test(); reset at the start of every call.
private static int v = 0;
// WhiteBox handle, used by main() to read the UseAVX VM flag.
private final static WhiteBox wb = WhiteBox.getWhiteBox();

/**
 * Test entry point. If the VM reports a UseAVX value greater than 2, the
 * framework is first run with -XX:UseAVX=2; the framework is then always
 * run once more with no extra flags.
 */
public static void main(String[] args) {
// getVMFlag returns an Object; null is handled explicitly below.
Object avx = wb.getVMFlag("UseAVX");
// NOTE(review): the cast assumes a non-null UseAVX value is always a Long;
// any other boxed type would throw ClassCastException - confirm.
if (avx != null && ((Long)avx) > 2) {
// NOTE(review): presumably caps the AVX level so that the loop is not
// vectorized and the scalar-unrolling path is exercised - confirm.
TestFramework.runWithFlags("-XX:UseAVX=2");
}
// Runs unconditionally, i.e. in addition to the UseAVX=2 run above.
TestFramework.run();
}

// The IR rule is only applied when UsePopCountInstruction is true and
// expects the POPCOUNT_L count to be "10".
// NOTE(review): 10 presumably reflects the number of loop-body copies left
// after full unrolling when superword fails - confirm against the IR dump.
@Test
@IR(applyIf = { "UsePopCountInstruction", "true" }, counts = { IRNode.POPCOUNT_L, "10" })
// Sums the bit counts of array1's elements into the static field v and
// returns it. array2 is not read in the body.
private static int test(long[] array1, long[] array2) {
v = 0;
for (int i = 0; i < array1.length; i++) {
// Accumulates directly into the static field on every iteration.
v += Long.bitCount(array1[i]);
}
return v;
}

// Custom runner for test(): passes the same freshly allocated (zero-filled)
// 1000-element array for both parameters and ignores the returned sum, so
// only the compiled IR is checked, not the numeric result.
@Run(test = "test")
void test_runner() {
long[] array = new long[1000];
test(array, array);
}
}