diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h index 5ed0c64ad3f86..b35da47f59cb8 100644 --- a/polly/include/polly/ScheduleTreeTransform.h +++ b/polly/include/polly/ScheduleTreeTransform.h @@ -154,6 +154,39 @@ struct RecursiveScheduleTreeVisitor } }; +/// Recursively visit all nodes of a schedule tree while allowing changes. +/// +/// The visit methods return an isl::schedule_node that is used to continue +/// visiting the tree. Structural changes such as returning a different node +/// will confuse the visitor. +template +struct ScheduleNodeRewriter + : public RecursiveScheduleTreeVisitor { + Derived &getDerived() { return *static_cast(this); } + const Derived &getDerived() const { + return *static_cast(this); + } + + isl::schedule_node visitNode(isl::schedule_node Node, Args... args) { + return getDerived().visitChildren(Node); + } + + isl::schedule_node visitChildren(isl::schedule_node Node, Args... args) { + if (!Node.has_children()) + return Node; + + isl::schedule_node It = Node.first_child(); + while (true) { + It = getDerived().visit(It, std::forward(args)...); + if (!It.has_next_sibling()) + break; + It = It.next_sibling(); + } + return It.parent(); + } +}; + /// Is this node the marker for its parent band? bool isBandMark(const isl::schedule_node &Node); diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 03878d5c8e4ba..0a6461139542d 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -384,6 +384,19 @@ ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node, return Result; } +struct InsertSimdMarkers : public ScheduleNodeRewriter { + isl::schedule_node visitBand(isl::schedule_node_band Band) { + isl::schedule_node Node = visitChildren(Band); + + // Only add SIMD markers to innermost bands. + if (!Node.first_child().isa()) + return Node; + + isl::id LoopMarker = isl::id::alloc(Band.ctx(), "SIMD", nullptr); + return Band.insert_mark(LoopMarker); + } +}; + isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand( isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) { assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band); @@ -408,16 +421,19 @@ isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand( Node = Node.child(0); // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise, // we will have troubles to match it in the backend. - isl::schedule_node_band NodeBand = - Node.as().set_ast_build_options( - isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }")); - Node = isl::manage(isl_schedule_node_band_sink(NodeBand.release())); - Node = Node.child(0); - if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf) - Node = Node.parent(); - auto LoopMarker = isl::id::alloc(Node.ctx(), "SIMD", nullptr); + Node = Node.as().set_ast_build_options( + isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }")); + + // Sink the inner loop into the smallest possible statements to make them + // represent a single vector instruction if possible. + Node = isl::manage(isl_schedule_node_band_sink(Node.release())); + + // Add SIMD markers to those vector statements. + InsertSimdMarkers SimdMarkerInserter; + Node = SimdMarkerInserter.visit(Node); + PrevectOpts++; - return Node.insert_mark(LoopMarker); + return Node.parent(); } static bool isSimpleInnermostBand(const isl::schedule_node &Node) { diff --git a/polly/lib/Transform/ScheduleTreeTransform.cpp b/polly/lib/Transform/ScheduleTreeTransform.cpp index a2cb538021fbe..01f18eadb4d9d 100644 --- a/polly/lib/Transform/ScheduleTreeTransform.cpp +++ b/polly/lib/Transform/ScheduleTreeTransform.cpp @@ -118,35 +118,6 @@ static isl::schedule rebuildBand(isl::schedule_node_band OldBand, return NewBand.get_schedule(); } -/// Recursively visit all nodes of a schedule tree while allowing changes. -/// -/// The visit methods return an isl::schedule_node that is used to continue -/// visiting the tree. Structural changes such as returning a different node -/// will confuse the visitor. -template -struct ScheduleNodeRewriter - : public RecursiveScheduleTreeVisitor { - Derived &getDerived() { return *static_cast(this); } - const Derived &getDerived() const { - return *static_cast(this); - } - - isl::schedule_node visitNode(const isl::schedule_node &Node, Args... args) { - if (!Node.has_children()) - return Node; - - isl::schedule_node It = Node.first_child(); - while (true) { - It = getDerived().visit(It, std::forward(args)...); - if (!It.has_next_sibling()) - break; - It = It.next_sibling(); - } - return It.parent(); - } -}; - /// Rewrite a schedule tree by reconstructing it bottom-up. /// /// By default, the original schedule tree is reconstructed. To build a diff --git a/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll b/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll index 0c0bc12eb4cd4..9d0c6b5b9479f 100644 --- a/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll +++ b/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll @@ -80,15 +80,17 @@ cleanup: ; preds = %for.cond, %entry ; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i0) mod 32)]; Stmt_for_body23[i0, i1] -> [((i0) mod 32)] }]" ; CHECK: permutable: 1 ; CHECK: child: -; CHECK: mark: "SIMD" -; CHECK: child: -; CHECK: sequence: -; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }" +; CHECK: sequence: +; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }" +; CHECK: child: +; CHECK: mark: "SIMD" ; CHECK: child: ; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]" ; CHECK: permutable: 1 ; CHECK: coincident: [ 1 ] -; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }" +; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }" +; CHECK: child: +; CHECK: mark: "SIMD" ; CHECK: child: ; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]" ; CHECK: permutable: 1 diff --git a/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll b/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll new file mode 100644 index 0000000000000..3bd1f98385004 --- /dev/null +++ b/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly -polly-vectorizer=stripmine -polly-isl-arg=--no-schedule-serialize-sccs -polly-tiling=0 -polly-opt-isl -analyze - < %s | FileCheck %s + +; isl_schedule_node_band_sink may sink into multiple children. +; https://llvm.org/PR52637 + +%struct.v4l2_sliced_vbi_data = type { [48 x i8] } + +define void @vivid_vbi_gen_sliced() { +entry: + br label %for.body + +for.body: ; preds = %vivid_vbi_gen_teletext.exit, %entry + %i.015 = phi i32 [ 0, %entry ], [ %inc, %vivid_vbi_gen_teletext.exit ] + %data0.014 = phi %struct.v4l2_sliced_vbi_data* [ null, %entry ], [ %incdec.ptr, %vivid_vbi_gen_teletext.exit ] + %arraydecay = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 0 + %arrayidx.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 6 + %0 = load i8, i8* %arrayidx.i, align 1 + store i8 %0, i8* %arraydecay, align 1 + br label %for.body.for.body_crit_edge.i + +for.body.for.body_crit_edge.i: ; preds = %for.body.for.body_crit_edge.i, %for.body + %inc10.i13 = phi i32 [ 1, %for.body ], [ %inc10.i, %for.body.for.body_crit_edge.i ] + %arrayidx2.phi.trans.insert.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 %inc10.i13 + store i8 0, i8* %arrayidx2.phi.trans.insert.i, align 1 + %inc10.i = add nuw nsw i32 %inc10.i13, 1 + %exitcond.not.i = icmp eq i32 %inc10.i13, 42 + br i1 %exitcond.not.i, label %vivid_vbi_gen_teletext.exit, label %for.body.for.body_crit_edge.i + +vivid_vbi_gen_teletext.exit: ; preds = %for.body.for.body_crit_edge.i + %incdec.ptr = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 1 + %inc = add nuw nsw i32 %i.015, 1 + %exitcond.not = icmp eq i32 %i.015, 1 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %vivid_vbi_gen_teletext.exit + ret void +} + + +; CHECK: schedule: +; CHECK: schedule: +; CHECK: mark: "SIMD" +; CHECK: schedule: +; CHECK: mark: "SIMD" +; CHECK: schedule: