diff --git a/polly/include/polly/DependenceInfo.h b/polly/include/polly/DependenceInfo.h index a8b11191d619ce..3d70ea2b74e541 100644 --- a/polly/include/polly/DependenceInfo.h +++ b/polly/include/polly/DependenceInfo.h @@ -124,6 +124,10 @@ struct Dependences { /// dependences. bool isValidSchedule(Scop &S, const StatementToIslMapTy &NewSchedules) const; + /// Return true if the schedule @p NewSched is a schedule for @p S that does + /// not violate any dependences. + bool isValidSchedule(Scop &S, isl::schedule NewSched) const; + /// Print the stored dependence information. void print(llvm::raw_ostream &OS) const; diff --git a/polly/include/polly/ManualOptimizer.h b/polly/include/polly/ManualOptimizer.h index 066eb4d84c511c..988926334eb1a7 100644 --- a/polly/include/polly/ManualOptimizer.h +++ b/polly/include/polly/ManualOptimizer.h @@ -15,8 +15,13 @@ #include "isl/isl-noexceptions.h" +namespace llvm { +class OptimizationRemarkEmitter; +} + namespace polly { class Scop; +struct Dependences; /// Apply loop-transformation metadata. /// @@ -30,7 +35,9 @@ class Scop; /// @return The transformed schedule with all mark-nodes with loop /// transformations applied. Returns NULL in case of an error or @p /// Sched itself if no transformation has been applied. -isl::schedule applyManualTransformations(Scop *S, isl::schedule Sched); +isl::schedule applyManualTransformations(Scop *S, isl::schedule Sched, + const Dependences &D, + llvm::OptimizationRemarkEmitter *ORE); } // namespace polly #endif /* POLLY_MANUALOPTIMIZER_H */ diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h index 5fd0d6ad4dd0ca..e8685313c83c4a 100644 --- a/polly/include/polly/ScheduleTreeTransform.h +++ b/polly/include/polly/ScheduleTreeTransform.h @@ -178,6 +178,9 @@ isl::schedule applyFullUnroll(isl::schedule_node BandToUnroll); /// Replace the AST band @p BandToUnroll by a partially unrolled equivalent. 
isl::schedule applyPartialUnroll(isl::schedule_node BandToUnroll, int Factor); +/// Loop-distribute the band @p BandToFission as much as possible. +isl::schedule applyMaxFission(isl::schedule_node BandToFission); + /// Build the desired set of partial tile prefixes. /// /// We build a set of partial tile prefixes, which are prefixes of the vector diff --git a/polly/lib/Analysis/DependenceInfo.cpp b/polly/lib/Analysis/DependenceInfo.cpp index 709bce7ea3b605..0ac7ff1a14c0b6 100644 --- a/polly/lib/Analysis/DependenceInfo.cpp +++ b/polly/lib/Analysis/DependenceInfo.cpp @@ -636,6 +636,19 @@ void Dependences::calculateDependences(Scop &S) { LLVM_DEBUG(dump()); } +bool Dependences::isValidSchedule(Scop &S, isl::schedule NewSched) const { + // TODO: Also check permutable/coincident flags as well. + + StatementToIslMapTy NewSchedules; + for (auto NewMap : NewSched.get_map().get_map_list()) { + auto Stmt = reinterpret_cast( + NewMap.get_tuple_id(isl::dim::in).get_user()); + NewSchedules[Stmt] = NewMap; + } + + return isValidSchedule(S, NewSchedules); +} + bool Dependences::isValidSchedule( Scop &S, const StatementToIslMapTy &NewSchedule) const { if (LegalityCheckDisabled) diff --git a/polly/lib/Transform/ManualOptimizer.cpp b/polly/lib/Transform/ManualOptimizer.cpp index 2a77f7d49ae222..2c05927582e289 100644 --- a/polly/lib/Transform/ManualOptimizer.cpp +++ b/polly/lib/Transform/ManualOptimizer.cpp @@ -11,11 +11,14 @@ //===----------------------------------------------------------------------===// #include "polly/ManualOptimizer.h" +#include "polly/DependenceInfo.h" +#include "polly/Options.h" #include "polly/ScheduleTreeTransform.h" #include "polly/Support/ScopHelper.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/Metadata.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -25,6 +28,12 @@ using namespace polly; using namespace llvm; namespace { + 
+static cl::opt IgnoreDepcheck( "polly-pragma-ignore-depcheck", cl::desc("Skip the dependency check for pragma-based transformations"), cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); + /// Same as llvm::hasUnrollTransformation(), but takes a LoopID as argument /// instead of a Loop. static TransformationMode hasUnrollTransformation(MDNode *LoopID) { @@ -48,6 +57,31 @@ static TransformationMode hasUnrollTransformation(MDNode *LoopID) { return TM_Unspecified; } +// Return the first DebugLoc in the list. +static DebugLoc findFirstDebugLoc(MDNode *MD) { + if (MD) { + for (const MDOperand &X : drop_begin(MD->operands(), 1)) { + Metadata *A = X.get(); + if (!isa(A)) + continue; + return cast(A); + } + } + + return {}; +} + +static DebugLoc findTransformationDebugLoc(MDNode *LoopMD, StringRef Name) { + // First find dedicated transformation location + // (such as the location of #pragma clang loop) + MDNode *MD = findOptionMDForLoopID(LoopMD, Name); + if (DebugLoc K = findFirstDebugLoc(MD)) + return K; + + // Otherwise, fall back to the location of the loop itself + return findFirstDebugLoc(LoopMD); +} + /// Apply full or partial unrolling. static isl::schedule applyLoopUnroll(MDNode *LoopMD, isl::schedule_node BandToUnroll) { @@ -78,6 +112,15 @@ static isl::schedule applyLoopUnroll(MDNode *LoopMD, return {}; } +static isl::schedule applyLoopFission(MDNode *LoopMD, + isl::schedule_node BandToFission) { + // TODO: Make it possible to selectively fission substatements. + // TODO: Apply followup loop properties. + // TODO: Instead of fissioning every statement, find the maximum set that does + // not cause a dependency violation. + return applyMaxFission(BandToFission); +} + // Return the properties from a LoopID. Scalar properties are ignored. 
static auto getLoopMDProps(MDNode *LoopMD) { return map_range( @@ -96,14 +139,76 @@ class SearchTransformVisitor BaseTy &getBase() { return *this; } const BaseTy &getBase() const { return *this; } + polly::Scop *S; + const Dependences *D; + OptimizationRemarkEmitter *ORE; + // Set after a transformation is applied. Recursive search must be aborted // once this happens to ensure that any new followup transformation is // transformed in innermost-first order. isl::schedule Result; + /// Check whether a schedule after a transformation is legal. Return the old + /// schedule without the transformation. + isl::schedule + checkDependencyViolation(llvm::MDNode *LoopMD, llvm::Value *CodeRegion, + const isl::schedule_node &OrigBand, + StringRef DebugLocAttr, StringRef TransPrefix, + StringRef RemarkName, StringRef TransformationName) { + if (D->isValidSchedule(*S, Result)) + return Result; + + LLVMContext &Ctx = LoopMD->getContext(); + LLVM_DEBUG(dbgs() << "Dependency violation detected\n"); + + DebugLoc TransformLoc = findTransformationDebugLoc(LoopMD, DebugLocAttr); + + if (IgnoreDepcheck) { + LLVM_DEBUG(dbgs() << "Still accepting transformation due to " + "-polly-pragma-ignore-depcheck\n"); + if (ORE) { + ORE->emit( + OptimizationRemark(DEBUG_TYPE, RemarkName, TransformLoc, CodeRegion) + << (Twine("Could not verify dependencies for ") + + TransformationName + + "; still applying because of -polly-pragma-ignore-depcheck") + .str()); + } + return Result; + } + + LLVM_DEBUG(dbgs() << "Rolling back transformation\n"); + + if (ORE) { + ORE->emit(DiagnosticInfoOptimizationFailure(DEBUG_TYPE, RemarkName, + TransformLoc, CodeRegion) + << (Twine("not applying ") + TransformationName + + ": cannot ensure semantic equivalence due to possible " + "dependency violations") + .str()); + } + + // If illegal, revert and remove the transformation to not risk re-trying + // indefinitely. 
+ MDNode *NewLoopMD = + makePostTransformationMetadata(Ctx, LoopMD, {TransPrefix}, {}); + BandAttr *Attr = getBandAttr(OrigBand); + Attr->Metadata = NewLoopMD; + + // Roll back old schedule. + return OrigBand.get_schedule(); + } + public: - static isl::schedule applyOneTransformation(const isl::schedule &Sched) { - SearchTransformVisitor Transformer; + SearchTransformVisitor(polly::Scop *S, const Dependences *D, + OptimizationRemarkEmitter *ORE) + : S(S), D(D), ORE(ORE) {} + + static isl::schedule applyOneTransformation(polly::Scop *S, + const Dependences *D, + OptimizationRemarkEmitter *ORE, + const isl::schedule &Sched) { + SearchTransformVisitor Transformer(S, D, ORE); Transformer.visit(Sched); return Transformer.Result; } @@ -125,6 +230,14 @@ class SearchTransformVisitor return; } + // CodeRegion used by ORE to determine code hotness. + // TODO: Works only for original loop; for transformed loops, should track + // where the loop's body code comes from. + Loop *Loop = Attr->OriginalLoop; + Value *CodeRegion = nullptr; + if (Loop) + CodeRegion = Loop->getHeader(); + MDNode *LoopMD = Attr->Metadata; if (!LoopMD) return; @@ -146,6 +259,15 @@ class SearchTransformVisitor Result = applyLoopUnroll(LoopMD, Band); if (!Result.is_null()) return; + } else if (AttrName == "llvm.loop.distribute.enable") { + Result = applyLoopFission(LoopMD, Band); + if (!Result.is_null()) + Result = checkDependencyViolation( + LoopMD, CodeRegion, Band, "llvm.loop.distribute.loc", + "llvm.loop.distribute.", "FailedRequestedFission", + "loop fission/distribution"); + if (!Result.is_null()) + return; } // not a loop transformation; look for next property @@ -162,11 +284,14 @@ class SearchTransformVisitor } // namespace -isl::schedule polly::applyManualTransformations(Scop *S, isl::schedule Sched) { +isl::schedule +polly::applyManualTransformations(Scop *S, isl::schedule Sched, + const Dependences &D, + OptimizationRemarkEmitter *ORE) { // Search the loop nest for transformations until 
fixpoint. while (true) { isl::schedule Result = - SearchTransformVisitor::applyOneTransformation(Sched); + SearchTransformVisitor::applyOneTransformation(S, &D, ORE, Sched); if (Result.is_null()) { // No (more) transformation has been found. break; diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 6e48ffe811f566..93bf26f4db51cb 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -55,6 +55,7 @@ #include "polly/Support/ISLOStream.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "isl/options.h" @@ -668,7 +669,9 @@ static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) { static bool runIslScheduleOptimizer( Scop &S, function_ref GetDeps, - TargetTransformInfo *TTI, isl::schedule &LastSchedule) { + TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, + isl::schedule &LastSchedule) { + // Skip SCoPs in case they're already optimised by PPCGCodeGeneration if (S.isToBeSkipped()) return false; @@ -689,8 +692,8 @@ static bool runIslScheduleOptimizer( bool HasUserTransformation = false; if (PragmaBasedOpts) { - isl::schedule ManuallyTransformed = - applyManualTransformations(&S, Schedule); + isl::schedule ManuallyTransformed = applyManualTransformations( + &S, Schedule, GetDeps(Dependences::AL_Statement), ORE); if (ManuallyTransformed.is_null()) { LLVM_DEBUG(dbgs() << "Error during manual optimization\n"); return false; @@ -864,7 +867,9 @@ static bool runIslScheduleOptimizer( walkScheduleTreeForStatistics(Schedule, 2); } - if (!ScheduleTreeOptimizer::isProfitableSchedule(S, Schedule)) + // Skip profitability check if user transformation(s) have been applied. 
+ if (!HasUserTransformation && + !ScheduleTreeOptimizer::isProfitableSchedule(S, Schedule)) return false; auto ScopStats = S.getStatistics(); @@ -893,9 +898,11 @@ bool IslScheduleOptimizerWrapperPass::runOnScop(Scop &S) { return getAnalysis().getDependences( Dependences::AL_Statement); }; + OptimizationRemarkEmitter &ORE = + getAnalysis().getORE(); TargetTransformInfo *TTI = &getAnalysis().getTTI(F); - return runIslScheduleOptimizer(S, getDependences, TTI, LastSchedule); + return runIslScheduleOptimizer(S, getDependences, TTI, &ORE, LastSchedule); } static void runScheduleOptimizerPrinter(raw_ostream &OS, @@ -930,8 +937,10 @@ void IslScheduleOptimizerWrapperPass::getAnalysisUsage( ScopPass::getAnalysisUsage(AU); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); } } // namespace @@ -945,6 +954,7 @@ INITIALIZE_PASS_BEGIN(IslScheduleOptimizerWrapperPass, "polly-opt-isl", INITIALIZE_PASS_DEPENDENCY(DependenceInfo); INITIALIZE_PASS_DEPENDENCY(ScopInfoRegionPass); INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass); +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); INITIALIZE_PASS_END(IslScheduleOptimizerWrapperPass, "polly-opt-isl", "Polly - Optimize schedule of SCoP", false, false) @@ -956,9 +966,10 @@ runIslScheduleOptimizerUsingNPM(Scop &S, ScopAnalysisManager &SAM, auto GetDeps = [&Deps](Dependences::AnalysisLevel) -> const Dependences & { return Deps.getDependences(Dependences::AL_Statement); }; + OptimizationRemarkEmitter ORE(&S.getFunction()); TargetTransformInfo *TTI = &SAR.TTI; isl::schedule LastSchedule; - bool Modified = runIslScheduleOptimizer(S, GetDeps, TTI, LastSchedule); + bool Modified = runIslScheduleOptimizer(S, GetDeps, TTI, &ORE, LastSchedule); if (OS) { *OS << "Printing analysis 'Polly - Optimize schedule of SCoP' for region: '" << S.getName() << "' in function '" << S.getFunction().getName() diff --git a/polly/lib/Transform/ScheduleTreeTransform.cpp 
b/polly/lib/Transform/ScheduleTreeTransform.cpp index f8ca4497447405..f23d696bff1e87 100644 --- a/polly/lib/Transform/ScheduleTreeTransform.cpp +++ b/polly/lib/Transform/ScheduleTreeTransform.cpp @@ -397,6 +397,10 @@ static bool isBandWithSingleLoop(const isl::schedule_node &Node) { } #endif +static bool isLeaf(const isl::schedule_node &Node) { + return isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf; +} + /// Create an isl::id representing the output loop after a transformation. static isl::id createGeneratedLoopAttr(isl::ctx Ctx, MDNode *FollowupLoopMD) { // Don't need to id the followup. @@ -728,3 +732,46 @@ isl::schedule_node polly::applyRegisterTiling(isl::schedule_node Node, return Node.as().set_ast_build_options( isl::union_set(Ctx, "{unroll[x]}")); } + +/// Find statements and sub-loops in (possibly nested) sequences. +static void +collectFussionableStmts(isl::schedule_node Node, + SmallVectorImpl &ScheduleStmts) { + if (isBand(Node) || isLeaf(Node)) { + ScheduleStmts.push_back(Node); + return; + } + + if (Node.has_children()) { + isl::schedule_node C = Node.first_child(); + while (true) { + collectFussionableStmts(C, ScheduleStmts); + if (!C.has_next_sibling()) + break; + C = C.next_sibling(); + } + } +} + +isl::schedule polly::applyMaxFission(isl::schedule_node BandToFission) { + isl::ctx Ctx = BandToFission.ctx(); + BandToFission = removeMark(BandToFission); + isl::schedule_node BandBody = BandToFission.child(0); + + SmallVector FissionableStmts; + collectFussionableStmts(BandBody, FissionableStmts); + size_t N = FissionableStmts.size(); + + // Collect the domain for each of the statements that will get their own loop. 
+ isl::union_set_list DomList = isl::union_set_list(Ctx, N); + for (size_t i = 0; i < N; ++i) { + isl::schedule_node BodyPart = FissionableStmts[i]; + DomList = DomList.add(BodyPart.get_domain()); + } + + // Apply the fission by copying the entire loop, but inserting a filter for + // the statement domains for each fissioned loop. + isl::schedule_node Fissioned = BandToFission.insert_sequence(DomList); + + return Fissioned.get_schedule(); +} diff --git a/polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll new file mode 100644 index 00000000000000..f8c311be07c75f --- /dev/null +++ b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll @@ -0,0 +1,51 @@ +; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=1 -analyze < %s | FileCheck %s --match-full-lines --check-prefix=ON +; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=0 -analyze < %s | FileCheck %s --match-full-lines --check-prefix=OFF +; +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 42.0, double* %A + %c = fadd double 21.0, 21.0 + store double %c, double* %B + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for, !llvm.loop !2 + +exit: + br label %return + +return: + ret void +} + + +!2 = distinct !{!2, !5} +!5 = !{!"llvm.loop.distribute.enable"} + + +; ON: Printing analysis 'Polly - Optimize schedule of SCoP' for region: 'for => return' in function 'func': +; ON: Calculated schedule: +; ON-NEXT: domain: "[n] -> { Stmt_body[i0] : 0 <= i0 < n; Stmt_body_b[i0] : 0 <= i0 < n }" +; ON-NEXT: child: +; ON-NEXT: sequence: +; ON-NEXT: - filter: "[n] -> { Stmt_body[i0] : 0 <= i0 < n }" +; ON-NEXT: child: +; ON-NEXT: schedule: 
"[n] -> [{ Stmt_body[i0] -> [(i0)] }]" +; ON-NEXT: - filter: "[n] -> { Stmt_body_b[i0] : 0 <= i0 < n }" +; ON-NEXT: child: +; ON-NEXT: schedule: "[n] -> [{ Stmt_body_b[i0] -> [(i0)] }]" + + +; OFF-LABEL: Printing analysis 'Polly - Optimize schedule of SCoP' for region: 'for => return' in function 'func': +; OFF-NEXT: Calculated schedule: +; OFF-NEXT: n/a + diff --git a/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll new file mode 100644 index 00000000000000..f464f5a367e067 --- /dev/null +++ b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll @@ -0,0 +1,109 @@ +; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=1 -disable-output < %s 2>&1 | FileCheck %s --match-full-lines +; +; CHECK: warning: distribute_illegal.c:2:3: not applying loop fission/distribution: cannot ensure semantic equivalence due to possible dependency violations +; +; void foo(double *A,double *B) { +; for (int i = 1; i < 128; ++i) { +; A[i] = i; +; B[i] = A[i+1]; +; } +; } + +source_filename = "distribute_illegal.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo(double* %A, double* %B) #0 !dbg !7 { +entry: + call void @llvm.dbg.value(metadata double* %A, metadata !13, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata double* %B, metadata !14, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata i32 1, metadata !15, metadata !DIExpression()), !dbg !19 + br label %for.cond, !dbg !20 + +for.cond: + %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ], !dbg !19 + call void @llvm.dbg.value(metadata i32 %i.0, metadata !15, metadata !DIExpression()), !dbg !19 + %cmp = icmp slt i32 %i.0, 128, !dbg !21 + br i1 %cmp, label %for.body, label %for.end, !dbg !23 + +for.body: + %conv = 
sitofp i32 %i.0 to double, !dbg !24 + %idxprom = sext i32 %i.0 to i64, !dbg !26 + %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom, !dbg !26 + store double %conv, double* %arrayidx, align 8, !dbg !27, !tbaa !28 + + %add = add nsw i32 %i.0, 1, !dbg !32 + %idxprom1 = sext i32 %add to i64, !dbg !33 + %arrayidx2 = getelementptr inbounds double, double* %A, i64 %idxprom1, !dbg !33 + %0 = load double, double* %arrayidx2, align 8, !dbg !33, !tbaa !28 + %idxprom3 = sext i32 %i.0 to i64, !dbg !34 + %arrayidx4 = getelementptr inbounds double, double* %B, i64 %idxprom3, !dbg !34 + store double %0, double* %arrayidx4, align 8, !dbg !35, !tbaa !28 + + %inc = add nsw i32 %i.0, 1, !dbg !36 + call void @llvm.dbg.value(metadata i32 %inc, metadata !15, metadata !DIExpression()), !dbg !19 + br label %for.cond, !dbg !37, !llvm.loop !38 + +for.end: + ret void, !dbg !41 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } +attributes #2 = { argmemonly nofree nosync nounwind willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "distribute_illegal.c", directory: "/path/to") +!2 
= !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"uwtable", i32 1} +!6 = !{!"clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) +!12 = !{!13, !14, !15} +!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10) +!14 = !DILocalVariable(name: "B", arg: 2, scope: !7, file: !1, line: 1, type: !10) +!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 2, type: !17) +!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 2, column: 3) +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!18 = !DILocation(line: 0, scope: !7) +!19 = !DILocation(line: 0, scope: !16) +!20 = !DILocation(line: 2, column: 8, scope: !16) +!21 = !DILocation(line: 2, column: 21, scope: !22) +!22 = distinct !DILexicalBlock(scope: !16, file: !1, line: 2, column: 3) +!23 = !DILocation(line: 2, column: 3, scope: !16) +!24 = !DILocation(line: 3, column: 12, scope: !25) +!25 = distinct !DILexicalBlock(scope: !22, file: !1, line: 2, column: 33) +!26 = !DILocation(line: 3, column: 5, scope: !25) +!27 = !DILocation(line: 3, column: 10, scope: !25) +!28 = !{!29, !29, i64 0} +!29 = !{!"double", !30, i64 0} +!30 = !{!"omnipotent char", !31, i64 0} +!31 = !{!"Simple C/C++ TBAA"} +!32 = !DILocation(line: 4, column: 15, scope: !25) +!33 = !DILocation(line: 4, column: 12, scope: !25) +!34 = !DILocation(line: 4, column: 5, scope: !25) +!35 = !DILocation(line: 4, column: 10, scope: !25) +!36 = 
!DILocation(line: 2, column: 28, scope: !22) +!37 = !DILocation(line: 2, column: 3, scope: !22) +!38 = distinct !{!38, !23, !39, !40, !100} +!39 = !DILocation(line: 5, column: 3, scope: !16) +!40 = !{!"llvm.loop.mustprogress"} +!41 = !DILocation(line: 6, column: 1, scope: !7) +!100 = !{!"llvm.loop.distribute.enable"} diff --git a/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll new file mode 100644 index 00000000000000..866b81420dc1bb --- /dev/null +++ b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll @@ -0,0 +1,111 @@ +; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=1 -disable-output < %s 2>&1 | FileCheck %s --match-full-lines +; +; CHECK: warning: distribute_illegal.c:1:42: not applying loop fission/distribution: cannot ensure semantic equivalence due to possible dependency violations +; +; void foo(double *A,double *B) { +; for (int i = 1; i < 128; ++i) { +; A[i] = i; +; B[i] = A[i+1]; +; } +; } + +source_filename = "distribute_illegal.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo(double* %A, double* %B) #0 !dbg !7 { +entry: + call void @llvm.dbg.value(metadata double* %A, metadata !13, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata double* %B, metadata !14, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata i32 1, metadata !15, metadata !DIExpression()), !dbg !19 + br label %for.cond, !dbg !20 + +for.cond: + %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ], !dbg !19 + call void @llvm.dbg.value(metadata i32 %i.0, metadata !15, metadata !DIExpression()), !dbg !19 + %cmp = icmp slt i32 %i.0, 128, !dbg !21 + br i1 %cmp, label %for.body, label %for.end, !dbg !23 + +for.body: + %conv = sitofp i32 %i.0 to double, !dbg !24 
+ %idxprom = sext i32 %i.0 to i64, !dbg !26 + %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom, !dbg !26 + store double %conv, double* %arrayidx, align 8, !dbg !27, !tbaa !28 + + %add = add nsw i32 %i.0, 1, !dbg !32 + %idxprom1 = sext i32 %add to i64, !dbg !33 + %arrayidx2 = getelementptr inbounds double, double* %A, i64 %idxprom1, !dbg !33 + %0 = load double, double* %arrayidx2, align 8, !dbg !33, !tbaa !28 + %idxprom3 = sext i32 %i.0 to i64, !dbg !34 + %arrayidx4 = getelementptr inbounds double, double* %B, i64 %idxprom3, !dbg !34 + store double %0, double* %arrayidx4, align 8, !dbg !35, !tbaa !28 + + %inc = add nsw i32 %i.0, 1, !dbg !36 + call void @llvm.dbg.value(metadata i32 %inc, metadata !15, metadata !DIExpression()), !dbg !19 + br label %for.cond, !dbg !37, !llvm.loop !38 + +for.end: + ret void, !dbg !41 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } +attributes #2 = { argmemonly nofree nosync nounwind willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "distribute_illegal.c", directory: "/path/to") +!2 = !{i32 7, !"Dwarf Version", i32 4} 
+!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"uwtable", i32 1} +!6 = !{!"clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) +!12 = !{!13, !14, !15} +!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10) +!14 = !DILocalVariable(name: "B", arg: 2, scope: !7, file: !1, line: 1, type: !10) +!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 2, type: !17) +!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 2, column: 3) +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!18 = !DILocation(line: 0, scope: !7) +!19 = !DILocation(line: 0, scope: !16) +!20 = !DILocation(line: 2, column: 8, scope: !16) +!21 = !DILocation(line: 2, column: 21, scope: !22) +!22 = distinct !DILexicalBlock(scope: !16, file: !1, line: 2, column: 3) +!23 = !DILocation(line: 2, column: 3, scope: !16) +!24 = !DILocation(line: 3, column: 12, scope: !25) +!25 = distinct !DILexicalBlock(scope: !22, file: !1, line: 2, column: 33) +!26 = !DILocation(line: 3, column: 5, scope: !25) +!27 = !DILocation(line: 3, column: 10, scope: !25) +!28 = !{!29, !29, i64 0} +!29 = !{!"double", !30, i64 0} +!30 = !{!"omnipotent char", !31, i64 0} +!31 = !{!"Simple C/C++ TBAA"} +!32 = !DILocation(line: 4, column: 15, scope: !25) +!33 = !DILocation(line: 4, column: 12, scope: !25) +!34 = !DILocation(line: 4, column: 5, scope: !25) +!35 = !DILocation(line: 4, column: 10, scope: !25) +!36 = !DILocation(line: 2, column: 28, scope: !22) 
+!37 = !DILocation(line: 2, column: 3, scope: !22) +!38 = distinct !{!38, !23, !39, !40, !100, !101} +!39 = !DILocation(line: 5, column: 3, scope: !16) +!40 = !{!"llvm.loop.mustprogress"} +!41 = !DILocation(line: 6, column: 1, scope: !7) +!100 = !{!"llvm.loop.distribute.enable"} +!101 = !{!"llvm.loop.distribute.loc", !102} +!102 = !DILocation(line: 1, column: 42, scope: !16)