diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 6a4bf6e594d14..6cd7321d0c4e0 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -7593,6 +7593,36 @@ Note that setting ``llvm.loop.interleave.count`` to 1 disables interleaving multiple iterations of the loop. If ``llvm.loop.interleave.count`` is set to 0 then the interleave count will be determined automatically. +'``llvm.loop.vectorize.reassociate_fpreductions.enable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata selectively allows or disallows reassociating floating-point +reductions, which otherwise may be unsafe to reassociate, during loop +vectorization. For example, a floating point ``ADD`` reduction without +``reassoc`` fast-math flags may be vectorized provided that this metadata +allows it. The first operand is the string +``llvm.loop.vectorize.reassociate_fpreductions.enable`` +and the second operand is a bit. If the bit operand value is 1, unsafe +reduction reassociations are enabled. A value of 0 disables unsafe +reduction reassociations. + +Note that the reassociation of floating point reductions that is allowed +by other means is considered safe, so this metadata is a no-op +in such cases. + +For example, reassociation of a floating point reduction +in a loop with ``!{!"llvm.loop.vectorize.enable", i1 1}`` metadata is allowed +regardless of the value of +``llvm.loop.vectorize.reassociate_fpreductions.enable``. + +Similarly, reassociation is always allowed for reduction operations +with ``reassoc`` fast-math flags. + +.. 
code-block:: llvm + + !0 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 0} + !1 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 1} + '``llvm.loop.vectorize.enable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index d654ac3ec9273..5911501ca2d3e 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -64,7 +64,8 @@ class LoopVectorizeHints { HK_FORCE, HK_ISVECTORIZED, HK_PREDICATE, - HK_SCALABLE + HK_SCALABLE, + HK_REASSOCIATE_FP_REDUCTIONS, }; /// Hint - associates name and validation with the hint value. @@ -97,6 +98,10 @@ class LoopVectorizeHints { /// Says whether we should use fixed width or scalable vectorization. Hint Scalable; + /// Says whether unsafe reassociation of reductions is allowed + /// during the loop vectorization. + Hint ReassociateFPReductions; + /// Return the loop metadata prefix. static StringRef Prefix() { return "llvm.loop."; } @@ -162,6 +167,13 @@ class LoopVectorizeHints { return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly; } + enum ForceKind getReassociateFPReductions() const { + if ((ForceKind)ReassociateFPReductions.Value == FK_Undefined && + hasDisableAllTransformsHint(TheLoop)) + return FK_Disabled; + return (ForceKind)ReassociateFPReductions.Value; + } + /// If hints are provided that force vectorization, use the AlwaysPrint /// pass name to force the frontend to print the diagnostic. const char *vectorizeAnalysisPassName() const; @@ -173,6 +185,10 @@ class LoopVectorizeHints { /// error accumulates in the loop. bool allowReordering() const; + /// Returns true iff the loop hints allow reassociating floating-point + /// reductions for the purpose of vectorization. 
+ bool allowFPReductionReassociation() const; + bool isPotentiallyUnsafe() const { // Avoid FP vectorization if the target is unsure about proper support. // This may be related to the SIMD unit in the target not handling diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 8e09e6f8d4935..dffff6f7278a1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -97,6 +97,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) { case HK_ISVECTORIZED: case HK_PREDICATE: case HK_SCALABLE: + case HK_REASSOCIATE_FP_REDUCTIONS: return (Val == 0 || Val == 1); } return false; @@ -112,6 +113,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, IsVectorized("isvectorized", 0, HK_ISVECTORIZED), Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE), + ReassociateFPReductions("vectorize.reassociate_fpreductions.enable", + FK_Undefined, HK_REASSOCIATE_FP_REDUCTIONS), TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. 
getHintsFromMetadata(); @@ -254,6 +257,11 @@ bool LoopVectorizeHints::allowReordering() const { EC.getKnownMinValue() > 1); } +bool LoopVectorizeHints::allowFPReductionReassociation() const { + return HintsAllowReordering && + getReassociateFPReductions() == LoopVectorizeHints::FK_Enabled; +} + void LoopVectorizeHints::getHintsFromMetadata() { MDNode *LoopID = TheLoop->getLoopID(); if (!LoopID) @@ -300,8 +308,13 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { return; unsigned Val = C->getZExtValue(); - Hint *Hints[] = {&Width, &Interleave, &Force, - &IsVectorized, &Predicate, &Scalable}; + Hint *Hints[] = {&Width, + &Interleave, + &Force, + &IsVectorized, + &Predicate, + &Scalable, + &ReassociateFPReductions}; for (auto *H : Hints) { if (Name == H->Name) { if (H->validate(Val)) @@ -1311,22 +1324,25 @@ bool LoopVectorizationLegality::canVectorizeFPMath( return true; // If the above is false, we have ExactFPMath & do not allow reordering. - // If the EnableStrictReductions flag is set, first check if we have any - // Exact FP induction vars, which we cannot vectorize. - if (!EnableStrictReductions || - any_of(getInductionVars(), [&](auto &Induction) -> bool { + // First check if we have any Exact FP induction vars, which we cannot + // vectorize. + if (any_of(getInductionVars(), [&](auto &Induction) -> bool { InductionDescriptor IndDesc = Induction.second; return IndDesc.getExactFPMathInst(); })) return false; - // We can now only vectorize if all reductions with Exact FP math also - // have the isOrdered flag set, which indicates that we can move the - // reduction operations in-loop. 
- return (all_of(getReductionVars(), [&](auto &Reduction) -> bool { - const RecurrenceDescriptor &RdxDesc = Reduction.second; - return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered(); - })); + // We can now only vectorize if EnableStrictReductions flag is set and + // all reductions with Exact FP math also have the isOrdered flag set, + // which indicates that we can move the reduction operations in-loop. + // If the hints allow reassociating FP reductions, then skip + // all the checks. + return (Hints->allowFPReductionReassociation() || + all_of(getReductionVars(), [&](auto &Reduction) -> bool { + const RecurrenceDescriptor &RdxDesc = Reduction.second; + return !RdxDesc.hasExactFPMath() || + (EnableStrictReductions && RdxDesc.isOrdered()); + })); } bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index fc8ebebcf21b7..608715453e40d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1000,9 +1000,10 @@ class LoopVectorizationCostModel { /// Returns true if we should use strict in-order reductions for the given /// RdxDesc. This is true if the -enable-strict-reductions flag is passed, /// the IsOrdered flag of RdxDesc is set and we do not allow reordering - /// of FP operations. + /// of FP operations or FP reductions. bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const { - return !Hints->allowReordering() && RdxDesc.isOrdered(); + return !Hints->allowReordering() && + !Hints->allowFPReductionReassociation() && RdxDesc.isOrdered(); } /// \returns The smallest bitwidth each instruction can be represented with. 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-reassociate.ll new file mode 100644 index 0000000000000..1e760c841f3dd --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-reassociate.ll @@ -0,0 +1,130 @@ +; Check that the loops with a floating-point reduction are vectorized +; according to llvm.loop.vectorize.reassociate_fpreductions.enable metadata. +; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +define float @test_enable(ptr readonly captures(none) %array, float %init) { +; CHECK-LABEL: define float @test_enable( +; CHECK: fadd contract <4 x float> {{.*}} +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD0:[0-9]+]] +; CHECK: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}}) +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD3:[0-9]+]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi float [ %init, %entry ], [ %red.next, %loop ] + %gep = getelementptr float, ptr %array, i64 %iv + %element = load float, ptr %gep, align 4 + %red.next = fadd contract float %red, %element + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1000 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0 + +exit: + %result = phi float [ %red.next, %loop ] + ret float %result +} + +; The reduction is unsafe, and the metadata does not allow +; vectorizing it: +define float @test_disable(ptr readonly captures(none) %array, float %init) { +; CHECK-LABEL: define float @test_disable( +; CHECK-NOT: <4 x float> +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD4:[0-9]+]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi float [ %init, %entry ], [ %red.next, %loop ] + %gep = getelementptr float, ptr %array, i64 %iv + %element = load float, ptr %gep, align 4 + %red.next = fadd contract float %red, 
%element + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1000 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !2 + +exit: + %result = phi float [ %red.next, %loop ] + ret float %result +} + +; Forced vectorization "makes" the reduction reassociation safe, +; so setting llvm.loop.vectorize.reassociate_fpreductions.enable +; to false has no effect: +define float @test_disable_with_forced_vectorization(ptr readonly captures(none) %array, float %init) { +; CHECK-LABEL: define float @test_disable_with_forced_vectorization( +; CHECK: fadd contract <4 x float> {{.*}} +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD6:[0-9]+]] +; CHECK: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}}) +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD7:[0-9]+]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi float [ %init, %entry ], [ %red.next, %loop ] + %gep = getelementptr float, ptr %array, i64 %iv + %element = load float, ptr %gep, align 4 + %red.next = fadd contract float %red, %element + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1000 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !4 + +exit: + %result = phi float [ %red.next, %loop ] + ret float %result +} + +; 'fast' math makes reduction reassociation safe, +; so setting llvm.loop.vectorize.reassociate_fpreductions.enable +; to false has no effect: +define float @test_disable_with_fast_math(ptr readonly captures(none) %array, float %init) { +; CHECK-LABEL: define float @test_disable_with_fast_math( +; CHECK: fadd fast <4 x float> {{.*}} +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD8:[0-9]+]] +; CHECK: call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> {{.*}}) +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD9:[0-9]+]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi float [ %init, %entry ], 
[ %red.next, %loop ] + %gep = getelementptr float, ptr %array, i64 %iv + %element = load float, ptr %gep, align 4 + %red.next = fadd fast float %red, %element + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1000 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !2 + +exit: + %result = phi float [ %red.next, %loop ] + ret float %result +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 true} +!2 = distinct !{!2, !3} +!3 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 false} +!4 = distinct !{!4, !3, !5} +!5 = !{!"llvm.loop.vectorize.enable", i1 true} + +; CHECK-NOT: llvm.loop.vectorize.reassociate_fpreductions.enable +; CHECK: ![[MD0]] = distinct !{![[MD0]], ![[MD1:[0-9]+]], ![[MD2:[0-9]+]]} +; CHECK: ![[MD1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: ![[MD2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: ![[MD3]] = distinct !{![[MD3]], ![[MD2]], ![[MD1]]} +; CHECK: ![[MD4]] = distinct !{![[MD4]], ![[MD5:[0-9]+]]} +; CHECK: ![[MD5]] = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 false} +; CHECK: ![[MD6]] = distinct !{![[MD6]], ![[MD1]], ![[MD2]]} +; CHECK: ![[MD7]] = distinct !{![[MD7]], ![[MD2]], ![[MD1]]} +; CHECK: ![[MD8]] = distinct !{![[MD8]], ![[MD1]], ![[MD2]]} +; CHECK: ![[MD9]] = distinct !{![[MD9]], ![[MD2]], ![[MD1]]}