diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fb696bea671af..6ac46e62cbaf5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1554,6 +1554,9 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { /// True if the intrinsic may have side-effects. bool MayHaveSideEffects; + /// True if the intrinsic is safe to speculatively execute. + bool IsSafeToSpeculativelyExecute; + public: VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, @@ -1577,6 +1580,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { MayHaveSideEffects = MayWriteToMemory || !Attrs.hasAttribute(Attribute::NoUnwind) || !Attrs.hasAttribute(Attribute::WillReturn); + IsSafeToSpeculativelyExecute = Attrs.hasAttribute(Attribute::Speculatable); } ~VPWidenIntrinsicRecipe() override = default; @@ -1616,6 +1620,11 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { /// Returns true if the intrinsic may have side-effects. bool mayHaveSideEffects() const { return MayHaveSideEffects; } + /// Returns true if the intrinsic is safe to speculatively execute. + bool isSafeToSpeculativelyExecute() const { + return IsSafeToSpeculativelyExecute; + } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index c8a2d84a535d3..97bd5cade18b6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionPatternMatch.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" @@ -2087,6 +2088,16 @@ void VPlanTransforms::cse(VPlan &Plan) { } } +static bool isSafeToSpeculativelyExecute(VPRecipeBase *R) { + if (auto *WC = dyn_cast(R)) + return WC->getCalledScalarFunction()->isSpeculatable(); + if (auto *WI = dyn_cast(R)) + return WI->isSafeToSpeculativelyExecute(); + if (auto *RepR = dyn_cast(R)) + return isSafeToSpeculativelyExecute(RepR->getUnderlyingInstr()); + return !R->mayHaveSideEffects(); +} + /// Move loop-invariant recipes out of the vector loop region in \p Plan. static void licm(VPlan &Plan) { VPBasicBlock *Preheader = Plan.getVectorPreheader(); @@ -2095,6 +2106,11 @@ static void licm(VPlan &Plan) { // out of a loop region. Does not address legality concerns such as aliasing // or speculation safety. auto CannotHoistRecipe = [](VPRecipeBase &R) { + // TODO: Relax checks in the future, e.g. we could also hoist reads, if + // their memory location is not modified in the vector loop. + if (R.mayReadOrWriteMemory() || R.isPhi()) + return true; + // Allocas cannot be hoisted. auto *RepR = dyn_cast(&R); return RepR && RepR->getOpcode() == Instruction::Alloca; @@ -2104,14 +2120,13 @@ static void licm(VPlan &Plan) { // preheader. Preform a shallow traversal of the vector loop region, to // exclude recipes in replicate regions. VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + bool GuaranteedToExecute = Preheader->getSingleSuccessor() == LoopRegion; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(LoopRegion->getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (CannotHoistRecipe(R)) continue; - // TODO: Relax checks in the future, e.g. we could also hoist reads, if - // their memory location is not modified in the vector loop. - if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() || + if ((!GuaranteedToExecute && !isSafeToSpeculativelyExecute(&R)) || any_of(R.operands(), [](VPValue *Op) { return !Op->isDefinedOutsideLoopRegions(); })) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll index 5970608794b55..48e64771ac152 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -45,8 +45,9 @@ define void @test1(ptr %dst, {i64, i64} %sv) { ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br [[EXIT:label %.*]] -; FORCED: [[SCALAR_PH:.*:]] +; FORCED-NEXT: br label %[[EXIT:.*]] +; FORCED: [[EXIT]]: +; FORCED-NEXT: ret void ; entry: br label %loop.body @@ -91,18 +92,19 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) { ; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1 ; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 ; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer +; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]]) ; FORCED-NEXT: br label %[[VECTOR_BODY:.*]] ; FORCED: [[VECTOR_BODY]]: ; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]] -; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]]) ; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4 ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br [[EXIT:label %.*]] -; FORCED: [[SCALAR_PH:.*:]] +; FORCED-NEXT: br label %[[EXIT:.*]] +; FORCED: [[EXIT]]: +; FORCED-NEXT: ret void ; entry: br label %loop.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll index 84d21110e8da9..f5a30bef177df 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll @@ -1,11 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc18.0.0" define void @test1() #0 personality ptr @__CxxFrameHandler3 { +; CHECK-LABEL: define void @test1( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: invoke void @_CxxThrowException(ptr null, ptr null) +; CHECK-NEXT: to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH:.*]] +; CHECK: [[CATCH_DISPATCH]]: +; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller +; CHECK: [[CATCH:.*]]: +; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: catchret from [[TMP1]] to label %[[TRY_CONT:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, %[[CATCH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[CALL:%.*]] = call double @floor(double 1.000000e+00) #[[ATTR1:[0-9]+]] [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; CHECK: [[TRY_CONT]]: +; CHECK-NEXT: ret void +; CHECK: [[UNREACHABLE]]: +; CHECK-NEXT: unreachable +; entry: invoke void @_CxxThrowException(ptr null, ptr null) - to label %unreachable unwind label %catch.dispatch + to label %unreachable unwind label %catch.dispatch catch.dispatch: ; preds = %entry %0 = catchswitch within none [label %catch] unwind to caller @@ -31,9 +55,6 @@ unreachable: ; preds = %entry unreachable } -; CHECK-LABEL: define void @test1( -; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [ptr null, i32 64, ptr null] -; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ] declare x86_stdcallcc void @_CxxThrowException(ptr, ptr)