diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 39e0449f66845..be490d48118b6 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1785,6 +1785,65 @@ ARMTTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
   return BaseT::getMulAccReductionCost(IsUnsigned, ResTy, ValTy, CostKind);
 }
 
+/// Estimate the cost of a floating-point min/max vector reduction.
+///
+/// Only Intrinsic::minnum and Intrinsic::maxnum reductions over f32 (with
+/// VFP2), f64 (with FP64) or f16 (with full FP16) elements are modelled here;
+/// every other case is forwarded to BaseT. The modelled cost is the sum of the
+/// vector min/max steps that halve over-wide vectors, the f16 lane handling,
+/// and NumElts - 1 scalar min/max operations on the lanes that remain.
+InstructionCost
+ARMTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+                                   FastMathFlags FMF,
+                                   TTI::TargetCostKind CostKind) {
+  if (IID != Intrinsic::minnum && IID != Intrinsic::maxnum)
+    return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
+
+  EVT ValVT = TLI->getValueType(DL, Ty);
+  EVT EltVT = ValVT.getVectorElementType();
+  bool HasFPSupport = (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
+                      (EltVT == MVT::f64 && ST->hasFP64()) ||
+                      (EltVT == MVT::f16 && ST->hasFullFP16());
+  if (!HasFPSupport)
+    return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
+
+  // In general floating point reductions are a series of elementwise
+  // operations, with free extracts on each step. FMF is not inspected here; it
+  // is only forwarded to the per-step intrinsic cost queries.
+  Type *EltTy = Ty->getElementType();
+  unsigned OrigNumElts = cast<FixedVectorType>(Ty)->getNumElements();
+  unsigned NumElts = OrigNumElts;
+  unsigned EltSize = ValVT.getScalarSizeInBits();
+
+  // Vectors wider than VecLimit bits are halved with vector min/max steps.
+  // Without MVE float ops or NEON the limit is the largest unsigned value, so
+  // the loop never runs and every step is costed as a scalar operation.
+  unsigned VecLimit = ST->hasMVEFloatOps() ? 128U : (ST->hasNEON() ? 64U : ~0U);
+  InstructionCost VecCost;
+  while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
+    Type *VecTy = FixedVectorType::get(EltTy, NumElts / 2);
+    IntrinsicCostAttributes HalfICA(IID, VecTy, {VecTy, VecTy}, FMF);
+    VecCost += getIntrinsicInstrCost(HalfICA, CostKind);
+    NumElts /= 2;
+  }
+
+  // For fp16 we need to extract the upper lane elements. MVE can add a
+  // VREV+FMIN/MAX to perform another vector step instead.
+  InstructionCost ExtractCost = 0;
+  if (EltVT == MVT::f16) {
+    if (ST->hasMVEFloatOps() && NumElts == 8) {
+      VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;
+      NumElts /= 2;
+    } else {
+      ExtractCost = OrigNumElts / 2;
+    }
+  }
+
+  IntrinsicCostAttributes ScalarICA(IID, EltTy, {EltTy, EltTy}, FMF);
+  return VecCost + ExtractCost +
+         (NumElts - 1) * getIntrinsicInstrCost(ScalarICA, CostKind);
+}
+
 InstructionCost
 ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                   TTI::TargetCostKind CostKind) {
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 588704d5b7e56..bb4b321b53009 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -290,6 +290,10 @@ class ARMTTIImpl : public BasicTTIImplBase {
                                          VectorType *ValTy,
                                          TTI::TargetCostKind CostKind);
 
+  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+                                         FastMathFlags FMF,
+                                         TTI::TargetCostKind CostKind);
+
   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                         TTI::TargetCostKind CostKind);
 
diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
index 5bc936e1fe38e..1565a138b93d3 100644
--- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
@@ -294,19 +294,19 @@ define void @maskedscatter(<16 x float> %va, <16 x ptr> %vb, <16 x i1> %vc) {
 
 define void @reduce_fmax(<16 x float> %va) {
 ; THRU-LABEL: 'reduce_fmax'
-; THRU-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
+; THRU-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v = call float
@llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmax' -; LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmax' -; SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'reduce_fmax' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll index 14b27062eebb6..48edae8c7d137 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll @@ -11,22 +11,22 @@ define void @fmin_strict() { ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) 
; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'fmin_strict' -; CHECK-MVEFP-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found 
an estimated cost of 5 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) @@ -65,22 +65,22 @@ define void @fmin_unordered() { ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'fmin_unordered' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) -; 
CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) @@ -118,22 +118,22 @@ define void @fmax_strict() { ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) ; CHECK-V8-NEXT: 
Cost Model: Found an estimated cost of 43 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) ; CHECK-V8-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'fmax_strict' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) @@ -172,22 +172,22 @@ define void @fmax_unordered() { ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) -; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fmax_v4f64 = call reassoc double 
@llvm.vector.reduce.fmax.v4f64(<4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) ; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'fmax_unordered' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated 
cost of 35 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)