diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index b73ed208ed74c..0a8565e621030 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -34,35 +34,89 @@ static cl::opt<unsigned> SLPMaxVF(
         "exclusively by SLP vectorizer."),
     cl::Hidden);
 
-InstructionCost
-RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
-                                      TTI::TargetCostKind CostKind) {
-  // Check if the type is valid for all CostKind
-  if (!VT.isVector())
-    return InstructionCost::getInvalid();
-  size_t NumInstr = OpCodes.size();
-  if (CostKind == TTI::TCK_CodeSize)
-    return NumInstr;
+/// Return the generic (not CPU-specific) cost estimate for a single RVV
+/// instruction \p Op operating on vector type \p VT.
+///
+/// Gathers and slides defer to the corresponding \p TLI cost hooks, the
+/// unordered reductions are costed as ceil(log2(VL)), the ordered FP sum
+/// reduction as VL, the listed scalar-move and mask opcodes as 1, and every
+/// other opcode as the LMUL cost of \p VT. For scalable types VL is the
+/// minimum element count multiplied by the tuning vscale reported by \p TTI.
+static InstructionCost getRVVBaseCost(unsigned Op, MVT VT,
+                                      const RISCVTTIImpl *TTI,
+                                      const RISCVTargetLowering *TLI) {
+  switch (Op) {
+  case RISCV::VRGATHER_VI:
+    return TLI->getVRGatherVICost(VT);
+  case RISCV::VRGATHER_VV:
+    return TLI->getVRGatherVVCost(VT);
+  case RISCV::VSLIDEUP_VI:
+  case RISCV::VSLIDEDOWN_VI:
+    return TLI->getVSlideVICost(VT);
+  case RISCV::VSLIDEUP_VX:
+  case RISCV::VSLIDEDOWN_VX:
+    return TLI->getVSlideVXCost(VT);
+  case RISCV::VREDMAX_VS:
+  case RISCV::VREDMIN_VS:
+  case RISCV::VREDMAXU_VS:
+  case RISCV::VREDMINU_VS:
+  case RISCV::VREDSUM_VS:
+  case RISCV::VREDAND_VS:
+  case RISCV::VREDOR_VS:
+  case RISCV::VREDXOR_VS:
+  case RISCV::VFREDMAX_VS:
+  case RISCV::VFREDMIN_VS:
+  case RISCV::VFREDUSUM_VS: {
+    unsigned VL = VT.getVectorMinNumElements();
+    if (!VT.isFixedLengthVector())
+      VL *= *(TTI->getVScaleForTuning());
+    return Log2_32_Ceil(VL);
+  }
+  case RISCV::VFREDOSUM_VS: {
+    unsigned VL = VT.getVectorMinNumElements();
+    if (!VT.isFixedLengthVector())
+      VL *= *(TTI->getVScaleForTuning());
+    return VL;
+  }
+  case RISCV::VMV_X_S:
+  case RISCV::VMV_S_X:
+  case RISCV::VFMV_F_S:
+  case RISCV::VFMV_S_F:
+  case RISCV::VMOR_MM:
+  case RISCV::VMXOR_MM:
+  case RISCV::VMAND_MM:
+  case RISCV::VMANDN_MM:
+  case RISCV::VMNAND_MM:
+  case RISCV::VCPOP_M:
+  case RISCV::VFIRST_M:
+    return 1;
+  default:
+    return TLI->getLMULCost(VT);
+  }
+}
+
+/// Cost model for a sequence of RVV instructions on the SiFive7 processor
+/// family (the caller dispatches here when the subtarget reports SiFive7).
+///
+/// For cost kinds other than reciprocal throughput and latency the result is
+/// the LMUL cost of \p VT times the number of opcodes. Otherwise reductions
+/// and vector-to-scalar moves get SiFive-specific costs and every remaining
+/// opcode falls back to getRVVBaseCost().
+static InstructionCost getSiFiveX280RVVCost(ArrayRef<unsigned> OpCodes, MVT VT,
+                                            TTI::TargetCostKind CostKind,
+                                            const RISCVTTIImpl *TTI,
+                                            const RISCVTargetLowering *TLI) {
   InstructionCost LMULCost = TLI->getLMULCost(VT);
+  size_t NumInstr = OpCodes.size();
   if ((CostKind != TTI::TCK_RecipThroughput) && (CostKind != TTI::TCK_Latency))
     return LMULCost * NumInstr;
   InstructionCost Cost = 0;
+  // Scalable types are costed with a fixed vscale instead of the tuning
+  // vscale. NOTE(review): 8 presumably corresponds to the X280's VLEN of 512
+  // bits (512 / 64); confirm against the subtarget definition.
+  const unsigned VScale = 8;
   for (auto Op : OpCodes) {
     switch (Op) {
-    case RISCV::VRGATHER_VI:
-      Cost += TLI->getVRGatherVICost(VT);
-      break;
-    case RISCV::VRGATHER_VV:
-      Cost += TLI->getVRGatherVVCost(VT);
-      break;
-    case RISCV::VSLIDEUP_VI:
-    case RISCV::VSLIDEDOWN_VI:
-      Cost += TLI->getVSlideVICost(VT);
-      break;
-    case RISCV::VSLIDEUP_VX:
-    case RISCV::VSLIDEDOWN_VX:
-      Cost += TLI->getVSlideVXCost(VT);
-      break;
     case RISCV::VREDMAX_VS:
     case RISCV::VREDMIN_VS:
     case RISCV::VREDMAXU_VS:
@@ -76,37 +130,68 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
     case RISCV::VFREDUSUM_VS: {
       unsigned VL = VT.getVectorMinNumElements();
       if (!VT.isFixedLengthVector())
-        VL *= *getVScaleForTuning();
-      Cost += Log2_32_Ceil(VL);
+        VL *= VScale;
+      // For small VL (at most 32 elements) use a lookup table, indexed by
+      // ceil(VL / 4), for accurate cost estimation.
+      static const unsigned LookUpSiFive7ReduceLatency[] = {
+          0, 20, 27, 32, 34, 38, 40, 41, 42};
+      if (VL <= 32) {
+        Cost += LookUpSiFive7ReduceLatency[divideCeil(VL, 4)];
+        break;
+      }
+      Cost += 6 + 7 * Log2_32_Ceil(VL);
       break;
     }
     case RISCV::VFREDOSUM_VS: {
       unsigned VL = VT.getVectorMinNumElements();
       if (!VT.isFixedLengthVector())
-        VL *= *getVScaleForTuning();
-      Cost += VL;
+        VL *= VScale;
+      Cost += VL * 6;
       break;
     }
     case RISCV::VMV_X_S:
-    case RISCV::VMV_S_X:
     case RISCV::VFMV_F_S:
-    case RISCV::VFMV_S_F:
-    case RISCV::VMOR_MM:
-    case RISCV::VMXOR_MM:
-    case RISCV::VMAND_MM:
-    case RISCV::VMANDN_MM:
-    case RISCV::VMNAND_MM:
     case RISCV::VCPOP_M:
     case RISCV::VFIRST_M:
-      Cost += 1;
+      // Vector-to-scalar communication.
+      Cost += 8;
       break;
     default:
-      Cost += LMULCost;
+      Cost += getRVVBaseCost(Op, VT, TTI, TLI);
+      break;
     }
   }
   return Cost;
 }
 
+/// Estimate the cost of the RVV instruction sequence \p OpCodes on type
+/// \p VT. Non-vector types yield an invalid cost and TCK_CodeSize yields the
+/// opcode count. The SiFive7 family is routed to getSiFiveX280RVVCost(); for
+/// other subtargets, throughput and latency sum getRVVBaseCost() per opcode
+/// and remaining cost kinds use the LMUL cost times the opcode count.
+InstructionCost
+RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
+                                      TTI::TargetCostKind CostKind) {
+  // Check if the type is valid for all CostKind
+  if (!VT.isVector())
+    return InstructionCost::getInvalid();
+  size_t NumInstr = OpCodes.size();
+  if (CostKind == TTI::TCK_CodeSize)
+    return NumInstr;
+
+  if (ST->getProcFamily() == RISCVSubtarget::SiFive7)
+    return getSiFiveX280RVVCost(OpCodes, VT, CostKind, this, TLI);
+
+  InstructionCost LMULCost = TLI->getLMULCost(VT);
+  if ((CostKind != TTI::TCK_RecipThroughput) && (CostKind != TTI::TCK_Latency))
+    return LMULCost * NumInstr;
+  InstructionCost Cost = 0;
+  for (auto Op : OpCodes)
+    Cost += getRVVBaseCost(Op, VT, this, TLI);
+
+  return Cost;
+}
+
 InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                             TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy() &&
diff --git a/llvm/test/Analysis/CostModel/RISCV/sifive-x280-reduce.ll b/llvm/test/Analysis/CostModel/RISCV/sifive-x280-reduce.ll
new file mode 100644
index 0000000000000..01632f9ab8064
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/sifive-x280-reduce.ll
@@ -0,0 +1,675 @@
+; NOTE: Assertions have been
autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=riscv64 -mcpu=sifive-x280 -passes="print" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=THROUGHPUT +; RUN: opt < %s -mtriple=riscv64 -mcpu=sifive-x280 -passes="print" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE + +declare i1 @llvm.vector.reduce.add.v16i1(<16 x i1>) +declare i1 @llvm.vector.reduce.add.v32i1(<32 x i1>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.add.v256i8(<256 x i8>) +declare i8 @llvm.vector.reduce.add.v512i8(<512 x i8>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.add.v128i16(<128 x i16>) +declare i16 @llvm.vector.reduce.add.v256i16(<256 x i16>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.add.v64i32(<64 x i32>) +declare i32 @llvm.vector.reduce.add.v128i32(<128 x i32>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.add.v32i64(<32 x i64>) +declare i64 @llvm.vector.reduce.add.v64i64(<64 x i64>) + +define void @reduce_add() { +; THROUGHPUT-LABEL: 'reduce_add' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %1 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef) +; 
THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %3 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %4 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %5 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %6 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %7 = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %8 = call i8 @llvm.vector.reduce.add.v512i8(<512 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %9 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %10 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %11 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %12 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %13 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %14 = call i16 @llvm.vector.reduce.add.v256i16(<256 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) +; THROUGHPUT-NEXT: Cost 
Model: Found an estimated cost of 36 for instruction: %16 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %17 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %18 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %19 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %20 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %21 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %22 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %23 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %24 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %25 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %26 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = call i1 @llvm.vector.reduce.add.nxv1i1( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %28 = call i1 @llvm.vector.reduce.add.nxv2i1( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %29 = call i1 @llvm.vector.reduce.add.nxv4i1( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 9 for 
instruction: %30 = call i1 @llvm.vector.reduce.add.nxv8i1( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %31 = call i1 @llvm.vector.reduce.add.nxv16i1( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %32 = call i8 @llvm.vector.reduce.add.nxv1i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %33 = call i8 @llvm.vector.reduce.add.nxv2i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %34 = call i8 @llvm.vector.reduce.add.nxv4i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %35 = call i8 @llvm.vector.reduce.add.nxv8i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %36 = call i8 @llvm.vector.reduce.add.nxv16i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %37 = call i8 @llvm.vector.reduce.add.nxv32i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %38 = call i8 @llvm.vector.reduce.add.nxv64i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %39 = call i16 @llvm.vector.reduce.add.nxv1i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %40 = call i16 @llvm.vector.reduce.add.nxv2i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %41 = call i16 @llvm.vector.reduce.add.nxv4i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %42 = call i16 @llvm.vector.reduce.add.nxv8i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %43 = call i16 @llvm.vector.reduce.add.nxv16i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %44 = call i16 @llvm.vector.reduce.add.nxv32i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for 
instruction: %45 = call i32 @llvm.vector.reduce.add.nxv1i32( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %46 = call i32 @llvm.vector.reduce.add.nxv2i32( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %47 = call i32 @llvm.vector.reduce.add.nxv4i32( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %48 = call i32 @llvm.vector.reduce.add.nxv8i32( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %49 = call i32 @llvm.vector.reduce.add.nxv16i32( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %50 = call i64 @llvm.vector.reduce.add.nxv1i64( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %51 = call i64 @llvm.vector.reduce.add.nxv2i64( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %52 = call i64 @llvm.vector.reduce.add.nxv4i64( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %53 = call i64 @llvm.vector.reduce.add.nxv8i64( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'reduce_add' +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = 
call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = call i8 @llvm.vector.reduce.add.v512i8(<512 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = call i16 @llvm.vector.reduce.add.v256i16(<256 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %16 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef) +; SIZE-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %21 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call i1 @llvm.vector.reduce.add.nxv1i1( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call i1 @llvm.vector.reduce.add.nxv2i1( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call i1 @llvm.vector.reduce.add.nxv4i1( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call i1 @llvm.vector.reduce.add.nxv8i1( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call i1 @llvm.vector.reduce.add.nxv16i1( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %32 = call i8 @llvm.vector.reduce.add.nxv1i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = call i8 @llvm.vector.reduce.add.nxv2i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %34 = call i8 @llvm.vector.reduce.add.nxv4i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %35 = call i8 @llvm.vector.reduce.add.nxv8i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %36 = call i8 
@llvm.vector.reduce.add.nxv16i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %37 = call i8 @llvm.vector.reduce.add.nxv32i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %38 = call i8 @llvm.vector.reduce.add.nxv64i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = call i16 @llvm.vector.reduce.add.nxv1i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %40 = call i16 @llvm.vector.reduce.add.nxv2i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = call i16 @llvm.vector.reduce.add.nxv4i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = call i16 @llvm.vector.reduce.add.nxv8i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %43 = call i16 @llvm.vector.reduce.add.nxv16i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %44 = call i16 @llvm.vector.reduce.add.nxv32i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %45 = call i32 @llvm.vector.reduce.add.nxv1i32( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %46 = call i32 @llvm.vector.reduce.add.nxv2i32( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %47 = call i32 @llvm.vector.reduce.add.nxv4i32( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %48 = call i32 @llvm.vector.reduce.add.nxv8i32( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %49 = call i32 @llvm.vector.reduce.add.nxv16i32( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %50 = call i64 @llvm.vector.reduce.add.nxv1i64( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = call i64 @llvm.vector.reduce.add.nxv2i64( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 
3 for instruction: %52 = call i64 @llvm.vector.reduce.add.nxv4i64( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %53 = call i64 @llvm.vector.reduce.add.nxv8i64( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef) + call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef) + call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) + call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) + call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) + call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) + call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> undef) + call i8 @llvm.vector.reduce.add.v512i8(<512 x i8> undef) + call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) + call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) + call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) + call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) + call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef) + call i16 @llvm.vector.reduce.add.v256i16(<256 x i16> undef) + call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) + call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) + call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) + call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef) + call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef) + call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef) + call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) + call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) + call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) + call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) + call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef) + call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef) + + call i1 @llvm.vector.reduce.add.nxv1i1( undef) + call i1 @llvm.vector.reduce.add.nxv2i1( undef) + call i1 @llvm.vector.reduce.add.nxv4i1( undef) + call i1 
@llvm.vector.reduce.add.nxv8i1( undef) + call i1 @llvm.vector.reduce.add.nxv16i1( undef) + call i8 @llvm.vector.reduce.add.nxv1i8( undef) + call i8 @llvm.vector.reduce.add.nxv2i8( undef) + call i8 @llvm.vector.reduce.add.nxv4i8( undef) + call i8 @llvm.vector.reduce.add.nxv8i8( undef) + call i8 @llvm.vector.reduce.add.nxv16i8( undef) + call i8 @llvm.vector.reduce.add.nxv32i8( undef) + call i8 @llvm.vector.reduce.add.nxv64i8( undef) + call i16 @llvm.vector.reduce.add.nxv1i16( undef) + call i16 @llvm.vector.reduce.add.nxv2i16( undef) + call i16 @llvm.vector.reduce.add.nxv4i16( undef) + call i16 @llvm.vector.reduce.add.nxv8i16( undef) + call i16 @llvm.vector.reduce.add.nxv16i16( undef) + call i16 @llvm.vector.reduce.add.nxv32i16( undef) + call i32 @llvm.vector.reduce.add.nxv1i32( undef) + call i32 @llvm.vector.reduce.add.nxv2i32( undef) + call i32 @llvm.vector.reduce.add.nxv4i32( undef) + call i32 @llvm.vector.reduce.add.nxv8i32( undef) + call i32 @llvm.vector.reduce.add.nxv16i32( undef) + call i64 @llvm.vector.reduce.add.nxv1i64( undef) + call i64 @llvm.vector.reduce.add.nxv2i64( undef) + call i64 @llvm.vector.reduce.add.nxv4i64( undef) + call i64 @llvm.vector.reduce.add.nxv8i64( undef) + ret void +} + +declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) +declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>) +declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>) +declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>) +declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>) +declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>) +declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>) +declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>) +declare half @llvm.vector.reduce.fadd.v256f16(half, <256 x half>) +declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>) +declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) +declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) 
+declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) +declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>) +declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>) +declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>) +declare float @llvm.vector.reduce.fadd.v128f32(float, <128 x float>) +declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>) +declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) +declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) +declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>) +declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>) +declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>) +declare double @llvm.vector.reduce.fadd.v64f64(double, <64 x double>) + +define void @ordered_reduce_fadd() { +; THROUGHPUT-LABEL: 'ordered_reduce_fadd' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %3 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %4 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %5 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %6 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %7 = call half @llvm.vector.reduce.fadd.v64f16(half 
0xH0000, <64 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %8 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1545 for instruction: %9 = call half @llvm.vector.reduce.fadd.v256f16(half 0xH0000, <256 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %10 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %11 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %12 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %13 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %14 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1545 for instruction: %15 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %16 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %17 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %18 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %19 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 105 for instruction: 
%20 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %21 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %22 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %23 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %25 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %26 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %27 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %28 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %29 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %30 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %31 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %32 = call double 
@llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %33 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %34 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %35 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %36 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %37 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %38 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %39 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'ordered_reduce_fadd' +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> 
undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call half @llvm.vector.reduce.fadd.v256f16(half 0xH0000, <256 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %16 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %18 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; SIZE-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %31 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %32 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %34 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %35 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %36 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %37 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %38 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) + call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) + call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) + call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) + call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) + call half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) + call half 
@llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) + call half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) + call half @llvm.vector.reduce.fadd.v256(half 0.0, <256 x half> undef) + call half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, undef) + call half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, undef) + call half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, undef) + call half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, undef) + call half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, undef) + call half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, undef) + call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) + call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) + call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) + call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) + call float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef) + call float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) + call float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) + call float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) + call float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, undef) + call float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, undef) + call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, undef) + call float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, undef) + call float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, undef) + call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) + call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) + call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) + call double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef) + call double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef) + call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) + call double 
@llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) + call double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef) + call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) + call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) + call double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) + ret void +} + +define void @fast_reduce_fadd() { +; THROUGHPUT-LABEL: 'fast_reduce_fadd' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %3 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %4 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %5 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %6 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %7 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %8 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %9 = call fast half @llvm.vector.reduce.fadd.v256f16(half 0xH0000, <256 x half> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated 
cost of 36 for instruction: %10 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %11 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %12 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %13 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %14 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %15 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %16 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %17 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %18 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %19 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %20 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %21 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %22 = call fast 
float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %23 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %24 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %25 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %26 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %27 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %28 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %29 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %30 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %31 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %32 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %33 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for 
instruction: %34 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %35 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %36 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %37 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %38 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %39 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'fast_reduce_fadd' +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) +; 
SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call fast half @llvm.vector.reduce.fadd.v256f16(half 0xH0000, <256 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %16 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x 
float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %32 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %34 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %35 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %36 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %37 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %38 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) + call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) + call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) + call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) + call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) + call fast half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) + call fast half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) + call fast half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) + call fast half 
@llvm.vector.reduce.fadd.v256(half 0.0, <256 x half> undef) + call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, undef) + call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, undef) + call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, undef) + call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, undef) + call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, undef) + call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, undef) + call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) + call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) + call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) + call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) + call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef) + call fast float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) + call fast float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) + call fast float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) + call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, undef) + call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, undef) + call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, undef) + call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, undef) + call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, undef) + call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) + call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) + call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) + call fast double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef) + call fast double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef) + call fast double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) + call fast double 
@llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) + call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.0, undef) + call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, undef) + call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, undef) + call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, undef) + ret void +} + +declare i1 @llvm.vector.reduce.smax.v16i1(<16 x i1>) +declare i1 @llvm.vector.reduce.smax.v32i1(<32 x i1>) +declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>) +declare i8 @llvm.vector.reduce.smax.v256i8(<256 x i8>) +declare i8 @llvm.vector.reduce.smax.v512i8(<512 x i8>) +declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.smax.v128i16(<128 x i16>) +declare i16 @llvm.vector.reduce.smax.v256i16(<256 x i16>) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.smax.v64i32(<64 x i32>) +declare i32 @llvm.vector.reduce.smax.v128i32(<128 x i32>) +declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.smax.v32i64(<32 x i64>) +declare i64 @llvm.vector.reduce.smax.v64i64(<64 x i64>) + +define void @reduce_smax() { +; THROUGHPUT-LABEL: 'reduce_smax' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call i1 
@llvm.vector.reduce.smax.v16i1(<16 x i1> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %2 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %3 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %4 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %5 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %6 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %7 = call i8 @llvm.vector.reduce.smax.v256i8(<256 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %8 = call i8 @llvm.vector.reduce.smax.v512i8(<512 x i8> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %9 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %10 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %11 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %12 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %13 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %14 = call i16 @llvm.vector.reduce.smax.v256i16(<256 x i16> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %15 = call i32 
@llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %16 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %17 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %18 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %19 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %20 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %21 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %22 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %23 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %24 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %25 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %26 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %27 = call i1 @llvm.vector.reduce.smax.nxv1i1( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %28 = call i1 @llvm.vector.reduce.smax.nxv2i1( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %29 = call i1 
@llvm.vector.reduce.smax.nxv4i1( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %30 = call i1 @llvm.vector.reduce.smax.nxv8i1( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %31 = call i1 @llvm.vector.reduce.smax.nxv16i1( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %32 = call i8 @llvm.vector.reduce.smax.nxv1i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %33 = call i8 @llvm.vector.reduce.smax.nxv2i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %34 = call i8 @llvm.vector.reduce.smax.nxv4i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %35 = call i8 @llvm.vector.reduce.smax.nxv8i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %36 = call i8 @llvm.vector.reduce.smax.nxv16i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %37 = call i8 @llvm.vector.reduce.smax.nxv32i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %38 = call i8 @llvm.vector.reduce.smax.nxv64i8( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %39 = call i16 @llvm.vector.reduce.smax.nxv1i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %40 = call i16 @llvm.vector.reduce.smax.nxv2i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %41 = call i16 @llvm.vector.reduce.smax.nxv4i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %42 = call i16 @llvm.vector.reduce.smax.nxv8i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %43 = call i16 @llvm.vector.reduce.smax.nxv16i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %44 
= call i16 @llvm.vector.reduce.smax.nxv32i16( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %45 = call i32 @llvm.vector.reduce.smax.nxv1i32( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %46 = call i32 @llvm.vector.reduce.smax.nxv2i32( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %47 = call i32 @llvm.vector.reduce.smax.nxv4i32( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %48 = call i32 @llvm.vector.reduce.smax.nxv8i32( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %49 = call i32 @llvm.vector.reduce.smax.nxv16i32( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %50 = call i64 @llvm.vector.reduce.smax.nxv1i64( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %51 = call i64 @llvm.vector.reduce.smax.nxv2i64( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %52 = call i64 @llvm.vector.reduce.smax.nxv4i64( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %53 = call i64 @llvm.vector.reduce.smax.nxv8i64( undef) +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'reduce_smax' +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = 
call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call i8 @llvm.vector.reduce.smax.v256i8(<256 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = call i8 @llvm.vector.reduce.smax.v512i8(<512 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = call i16 @llvm.vector.reduce.smax.v256i16(<256 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %16 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef) +; SIZE-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %20 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = call i1 @llvm.vector.reduce.smax.nxv1i1( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call i1 @llvm.vector.reduce.smax.nxv2i1( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call i1 @llvm.vector.reduce.smax.nxv4i1( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = call i1 @llvm.vector.reduce.smax.nxv8i1( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = call i1 @llvm.vector.reduce.smax.nxv16i1( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %32 = call i8 @llvm.vector.reduce.smax.nxv1i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = call i8 @llvm.vector.reduce.smax.nxv2i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %34 = call i8 @llvm.vector.reduce.smax.nxv4i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %35 = call i8 @llvm.vector.reduce.smax.nxv8i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %36 = call i8 @llvm.vector.reduce.smax.nxv16i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %37 = call i8 @llvm.vector.reduce.smax.nxv32i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %38 = call i8 @llvm.vector.reduce.smax.nxv64i8( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = call i16 @llvm.vector.reduce.smax.nxv1i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %40 = call i16 @llvm.vector.reduce.smax.nxv2i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = call i16 @llvm.vector.reduce.smax.nxv4i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = call i16 @llvm.vector.reduce.smax.nxv8i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %43 = call i16 @llvm.vector.reduce.smax.nxv16i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %44 = call i16 @llvm.vector.reduce.smax.nxv32i16( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %45 = call i32 @llvm.vector.reduce.smax.nxv1i32( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %46 = call i32 @llvm.vector.reduce.smax.nxv2i32( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %47 = call i32 @llvm.vector.reduce.smax.nxv4i32( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %48 = call i32 @llvm.vector.reduce.smax.nxv8i32( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %49 = call i32 @llvm.vector.reduce.smax.nxv16i32( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %50 = call i64 @llvm.vector.reduce.smax.nxv1i64( undef) +; 
SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = call i64 @llvm.vector.reduce.smax.nxv2i64( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = call i64 @llvm.vector.reduce.smax.nxv4i64( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %53 = call i64 @llvm.vector.reduce.smax.nxv8i64( undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> undef) + call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef) + call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) + call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) + call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) + call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) + call i8 @llvm.vector.reduce.smax.v256i8(<256 x i8> undef) + call i8 @llvm.vector.reduce.smax.v512i8(<512 x i8> undef) + call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) + call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) + call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) + call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) + call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef) + call i16 @llvm.vector.reduce.smax.v256i16(<256 x i16> undef) + call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) + call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) + call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) + call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) + call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef) + call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef) + call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) + call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) + call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) + call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) + call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef) + call i64 
@llvm.vector.reduce.smax.v64i64(<64 x i64> undef) + call i1 @llvm.vector.reduce.smax.nxv1i1( undef) + call i1 @llvm.vector.reduce.smax.nxv2i1( undef) + call i1 @llvm.vector.reduce.smax.nxv4i1( undef) + call i1 @llvm.vector.reduce.smax.nxv8i1( undef) + call i1 @llvm.vector.reduce.smax.nxv16i1( undef) + call i8 @llvm.vector.reduce.smax.nxv1i8( undef) + call i8 @llvm.vector.reduce.smax.nxv2i8( undef) + call i8 @llvm.vector.reduce.smax.nxv4i8( undef) + call i8 @llvm.vector.reduce.smax.nxv8i8( undef) + call i8 @llvm.vector.reduce.smax.nxv16i8( undef) + call i8 @llvm.vector.reduce.smax.nxv32i8( undef) + call i8 @llvm.vector.reduce.smax.nxv64i8( undef) + call i16 @llvm.vector.reduce.smax.nxv1i16( undef) + call i16 @llvm.vector.reduce.smax.nxv2i16( undef) + call i16 @llvm.vector.reduce.smax.nxv4i16( undef) + call i16 @llvm.vector.reduce.smax.nxv8i16( undef) + call i16 @llvm.vector.reduce.smax.nxv16i16( undef) + call i16 @llvm.vector.reduce.smax.nxv32i16( undef) + call i32 @llvm.vector.reduce.smax.nxv1i32( undef) + call i32 @llvm.vector.reduce.smax.nxv2i32( undef) + call i32 @llvm.vector.reduce.smax.nxv4i32( undef) + call i32 @llvm.vector.reduce.smax.nxv8i32( undef) + call i32 @llvm.vector.reduce.smax.nxv16i32( undef) + call i64 @llvm.vector.reduce.smax.nxv1i64( undef) + call i64 @llvm.vector.reduce.smax.nxv2i64( undef) + call i64 @llvm.vector.reduce.smax.nxv4i64( undef) + call i64 @llvm.vector.reduce.smax.nxv8i64( undef) + ret void +}