diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index b73ed208ed74c..0a8565e621030 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -34,35 +34,72 @@ static cl::opt<unsigned> SLPMaxVF(
         "exclusively by SLP vectorizer."),
     cl::Hidden);
 
-InstructionCost
-RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
-                                      TTI::TargetCostKind CostKind) {
-  // Check if the type is valid for all CostKind
-  if (!VT.isVector())
-    return InstructionCost::getInvalid();
-  size_t NumInstr = OpCodes.size();
-  if (CostKind == TTI::TCK_CodeSize)
-    return NumInstr;
+/// Generic (not CPU-specific) cost of a single RVV opcode \p Op on type \p VT.
+/// Gathers and slides defer to \p TLI; reductions are costed from an estimated
+/// VL, which for non-fixed-length \p VT is scaled by \p TTI's tuning vscale.
+static InstructionCost getRVVBaseCost(unsigned Op, MVT VT,
+                                      const RISCVTTIImpl *TTI,
+                                      const RISCVTargetLowering *TLI) {
+  switch (Op) {
+  case RISCV::VRGATHER_VI:
+    return TLI->getVRGatherVICost(VT);
+  case RISCV::VRGATHER_VV:
+    return TLI->getVRGatherVVCost(VT);
+  case RISCV::VSLIDEUP_VI:
+  case RISCV::VSLIDEDOWN_VI:
+    return TLI->getVSlideVICost(VT);
+  case RISCV::VSLIDEUP_VX:
+  case RISCV::VSLIDEDOWN_VX:
+    return TLI->getVSlideVXCost(VT);
+  case RISCV::VREDMAX_VS:
+  case RISCV::VREDMIN_VS:
+  case RISCV::VREDMAXU_VS:
+  case RISCV::VREDMINU_VS:
+  case RISCV::VREDSUM_VS:
+  case RISCV::VREDAND_VS:
+  case RISCV::VREDOR_VS:
+  case RISCV::VREDXOR_VS:
+  case RISCV::VFREDMAX_VS:
+  case RISCV::VFREDMIN_VS:
+  case RISCV::VFREDUSUM_VS:
+  case RISCV::VFREDOSUM_VS: {
+    unsigned VL = VT.getVectorMinNumElements();
+    if (!VT.isFixedLengthVector())
+      VL *= *TTI->getVScaleForTuning();
+    // VFREDOSUM is costed linearly in VL; all others as ceil(log2(VL)).
+    return Op == RISCV::VFREDOSUM_VS ? VL : Log2_32_Ceil(VL);
+  }
+  case RISCV::VMV_X_S:
+  case RISCV::VMV_S_X:
+  case RISCV::VFMV_F_S:
+  case RISCV::VFMV_S_F:
+  case RISCV::VMOR_MM:
+  case RISCV::VMXOR_MM:
+  case RISCV::VMAND_MM:
+  case RISCV::VMANDN_MM:
+  case RISCV::VMNAND_MM:
+  case RISCV::VCPOP_M:
+  case RISCV::VFIRST_M:
+    // These are costed as a flat 1, independent of VT.
+    return 1;
+  default:
+    // Any other opcode is costed by the LMUL cost of VT.
+    return TLI->getLMULCost(VT);
+  }
+}
+
+/// SiFive X280 cost of the RVV opcode sequence \p OpCodes on type \p VT.
+static InstructionCost getSiFiveX280RVVCost(ArrayRef<unsigned> OpCodes, MVT VT,
+                                            TTI::TargetCostKind CostKind,
+                                            const RISCVTTIImpl *TTI,
+                                            const RISCVTargetLowering *TLI) {
   InstructionCost LMULCost = TLI->getLMULCost(VT);
   if ((CostKind != TTI::TCK_RecipThroughput) && (CostKind != TTI::TCK_Latency))
-    return LMULCost * NumInstr;
+    return LMULCost * OpCodes.size();
   InstructionCost Cost = 0;
+  unsigned VScale = 8; // NOTE(review): presumably the X280 vscale; confirm.
   for (auto Op : OpCodes) {
     switch (Op) {
-    case RISCV::VRGATHER_VI:
-      Cost += TLI->getVRGatherVICost(VT);
-      break;
-    case RISCV::VRGATHER_VV:
-      Cost += TLI->getVRGatherVVCost(VT);
-      break;
-    case RISCV::VSLIDEUP_VI:
-    case RISCV::VSLIDEDOWN_VI:
-      Cost += TLI->getVSlideVICost(VT);
-      break;
-    case RISCV::VSLIDEUP_VX:
-    case RISCV::VSLIDEDOWN_VX:
-      Cost += TLI->getVSlideVXCost(VT);
-      break;
     case RISCV::VREDMAX_VS:
     case RISCV::VREDMIN_VS:
     case RISCV::VREDMAXU_VS:
@@ -76,37 +113,63 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
     case RISCV::VFREDUSUM_VS: {
       unsigned VL = VT.getVectorMinNumElements();
       if (!VT.isFixedLengthVector())
-        VL *= *getVScaleForTuning();
-      Cost += Log2_32_Ceil(VL);
+        VL *= VScale;
+      // The table is indexed by divideCeil(VL, 4) and covers VL <= 32 (entry 0
+      // corresponds to VL == 0); larger VLs use the formula below.
+      // NOTE(review): used for both TCK_RecipThroughput and TCK_Latency.
+      static constexpr unsigned LookUpSiFive7ReduceLatency[] = {
+          0, 20, 27, 32, 34, 38, 40, 41, 42};
+      if (VL <= 32)
+        Cost += LookUpSiFive7ReduceLatency[divideCeil(VL, 4)];
+      else
+        Cost += 6 + 7 * Log2_32_Ceil(VL);
       break;
     }
     case RISCV::VFREDOSUM_VS: {
       unsigned VL = VT.getVectorMinNumElements();
       if (!VT.isFixedLengthVector())
-        VL *= *getVScaleForTuning();
-      Cost += VL;
+        VL *= VScale;
+      Cost += VL * 6; // Costed linearly in VL.
       break;
     }
     case RISCV::VMV_X_S:
-    case RISCV::VMV_S_X:
     case RISCV::VFMV_F_S:
-    case RISCV::VFMV_S_F:
-    case RISCV::VMOR_MM:
-    case RISCV::VMXOR_MM:
-    case RISCV::VMAND_MM:
-    case RISCV::VMANDN_MM:
-    case RISCV::VMNAND_MM:
     case RISCV::VCPOP_M:
     case RISCV::VFIRST_M:
-      Cost += 1;
+      // Vector-to-scalar communication.
+      Cost += 8;
       break;
     default:
-      Cost += LMULCost;
+      // All remaining opcodes use the generic per-opcode cost.
+      Cost += getRVVBaseCost(Op, VT, TTI, TLI);
     }
   }
   return Cost;
 }
 
+/// Cost of the RVV opcode sequence \p OpCodes on type \p VT for \p CostKind.
+/// Invalid if \p VT is not a vector; TCK_CodeSize counts one per opcode; the
+/// SiFive7 processor family is routed to getSiFiveX280RVVCost.
+InstructionCost
+RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
+                                      TTI::TargetCostKind CostKind) {
+  // Non-vector types are rejected for every cost kind.
+  if (!VT.isVector())
+    return InstructionCost::getInvalid();
+  if (CostKind == TTI::TCK_CodeSize)
+    return OpCodes.size();
+  if (ST->getProcFamily() == RISCVSubtarget::SiFive7)
+    return getSiFiveX280RVVCost(OpCodes, VT, CostKind, this, TLI);
+  const bool IsTimingKind =
+      CostKind == TTI::TCK_RecipThroughput || CostKind == TTI::TCK_Latency;
+  if (!IsTimingKind)
+    return TLI->getLMULCost(VT) * OpCodes.size();
+  InstructionCost Total = 0;
+  for (unsigned Opcode : OpCodes)
+    Total += getRVVBaseCost(Opcode, VT, this, TLI);
+  return Total;
+}
+
 InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                             TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy() &&
diff --git a/llvm/test/Analysis/CostModel/RISCV/sifive-x280-reduce.ll b/llvm/test/Analysis/CostModel/RISCV/sifive-x280-reduce.ll
new file mode 100644
index 0000000000000..01632f9ab8064
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/sifive-x280-reduce.ll
@@ -0,0 +1,675 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv64 -mcpu=sifive-x280 -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=THROUGHPUT
+; RUN: opt < %s -mtriple=riscv64 -mcpu=sifive-x280 -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+
+declare i1 @llvm.vector.reduce.add.v16i1(<16 x i1>)
+declare i1 @llvm.vector.reduce.add.v32i1(<32 x i1>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.add.v256i8(<256 x i8>)
+declare i8 @llvm.vector.reduce.add.v512i8(<512 x i8>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.add.v128i16(<128 x i16>)
+declare i16 @llvm.vector.reduce.add.v256i16(<256 x i16>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.add.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.add.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.add.v64i64(<64 x i64>)
+
+define void @reduce_add() {
+; THROUGHPUT-LABEL: 'reduce_add'
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %1 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %2 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %3 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %4 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %5 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %6 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %7 = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %8 = call i8 @llvm.vector.reduce.add.v512i8(<512 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %9 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %10 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %11 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %12 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %13 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %14 = call i16 @llvm.vector.reduce.add.v256i16(<256 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %16 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %17 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %18 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %19 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %20 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %21 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %22 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %23 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %24 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %25 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %26 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = call i1 @llvm.vector.reduce.add.nxv1i1(<vscale x 1 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %28 = call i1 @llvm.vector.reduce.add.nxv2i1(<vscale x 2 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %29 = call i1 @llvm.vector.reduce.add.nxv4i1(<vscale x 4 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %30 = call i1 @llvm.vector.reduce.add.nxv8i1(<vscale x 8 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %31 = call i1 @llvm.vector.reduce.add.nxv16i1(<vscale x 16 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %32 = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %33 = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %34 = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %35 = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %36 = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %37 = call i8 @llvm.vector.reduce.add.nxv32i8(<vscale x 32 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %38 = call i8 @llvm.vector.reduce.add.nxv64i8(<vscale x 64 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %39 = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %40 = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %41 = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %42 = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %43 = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %44 = call i16 @llvm.vector.reduce.add.nxv32i16(<vscale x 32 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %45 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %46 = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %47 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %48 = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %49 = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %50 = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %51 = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %52 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %53 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'reduce_add'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call i8 @llvm.vector.reduce.add.v512i8(<512 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %11 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %12 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = call i16 @llvm.vector.reduce.add.v256i16(<256 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %16 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %17 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %21 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %24 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %25 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = call i1 @llvm.vector.reduce.add.nxv1i1(<vscale x 1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call i1 @llvm.vector.reduce.add.nxv2i1(<vscale x 2 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %29 = call i1 @llvm.vector.reduce.add.nxv4i1(<vscale x 4 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %30 = call i1 @llvm.vector.reduce.add.nxv8i1(<vscale x 8 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = call i1 @llvm.vector.reduce.add.nxv16i1(<vscale x 16 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %32 = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %34 = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %36 = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %37 = call i8 @llvm.vector.reduce.add.nxv32i8(<vscale x 32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %38 = call i8 @llvm.vector.reduce.add.nxv64i8(<vscale x 64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %40 = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %42 = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %43 = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = call i16 @llvm.vector.reduce.add.nxv32i16(<vscale x 32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %45 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %46 = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %47 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %48 = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %50 = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %51 = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %53 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
+  call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
+  call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+  call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+  call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+  call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+  call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> undef)
+  call i8 @llvm.vector.reduce.add.v512i8(<512 x i8> undef)
+  call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+  call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+  call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+  call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+  call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+  call i16 @llvm.vector.reduce.add.v256i16(<256 x i16> undef)
+  call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+  call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+  call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+  call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
+  call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
+  call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+  call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+  call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+  call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+  call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+  call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+  call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+
+  call i1 @llvm.vector.reduce.add.nxv1i1(<vscale x 1 x i1> undef)
+  call i1 @llvm.vector.reduce.add.nxv2i1(<vscale x 2 x i1> undef)
+  call i1 @llvm.vector.reduce.add.nxv4i1(<vscale x 4 x i1> undef)
+  call i1 @llvm.vector.reduce.add.nxv8i1(<vscale x 8 x i1> undef)
+  call i1 @llvm.vector.reduce.add.nxv16i1(<vscale x 16 x i1> undef)
+  call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> undef)
+  call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
+  call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
+  call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
+  call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
+  call i8 @llvm.vector.reduce.add.nxv32i8(<vscale x 32 x i8> undef)
+  call i8 @llvm.vector.reduce.add.nxv64i8(<vscale x 64 x i8> undef)
+  call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> undef)
+  call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> undef)
+  call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> undef)
+  call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> undef)
+  call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> undef)
+  call i16 @llvm.vector.reduce.add.nxv32i16(<vscale x 32 x i16> undef)
+  call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
+  call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> undef)
+  call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> undef)
+  call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> undef)
+  call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> undef)
+  call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> undef)
+  call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
+  call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
+  call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
+  ret void
+}
+
+declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
+declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
+declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
+declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
+declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
+declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>)
+declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>)
+declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>)
+declare half @llvm.vector.reduce.fadd.v256f16(half, <256 x half>)
+declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>)
+declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
+declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>)
+declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>)
+declare float @llvm.vector.reduce.fadd.v128f32(float, <128 x float>)
+declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)
+declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
+declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)
+declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)
+declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>)
+declare double @llvm.vector.reduce.fadd.v64f64(double, <64 x double>)
+
+define void @ordered_reduce_fadd() {
+; THROUGHPUT-LABEL: 'ordered_reduce_fadd'
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %3 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %4 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 105 for instruction: %5 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 201 for instruction: %6 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %7 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 777 for instruction: %8 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1545 for instruction: %9 = call half @llvm.vector.reduce.fadd.v256f16(half 0xH0000, <256 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %10 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 105 for instruction: %11 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 201 for instruction: %12 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %13 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 777 for instruction: %14 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1545 for instruction: %15 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %16 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %17 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %18 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %19 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 105 for instruction: %20 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 201 for instruction: %21 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %22 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 777 for instruction: %23 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 105 for instruction: %25 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 201 for instruction: %26 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %27 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 777 for instruction: %28 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %29 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %30 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %31 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %32 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 105 for instruction: %33 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 201 for instruction: %34 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %35 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 105 for instruction: %36 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 201 for instruction: %37 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %38 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 777 for instruction: %39 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'ordered_reduce_fadd'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call half @llvm.vector.reduce.fadd.v256f16(half 0xH0000, <256 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %11 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %12 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %16 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %17 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %21 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %25 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %27 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %28 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %29 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %32 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %34 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %36 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %37 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %38 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %39 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
+  call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+  call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
+  call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+  call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+  call half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef)
+  call half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef)
+  call half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef)
+  call half @llvm.vector.reduce.fadd.v256(half 0.0, <256 x half> undef)
+  call half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, <vscale x 1 x half> undef)
+  call half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, <vscale x 2 x half> undef)
+  call half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, <vscale x 4 x half> undef)
+  call half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef)
+  call half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef)
+  call half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef)
+  call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
+  call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
+  call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
+  call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
+  call float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef)
+  call float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef)
+  call float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef)
+  call float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef)
+  call float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef)
+  call float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, <vscale x 2 x float> undef)
+  call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef)
+  call float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef)
+  call float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef)
+  call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
+  call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
+  call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
+  call double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef)
+  call double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef)
+  call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef)
+  call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef)
+  call double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef)
+  call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef)
+  call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef)
+  call double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef)
+  ret void
+}
+
+define void @fast_reduce_fadd() {
+; THROUGHPUT-LABEL: 'fast_reduce_fadd'
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %3 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %4 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %5 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %6 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %7 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %8 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %9 = call fast half @llvm.vector.reduce.fadd.v256f16(half 0xH0000, <256 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %10 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %11 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %12 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %13 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %14 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %15 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %16 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %17 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %18 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %19 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %20 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %21 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %22 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %23 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %24 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %25 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %26 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %27 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %28 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %29 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %30 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %31 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %32 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %33 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %34 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %35 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %36 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %37 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %38 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %39 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; Expectations for the second check prefix follow; they cover the same 39 calls and the final ret as the group above.
+; SIZE-LABEL: 'fast_reduce_fadd'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call fast half @llvm.vector.reduce.fadd.v256f16(half 0xH0000, <256 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %11 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %12 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %16 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %17 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %21 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %24 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %25 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %27 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %28 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %32 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %34 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %36 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %37 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %38 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; Test body: `fast`-flagged fadd reductions over fixed (v*) and scalable (nxv*) half/float/double vectors; call order fixes the %N numbering checked above. NOTE(review): the 9th call is spelled `...fadd.v256` (no f16 suffix) while the expectations print `v256f16` - confirm it matches the declaration.
+  call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.v256(half 0.0, <256 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, <vscale x 1 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, <vscale x 2 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, <vscale x 4 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef)
+  call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef)
+  call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, <vscale x 2 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef)
+  call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef)
+  call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
+  call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
+  call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
+  call fast double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef)
+  call fast double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef)
+  call fast double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef)
+  call fast double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef)
+  call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.0, <vscale x 1 x double> undef)
+  call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef)
+  call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef)
+  call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef)
+  ret void
+}
+
+declare i1 @llvm.vector.reduce.smax.v16i1(<16 x i1>)
+declare i1 @llvm.vector.reduce.smax.v32i1(<32 x i1>)
+declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.smax.v256i8(<256 x i8>)
+declare i8 @llvm.vector.reduce.smax.v512i8(<512 x i8>)
+declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smax.v128i16(<128 x i16>)
+declare i16 @llvm.vector.reduce.smax.v256i16(<256 x i16>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smax.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.smax.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smax.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.smax.v64i64(<64 x i64>)
+
+define void @reduce_smax() {
+; THROUGHPUT-LABEL: 'reduce_smax'
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %2 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %3 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %4 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %5 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %6 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %7 = call i8 @llvm.vector.reduce.smax.v256i8(<256 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %8 = call i8 @llvm.vector.reduce.smax.v512i8(<512 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %9 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %10 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %11 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %12 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %13 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %14 = call i16 @llvm.vector.reduce.smax.v256i16(<256 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %15 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %16 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %17 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %18 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %19 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %20 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %21 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %22 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %23 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %24 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %25 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %26 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %27 = call i1 @llvm.vector.reduce.smax.nxv1i1(<vscale x 1 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %28 = call i1 @llvm.vector.reduce.smax.nxv2i1(<vscale x 2 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %29 = call i1 @llvm.vector.reduce.smax.nxv4i1(<vscale x 4 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %30 = call i1 @llvm.vector.reduce.smax.nxv8i1(<vscale x 8 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %31 = call i1 @llvm.vector.reduce.smax.nxv16i1(<vscale x 16 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %32 = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %33 = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %34 = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %35 = call i8 @llvm.vector.reduce.smax.nxv8i8(<vscale x 8 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %36 = call i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %37 = call i8 @llvm.vector.reduce.smax.nxv32i8(<vscale x 32 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %38 = call i8 @llvm.vector.reduce.smax.nxv64i8(<vscale x 64 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %39 = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %40 = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %41 = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %42 = call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %43 = call i16 @llvm.vector.reduce.smax.nxv16i16(<vscale x 16 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %44 = call i16 @llvm.vector.reduce.smax.nxv32i16(<vscale x 32 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %45 = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %46 = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %47 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %48 = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %49 = call i32 @llvm.vector.reduce.smax.nxv16i32(<vscale x 16 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %50 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %51 = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %52 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %53 = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; End of the THROUGHPUT expectations; the SIZE expectations below cover the same 53 calls plus the ret (both prefixes presumably come from RUN lines outside this view).
+; SIZE-LABEL: 'reduce_smax'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call i8 @llvm.vector.reduce.smax.v256i8(<256 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call i8 @llvm.vector.reduce.smax.v512i8(<512 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %11 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %12 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = call i16 @llvm.vector.reduce.smax.v256i16(<256 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %16 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %17 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %21 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %24 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %25 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %27 = call i1 @llvm.vector.reduce.smax.nxv1i1(<vscale x 1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %28 = call i1 @llvm.vector.reduce.smax.nxv2i1(<vscale x 2 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %29 = call i1 @llvm.vector.reduce.smax.nxv4i1(<vscale x 4 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = call i1 @llvm.vector.reduce.smax.nxv8i1(<vscale x 8 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = call i1 @llvm.vector.reduce.smax.nxv16i1(<vscale x 16 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %32 = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %34 = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = call i8 @llvm.vector.reduce.smax.nxv8i8(<vscale x 8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %36 = call i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %37 = call i8 @llvm.vector.reduce.smax.nxv32i8(<vscale x 32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %38 = call i8 @llvm.vector.reduce.smax.nxv64i8(<vscale x 64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %40 = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %42 = call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %43 = call i16 @llvm.vector.reduce.smax.nxv16i16(<vscale x 16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = call i16 @llvm.vector.reduce.smax.nxv32i16(<vscale x 32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %45 = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %46 = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %47 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %48 = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = call i32 @llvm.vector.reduce.smax.nxv16i32(<vscale x 16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %50 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %51 = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %53 = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; IR under test. The calls are unnamed, so they show up as %1 through %53 in the expectations above, fixed-length vectors first and then scalable ones.
+  call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> undef)
+  call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
+  call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.v256i8(<256 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.v512i8(<512 x i8> undef)
+  call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+  call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+  call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
+  call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+  call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+  call i16 @llvm.vector.reduce.smax.v256i16(<256 x i16> undef)
+  call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+  call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+  call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
+  call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
+  call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
+  call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+  call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+  call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+  call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+  call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+  call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+  call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+  call i1 @llvm.vector.reduce.smax.nxv1i1(<vscale x 1 x i1> undef)
+  call i1 @llvm.vector.reduce.smax.nxv2i1(<vscale x 2 x i1> undef)
+  call i1 @llvm.vector.reduce.smax.nxv4i1(<vscale x 4 x i1> undef)
+  call i1 @llvm.vector.reduce.smax.nxv8i1(<vscale x 8 x i1> undef)
+  call i1 @llvm.vector.reduce.smax.nxv16i1(<vscale x 16 x i1> undef)
+  call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.nxv8i8(<vscale x 8 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.nxv32i8(<vscale x 32 x i8> undef)
+  call i8 @llvm.vector.reduce.smax.nxv64i8(<vscale x 64 x i8> undef)
+  call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> undef)
+  call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> undef)
+  call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> undef)
+  call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> undef)
+  call i16 @llvm.vector.reduce.smax.nxv16i16(<vscale x 16 x i16> undef)
+  call i16 @llvm.vector.reduce.smax.nxv32i16(<vscale x 32 x i16> undef)
+  call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> undef)
+  call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> undef)
+  call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> undef)
+  call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> undef)
+  call i32 @llvm.vector.reduce.smax.nxv16i32(<vscale x 16 x i32> undef)
+  call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef)
+  call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> undef)
+  call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> undef)
+  call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> undef)
+  ret void
+}