From 9eb0d55a4c485554b88f8473e141b497b36e4494 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung@sifive.com>
Date: Mon, 5 Feb 2024 07:13:42 -0800
Subject: [PATCH 1/3] [RISCV][CostModel] Estimate cost of
 llvm.vector.reduce.fmaximum/fminimum
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ‘llvm.vector.reduce.fmaximum/fminimum.*’ intrinsics propagate NaNs.
and if any element of the vector is a NaN.

Following #79402, the patch add the cost of NaN check  (vmfne + vcpop)
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 47 ++++++++++
 .../CostModel/RISCV/reduce-fmaximum.ll        | 91 +++++++++++++------
 .../CostModel/RISCV/reduce-fminimum.ll        | 52 +++++------
 3 files changed, 138 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 8f46fdc2f7ca9..39ef29c6af30c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1001,6 +1001,53 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
       return getArithmeticReductionCost(Instruction::And, Ty, FMF, CostKind);
   }
 
+  if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
+    SmallVector<unsigned, 5> SplitOps;
+    SmallVector<unsigned, 3> Opcodes;
+    InstructionCost ExtraCost = 0;
+    switch (IID) {
+    case Intrinsic::maximum:
+      if (FMF.noNaNs()) {
+        SplitOps = {RISCV::VFMAX_VV};
+        Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
+      } else {
+        SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV,
+                    RISCV::VMERGE_VVM, RISCV::VFMAX_VV};
+        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
+                   RISCV::VFMV_F_S};
+        // Cost of Canonical Nan
+        // lui a0, 523264
+        // fmv.w.x fa0, a0
+        ExtraCost = 2;
+      }
+      break;
+
+    case Intrinsic::minimum:
+      if (FMF.noNaNs()) {
+        SplitOps = {RISCV::VFMIN_VV};
+        Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
+      } else {
+        SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV,
+                    RISCV::VMERGE_VVM, RISCV::VFMIN_VV};
+        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
+                   RISCV::VFMV_F_S};
+        // Cost of Canonical Nan
+        // lui a0, 523264
+        // fmv.w.x fa0, a0
+        ExtraCost = 2;
+      }
+      break;
+    }
+    // Add a cost for data larger than LMUL8
+    InstructionCost SplitCost =
+        (LT.first > 1)
+            ? (LT.first - 1) *
+                  getRISCVInstructionCost(SplitOps, LT.second, CostKind)
+            : 0;
+    return ExtraCost + SplitCost +
+           getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+  }
+
   // IR Reduction is composed by two vmv and one rvv reduction instruction.
   InstructionCost BaseCost = 2;
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 1618c3833a972..4fd57e06d07b1 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -6,23 +6,37 @@
 
 define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-LABEL: 'reduce_fmaximum_f32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -32,6 +46,13 @@ define float @reduce_fmaximum_f32(float %arg) {
 %V32   = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 %V64   = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 %V128   = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
 ret float undef
 }
 declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
@@ -44,21 +65,33 @@ declare float @llvm.vector.reduce.fmaximum.v128f32(<128 x float>)
 
 define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-LABEL: 'reduce_fmaximum_f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
@@ -67,6 +100,12 @@ define double @reduce_fmaximum_f64(double %arg) {
 %V16   = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 %V32   = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 %V64   = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
 ret double undef
 }
 declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 35b18645b1f2d..269f2dcd7caac 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -6,23 +6,23 @@
 
 define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-LABEL: 'reduce_fmaximum_f32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -44,21 +44,21 @@ declare float @llvm.vector.reduce.fminimum.v128f32(<128 x float>)
 
 define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-LABEL: 'reduce_fmaximum_f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)

From 00bd3c20f73e83283580d3c5824a70aa01d2cd1b Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung@sifive.com>
Date: Wed, 20 Mar 2024 10:37:30 -0700
Subject: [PATCH 2/3] Address comments

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 36 +++++++++---------
 .../CostModel/RISCV/reduce-fmaximum.ll        | 38 +++++++++----------
 .../CostModel/RISCV/reduce-fminimum.ll        | 30 +++++++--------
 3 files changed, 51 insertions(+), 53 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 39ef29c6af30c..530a6165d5b5d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1002,50 +1002,48 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
   }
 
   if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
-    SmallVector<unsigned, 5> SplitOps;
     SmallVector<unsigned, 3> Opcodes;
     InstructionCost ExtraCost = 0;
     switch (IID) {
     case Intrinsic::maximum:
       if (FMF.noNaNs()) {
-        SplitOps = {RISCV::VFMAX_VV};
         Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
       } else {
-        SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV,
-                    RISCV::VMERGE_VVM, RISCV::VFMAX_VV};
         Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
                    RISCV::VFMV_F_S};
-        // Cost of Canonical Nan
+        // Cost of Canonical Nan + branch
         // lui a0, 523264
         // fmv.w.x fa0, a0
-        ExtraCost = 2;
+        Type *DstTy = Ty->getScalarType();
+        const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
+        Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
+        ExtraCost = 1 +
+                    getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,
+                                     TTI::CastContextHint::None, CostKind) +
+                    getCFInstrCost(Instruction::Br, CostKind);
       }
       break;
 
     case Intrinsic::minimum:
       if (FMF.noNaNs()) {
-        SplitOps = {RISCV::VFMIN_VV};
         Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
       } else {
-        SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV,
-                    RISCV::VMERGE_VVM, RISCV::VFMIN_VV};
         Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
                    RISCV::VFMV_F_S};
-        // Cost of Canonical Nan
+        // Cost of Canonical Nan + branch
         // lui a0, 523264
         // fmv.w.x fa0, a0
-        ExtraCost = 2;
+        Type *DstTy = Ty->getScalarType();
+        const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
+        Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
+        ExtraCost = 1 +
+                    getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,
+                                     TTI::CastContextHint::None, CostKind) +
+                    getCFInstrCost(Instruction::Br, CostKind);
       }
       break;
     }
-    // Add a cost for data larger than LMUL8
-    InstructionCost SplitCost =
-        (LT.first > 1)
-            ? (LT.first - 1) *
-                  getRISCVInstructionCost(SplitOps, LT.second, CostKind)
-            : 0;
-    return ExtraCost + SplitCost +
-           getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+    return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
   }
 
   // IR Reduction is composed by two vmv and one rvv reduction instruction.
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 4fd57e06d07b1..f91f13b2d9ec6 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -12,31 +12,31 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -70,28 +70,28 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 269f2dcd7caac..86b84025ad541 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -12,17 +12,17 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -49,16 +49,16 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)

From 145c3bb383481081dcda5b7e11fb9d9668c9d34a Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung@sifive.com>
Date: Thu, 21 Mar 2024 07:32:22 -0700
Subject: [PATCH 3/3] Use getScalarSizeInBits

---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 530a6165d5b5d..f75b3d3caa62f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1015,7 +1015,7 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
         // lui a0, 523264
         // fmv.w.x fa0, a0
         Type *DstTy = Ty->getScalarType();
-        const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
+        const unsigned EltTyBits = DstTy->getScalarSizeInBits();
         Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
         ExtraCost = 1 +
                     getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,