From 9eb0d55a4c485554b88f8473e141b497b36e4494 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Mon, 5 Feb 2024 07:13:42 -0800 Subject: [PATCH 1/3] [RISCV][CostModel] Estimate cost of llvm.vector.reduce.fmaximum/fminimum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ‘llvm.vector.reduce.fmaximum/fminimum.*’ intrinsics propagate NaNs: if any element of the vector is a NaN, the result is NaN. Following #79402, this patch adds the cost of the NaN check (vmfne + vcpop) --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 47 ++++++++++ .../CostModel/RISCV/reduce-fmaximum.ll | 91 +++++++++++++------ .../CostModel/RISCV/reduce-fminimum.ll | 52 +++++------ 3 files changed, 138 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 8f46fdc2f7ca9..39ef29c6af30c 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1001,6 +1001,53 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, return getArithmeticReductionCost(Instruction::And, Ty, FMF, CostKind); } + if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) { + SmallVector SplitOps; + SmallVector Opcodes; + InstructionCost ExtraCost = 0; + switch (IID) { + case Intrinsic::maximum: + if (FMF.noNaNs()) { + SplitOps = {RISCV::VFMAX_VV}; + Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S}; + } else { + SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV, + RISCV::VMERGE_VVM, RISCV::VFMAX_VV}; + Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS, + RISCV::VFMV_F_S}; + // Cost of Canonical Nan + // lui a0, 523264 + // fmv.w.x fa0, a0 + ExtraCost = 2; + } + break; + + case Intrinsic::minimum: + if (FMF.noNaNs()) { + SplitOps = {RISCV::VFMIN_VV}; + Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S}; + } else { + SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV, 
RISCV::VMERGE_VVM, RISCV::VFMIN_VV}; + Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS, + RISCV::VFMV_F_S}; + // Cost of Canonical Nan + // lui a0, 523264 + // fmv.w.x fa0, a0 + ExtraCost = 2; + } + break; + } + // Add a cost for data larger than LMUL8 + InstructionCost SplitCost = + (LT.first > 1) + ? (LT.first - 1) * + getRISCVInstructionCost(SplitOps, LT.second, CostKind) + : 0; + return ExtraCost + SplitCost + + getRISCVInstructionCost(Opcodes, LT.second, CostKind); + } + // IR Reduction is composed by two vmv and one rvv reduction instruction. InstructionCost BaseCost = 2; diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll index 1618c3833a972..4fd57e06d07b1 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll @@ -6,23 +6,37 @@ define float @reduce_fmaximum_f32(float %arg) { ; CHECK-LABEL: 'reduce_fmaximum_f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x 
float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for 
instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef ; ; SIZE-LABEL: 'reduce_fmaximum_f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated 
cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef ; %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) @@ -32,6 +46,13 @@ define float @reduce_fmaximum_f32(float %arg) { %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) +call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) +call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) +call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) +call fast float 
@llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) +call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) +call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) +call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) ret float undef } declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>) @@ -44,21 +65,33 @@ declare float @llvm.vector.reduce.fmaximum.v128f32(<128 x float>) define double @reduce_fmaximum_f64(double %arg) { ; CHECK-LABEL: 'reduce_fmaximum_f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> 
undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef ; ; SIZE-LABEL: 'reduce_fmaximum_f64' -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> 
undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef ; %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) @@ -67,6 +100,12 @@ define double @reduce_fmaximum_f64(double %arg) { %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) +call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) +call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) +call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) +call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) +call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) ret double undef } declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll index 35b18645b1f2d..269f2dcd7caac 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll @@ -6,23 +6,23 @@ define float @reduce_fmaximum_f32(float %arg) { ; CHECK-LABEL: 'reduce_fmaximum_f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an 
estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef ; ; SIZE-LABEL: 'reduce_fmaximum_f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef) -; 
SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef ; %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef) @@ -44,21 +44,21 @@ declare float @llvm.vector.reduce.fminimum.v128f32(<128 x float>) define double @reduce_fmaximum_f64(double %arg) { ; CHECK-LABEL: 'reduce_fmaximum_f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef ; ; SIZE-LABEL: 'reduce_fmaximum_f64' -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef) -; SIZE-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef ; %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef) From 00bd3c20f73e83283580d3c5824a70aa01d2cd1b Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Wed, 20 Mar 2024 10:37:30 -0700 Subject: [PATCH 2/3] Address comments --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 36 
+++++++++--------- .../CostModel/RISCV/reduce-fmaximum.ll | 38 +++++++++---------- .../CostModel/RISCV/reduce-fminimum.ll | 30 +++++++-------- 3 files changed, 51 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 39ef29c6af30c..530a6165d5b5d 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1002,50 +1002,48 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, } if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) { - SmallVector SplitOps; SmallVector Opcodes; InstructionCost ExtraCost = 0; switch (IID) { case Intrinsic::maximum: if (FMF.noNaNs()) { - SplitOps = {RISCV::VFMAX_VV}; Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S}; } else { - SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV, - RISCV::VMERGE_VVM, RISCV::VFMAX_VV}; Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS, RISCV::VFMV_F_S}; - // Cost of Canonical Nan + // Cost of Canonical Nan + branch // lui a0, 523264 // fmv.w.x fa0, a0 - ExtraCost = 2; + Type *DstTy = Ty->getScalarType(); + const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy); + Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits); + ExtraCost = 1 + + getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy, + TTI::CastContextHint::None, CostKind) + + getCFInstrCost(Instruction::Br, CostKind); } break; case Intrinsic::minimum: if (FMF.noNaNs()) { - SplitOps = {RISCV::VFMIN_VV}; Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S}; } else { - SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV, - RISCV::VMERGE_VVM, RISCV::VFMIN_VV}; Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS, RISCV::VFMV_F_S}; - // Cost of Canonical Nan + // Cost of Canonical Nan + branch // lui a0, 523264 // fmv.w.x fa0, a0 - ExtraCost = 2; + Type *DstTy = Ty->getScalarType(); + const unsigned EltTyBits = 
DL.getTypeSizeInBits(DstTy); + Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits); + ExtraCost = 1 + + getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy, + TTI::CastContextHint::None, CostKind) + + getCFInstrCost(Instruction::Br, CostKind); } break; } - // Add a cost for data larger than LMUL8 - InstructionCost SplitCost = - (LT.first > 1) - ? (LT.first - 1) * - getRISCVInstructionCost(SplitOps, LT.second, CostKind) - : 0; - return ExtraCost + SplitCost + - getRISCVInstructionCost(Opcodes, LT.second, CostKind); + return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind); } // IR Reduction is composed by two vmv and one rvv reduction instruction. diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll index 4fd57e06d07b1..f91f13b2d9ec6 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll @@ -12,31 +12,31 @@ define float @reduce_fmaximum_f32(float %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef ; ; SIZE-LABEL: 'reduce_fmaximum_f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) -; SIZE-NEXT: Cost 
Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float 
@llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef ; %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) @@ -70,28 +70,28 @@ define double @reduce_fmaximum_f64(double %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef ; ; SIZE-LABEL: 'reduce_fmaximum_f64' -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) +; 
SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef ; %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll index 269f2dcd7caac..86b84025ad541 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll @@ -12,17 +12,17 @@ define float @reduce_fmaximum_f32(float %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> 
undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef ; ; SIZE-LABEL: 'reduce_fmaximum_f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float 
@llvm.vector.reduce.fminimum.v4f32(<4 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef ; %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef) @@ -49,16 +49,16 @@ define double @reduce_fmaximum_f64(double %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef ; ; SIZE-LABEL: 'reduce_fmaximum_f64' -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double 
@llvm.vector.reduce.fminimum.v2f64(<2 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef ; %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef) From 145c3bb383481081dcda5b7e11fb9d9668c9d34a Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Thu, 21 Mar 2024 07:32:22 -0700 Subject: [PATCH 3/3] Use 
getScalarSizeInBits --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 530a6165d5b5d..f75b3d3caa62f 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1015,7 +1015,7 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, // lui a0, 523264 // fmv.w.x fa0, a0 Type *DstTy = Ty->getScalarType(); - const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy); + const unsigned EltTyBits = DstTy->getScalarSizeInBits(); Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits); ExtraCost = 1 + getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,