[RISCV] Fix missing scaling by LMUL in cost model #73342

Open · wants to merge 9 commits into main

Conversation

@simeonkr (Contributor) commented Nov 24, 2023

Ensure that we consistently scale costs by LMUL whenever it makes sense to do so. Some context: https://discourse.llvm.org/t/role-of-lmul-in-cost-model/75197

@llvmbot (Collaborator) commented Nov 24, 2023

@llvm/pr-subscribers-llvm-analysis
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-risc-v

Author: Simeon K (simeonkr)

Changes

Ensure that we consistently scale costs by LMUL whenever it makes sense to do so.


Patch is 23.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73342.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+17-16)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll (+10-13)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+53-44)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 3a2f2f39cd1c9b0..92f394df60cba94 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -438,7 +438,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
     // Mask operation additionally required extend and truncate
     InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
-    return LT.first * (LenCost + GatherCost + ExtendCost);
+    return LT.first * TLI->getLMULCost(LT.second) *
+           (LenCost + GatherCost + ExtendCost);
   }
   }
   return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
@@ -1082,7 +1083,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     // These all use the same code.
     auto LT = getTypeLegalizationCost(RetTy);
     if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
-      return LT.first * 8;
+      return LT.first * TLI->getLMULCost(LT.second) * 8;
     break;
   }
   case Intrinsic::umin:
@@ -1092,7 +1093,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     auto LT = getTypeLegalizationCost(RetTy);
     if ((ST->hasVInstructions() && LT.second.isVector()) ||
         (LT.second.isScalarInteger() && ST->hasStdExtZbb()))
-      return LT.first;
+      return LT.first * TLI->getLMULCost(LT.second);
     break;
   }
   case Intrinsic::sadd_sat:
@@ -1103,7 +1104,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   case Intrinsic::sqrt: {
     auto LT = getTypeLegalizationCost(RetTy);
     if (ST->hasVInstructions() && LT.second.isVector())
-      return LT.first;
+      return LT.first * TLI->getLMULCost(LT.second);
     break;
   }
   case Intrinsic::ctpop: {
@@ -1117,7 +1118,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     if (ST->hasVInstructions() && LT.second.isVector()) {
       // vrsub.vi v10, v8, 0
       // vmax.vv v8, v8, v10
-      return LT.first * 2;
+      return LT.first * TLI->getLMULCost(LT.second) * 2;
     }
     break;
   }
@@ -1125,14 +1126,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   case Intrinsic::experimental_stepvector: {
     unsigned Cost = 1; // vid
     auto LT = getTypeLegalizationCost(RetTy);
-    return Cost + (LT.first - 1);
+    return Cost + (LT.first - 1) * TLI->getLMULCost(LT.second);
   }
   case Intrinsic::vp_rint: {
     // RISC-V target uses at least 5 instructions to lower rounding intrinsics.
     unsigned Cost = 5;
     auto LT = getTypeLegalizationCost(RetTy);
     if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
-      return Cost * LT.first;
+      return Cost * LT.first * TLI->getLMULCost(LT.second);
     break;
   }
   case Intrinsic::vp_nearbyint: {
@@ -1140,7 +1141,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     unsigned Cost = 7;
     auto LT = getTypeLegalizationCost(RetTy);
     if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
-      return Cost * LT.first;
+      return Cost * LT.first * TLI->getLMULCost(LT.second);
     break;
   }
   case Intrinsic::vp_ceil:
@@ -1154,7 +1155,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     auto LT = getTypeLegalizationCost(RetTy);
     unsigned VPISD = getISDForVPIntrinsicID(ICA.getID());
     if (TLI->isOperationCustom(VPISD, LT.second))
-      return Cost * LT.first;
+      return Cost * LT.first * TLI->getLMULCost(LT.second);
     break;
   }
   }
@@ -1163,7 +1164,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     auto LT = getTypeLegalizationCost(RetTy);
     if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
                                             ICA.getID(), LT.second))
-      return LT.first * Entry->Cost;
+      return LT.first * TLI->getLMULCost(LT.second) * Entry->Cost;
   }
 
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
@@ -1417,10 +1418,10 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
         // vmandn.mm v8, v8, v9
         // vmand.mm v9, v0, v9
         // vmor.mm v0, v9, v8
-        return LT.first * 3;
+        return LT.first * TLI->getLMULCost(LT.second) * 3;
       }
       // vselect and max/min are supported natively.
-      return LT.first * 1;
+      return LT.first * TLI->getLMULCost(LT.second) * 1;
     }
 
     if (ValTy->getScalarSizeInBits() == 1) {
@@ -1429,13 +1430,13 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
       //  vmandn.mm v8, v8, v9
       //  vmand.mm v9, v0, v9
       //  vmor.mm v0, v9, v8
-      return LT.first * 5;
+      return LT.first * TLI->getLMULCost(LT.second) * 5;
     }
 
     // vmv.v.x v10, a0
     // vmsne.vi v0, v10, 0
     // vmerge.vvm v8, v9, v8, v0
-    return LT.first * 3;
+    return LT.first * TLI->getLMULCost(LT.second) * 3;
   }
 
   if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
@@ -1444,7 +1445,7 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 
     // Support natively.
     if (CmpInst::isIntPredicate(VecPred))
-      return LT.first * 1;
+      return LT.first * TLI->getLMULCost(LT.second) * 1;
 
     // If we do not support the input floating point vector type, use the base
     // one which will calculate as:
@@ -1463,7 +1464,7 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     case CmpInst::FCMP_OLT:
     case CmpInst::FCMP_OLE:
     case CmpInst::FCMP_UNE:
-      return LT.first * 1;
+      return LT.first * TLI->getLMULCost(LT.second) * 1;
     // TODO: Other comparisons?
     default:
       break;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
index c49f7d4a5d5f13d..2a7626ab0a78075 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
@@ -102,10 +102,10 @@ define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) {
 ; CHECK-LABEL: @uniform_store_i1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 64
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[N_VEC]], 8
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP1]]
@@ -116,17 +116,14 @@ define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) {
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> <i64 0, i64 8, i64 16, i64 24, i64 32, i64 40, i64 48, i64 56, i64 64, i64 72, i64 80, i64 88, i64 96, i64 104, i64 112, i64 120, i64 128, i64 136, i64 144, i64 152, i64 160, i64 168, i64 176, i64 184, i64 192, i64 200, i64 208, i64 216, i64 224, i64 232, i64 240, i64 248>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> <i64 256, i64 264, i64 272, i64 280, i64 288, i64 296, i64 304, i64 312, i64 320, i64 328, i64 336, i64 344, i64 352, i64 360, i64 368, i64 376, i64 384, i64 392, i64 400, i64 408, i64 416, i64 424, i64 432, i64 440, i64 448, i64 456, i64 464, i64 472, i64 480, i64 488, i64 496, i64 504>
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, <32 x ptr> [[TMP2]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, <32 x ptr> [[TMP3]], i64 1
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <32 x ptr> [[TMP4]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <32 x ptr> [[TMP5]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <32 x i1> [[TMP7]], i32 31
-; CHECK-NEXT:    store i1 [[TMP8]], ptr [[DST:%.*]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 512
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, <32 x ptr> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <32 x ptr> [[TMP3]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <32 x i1> [[TMP4]], i32 31
+; CHECK-NEXT:    store i1 [[TMP5]], ptr [[DST:%.*]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 256
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index cad64f5c7e2beef..8c0f31a52e64795 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -36,10 +36,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 20 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 20 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
@@ -51,31 +51,35 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' {
-; CHECK-NEXT:  Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
+; CHECK-NEXT:  Live-in vp<%0> = vector-trip-count
 ; CHECK-NEXT:  vp<%1> = original trip-count
-; CHECK:       ph:
+; CHECK-EMPTY:
+; CHECK-NEXT:  ph:
 ; CHECK-NEXT:    EMIT vp<%1> = EXPAND SCEV (zext i32 %n to i64)
 ; CHECK-NEXT:  No successors
-; CHECK:       vector.ph:
+; CHECK-EMPTY:
+; CHECK-NEXT:  vector.ph:
 ; CHECK-NEXT:  Successor(s): vector loop
-; CHECK:       <x1> vector loop: {
+; CHECK-EMPTY:
+; CHECK-NEXT:  <x1> vector loop: {
 ; CHECK-NEXT:    vector.body:
-; CHECK-NEXT:    EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:    vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
-; CHECK-NEXT:    vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1>
-; CHECK-NEXT:    CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
-; CHECK-NEXT:    CLONE ir<%idxprom> = zext ir<%i.0>
-; CHECK-NEXT:    CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT:    WIDEN ir<%1> = load ir<%arrayidx>
-; CHECK-NEXT:    WIDEN ir<%add9> = add ir<%1>, ir<1>
-; CHECK-NEXT:    CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT:    WIDEN store ir<%arrayidx3>, ir<%add9>
-; CHECK-NEXT:    EMIT vp<[[IV_INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]>
-; CHECK-NEXT:    EMIT branch-on-count vp<[[IV_INC]]>, vp<[[VEC_TC]]>
+; CHECK-NEXT:      EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%11>
+; CHECK-NEXT:      vp<%3> = DERIVED-IV ir<%n> + vp<%2> * ir<-1>
+; CHECK-NEXT:      vp<%4> = SCALAR-STEPS vp<%3>, ir<-1>
+; CHECK-NEXT:      CLONE ir<%i.0> = add nsw vp<%4>, ir<-1>
+; CHECK-NEXT:      CLONE ir<%idxprom> = zext ir<%i.0>
+; CHECK-NEXT:      CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
+; CHECK-NEXT:      WIDEN ir<%1> = load ir<%arrayidx>
+; CHECK-NEXT:      WIDEN ir<%add9> = add ir<%1>, ir<1>
+; CHECK-NEXT:      CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
+; CHECK-NEXT:      WIDEN store ir<%arrayidx3>, ir<%add9>
+; CHECK-NEXT:      EMIT vp<%11> = VF * UF + nuw vp<%2>
+; CHECK-NEXT:      EMIT branch-on-count vp<%11>, vp<%0>
 ; CHECK-NEXT:    No successors
 ; CHECK-NEXT:  }
 ; CHECK-NEXT:  Successor(s): middle.block
-; CHECK:       middle.block:
+; CHECK-EMPTY:
+; CHECK-NEXT:  middle.block:
 ; CHECK-NEXT:  No successors
 ; CHECK-NEXT:  }
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
@@ -83,10 +87,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 20 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 20 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
@@ -109,7 +113,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-NEXT:  LV: The target has 31 registers of RISCV::GPRRC register class
 ; CHECK-NEXT:  LV: The target has 32 registers of RISCV::VRRC register class
-; CHECK-NEXT:  LV: Loop cost is 28
+; CHECK-NEXT:  LV: Loop cost is 46
 ; CHECK-NEXT:  LV: IC is 1
 ; CHECK-NEXT:  LV: VF is vscale x 4
 ; CHECK-NEXT:  LV: Not Interleaving.
@@ -119,6 +123,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  Executing best plan with VF=vscale x 4, UF=1
 ; CHECK-NEXT:  LV: Interleaving disabled by the pass manager
 ; CHECK-NEXT:  LV: Vectorizing: innermost loop.
+; CHECK-EMPTY:
 ;
 entry:
   %cmp7 = icmp sgt i32 %n, 0
@@ -173,10 +178,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 20 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 20 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
@@ -188,31 +193,35 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' {
-; CHECK-NEXT:  Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
+; CHECK-NEXT:  Live-in vp<%0> = vector-trip-count
 ; CHECK-NEXT:  vp<%1> = original trip-count
-; CHECK:       ph:
+; CHECK-EMPTY:
+; CHECK-NEXT:  ph:
 ; CHECK-NEXT:    EMIT vp<%1> = EXPAND SCEV (zext i32 %n to i64)
 ; CHECK-NEXT:  No successors
-; CHECK:       vector.ph:
+; CHECK-EMPTY:
+; CHECK-NEXT:  vector.ph:
 ; CHECK-NEXT:  Successor(s): vector loop
-; CHECK:       <x1> vector loop: {
+; CHECK-EMPTY:
+; CHECK-NEXT:  <x1> vector loop: {
 ; CHECK-NEXT:    vector.body:
-; CHECK-NEXT:    EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:    vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
-; CHECK-NEXT:    vp<[[STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1>
-; CHECK-NEXT:    CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
-; CHECK-NEXT:    CLONE ir<%idxprom> = zext ir<%i.0>
-; CHECK-NEXT:    CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT:    WIDEN ir<%1> = load ir<%arrayidx>
-; CHECK-NEXT:    WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00>
-; CHECK-NEXT:    CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT:    WIDEN store ir<%arrayidx3>, ir<%conv1>
-; CHECK-NEXT:    EMIT vp<[[IV_INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]>
-; CHECK-NEXT:    EMIT branch-on-count vp<[[IV_INC]]>, vp<[[VEC_TC]]>
+; CHECK-NEXT:      EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%11>
+; CHECK-NEXT:      vp<%3> = DERIVED-IV ir<%n> + vp<%2> * ir<-1>
+; CHECK-NEXT:      vp<%4> = SCALAR-STEPS vp<%3>, ir<-1>
+; CHECK-NEXT:    ...
[truncated]

@lukel97 (Contributor) commented Nov 29, 2023

From the llvm test suite, this reduces the VF in a couple of places, e.g. in SingleSource/Regression/C/gcc-c-torture/execute/loop-2d.c:

--- build.head/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-loop-2d.dir/loop-2d.s   2023-11-27 04:14:09.994315098 +0000
+++ build/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-loop-2d.dir/loop-2d.s        2023-11-27 04:14:04.982461178 +0000
@@ -12,7 +12,7 @@
 .Lpcrel_hi0:
        auipc   a1, %pcrel_hi(a)
        addi    a2, a1, %pcrel_lo(.Lpcrel_hi0)
-       li      a1, 16
+       li      a1, 8
        add     a3, a2, a3
        bgeu    a0, a1, .LBB0_3
 # %bb.2:
@@ -21,38 +21,35 @@
 .LBB0_3:                                # %vector.ph
        slli    a4, a0, 32
        srli    a4, a4, 32
-       andi    a5, a4, -16
+       andi    a5, a4, -8
        slli    a1, a5, 2
        sub     a3, a3, a1
        subw    a1, a0, a5
-       vsetivli        zero, 8, e32, m2, ta, ma
-       vid.v   v8
-       vrsub.vi        v8, v8, 0
+       vsetivli        zero, 4, e32, m1, ta, ma
+       vid.v   v9
+       vrsub.vi        v8, v9, 0
        vadd.vx v8, v8, a0
        slli    a0, a0, 2
        add     a0, a0, a2
-       addi    a0, a0, -64
+       addi    a0, a0, -32
        li      a6, 3
        addi    a7, a2, -3
-       addi    t0, a2, -27
-       vsetvli zero, zero, e16, m1, ta, ma
-       vid.v   v10
-       vrsub.vi        v10, v10, 7
+       addi    t0, a2, -15
+       vrsub.vi        v9, v9, 3
        mv      t1, a5
 .LBB0_4:                                # %vector.body
                                         # =>This Inner Loop Header: Depth=1
-       vsetvli zero, zero, e32, m2, ta, ma
-       vmul.vx v12, v8, a6
-       vadd.vx v14, v12, a7
-       vadd.vx v12, v12, t0
-       vrgatherei16.vv v16, v14, v10
-       addi    t2, a0, 32
-       vse32.v v16, (t2)
-       vrgatherei16.vv v14, v12, v10
-       vse32.v v14, (a0)
-       vadd.vi v8, v8, -16
-       addi    t1, t1, -16
-       addi    a0, a0, -64
+       vmul.vx v10, v8, a6
+       vadd.vx v11, v10, a7
+       vadd.vx v10, v10, t0
+       vrgather.vv     v12, v11, v9
+       addi    t2, a0, 16
+       vse32.v v12, (t2)
+       vrgather.vv     v11, v10, v9
+       vse32.v v11, (a0)
+       vadd.vi v8, v8, -8
+       addi    t1, t1, -8
+       addi    a0, a0, -32
        bnez    t1, .LBB0_4
 # %bb.5:                                # %middle.block
        beq     a5, a4, .LBB0_8

I presume in this specific case it's coming from experimental_stepvector being more expensive now.

(As an aside, should we be able to PRE on that vsetvli zero, zero, e32, m2, ta, ma in the loop body?)

@@ -1082,7 +1083,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
// These all use the same code.
auto LT = getTypeLegalizationCost(RetTy);
if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
return LT.first * 8;
return LT.first * TLI->getLMULCost(LT.second) * 8;
Collaborator:

Should we only consider LMUL for vector types?

Contributor Author:

Thanks for catching this.

@@ -1092,7 +1093,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
auto LT = getTypeLegalizationCost(RetTy);
if ((ST->hasVInstructions() && LT.second.isVector()) ||
(LT.second.isScalarInteger() && ST->hasStdExtZbb()))
Collaborator:

Should we only consider LMUL for vector types?

Collaborator:

Change to

 if (ST->hasVInstructions() && LT.second.isVector())
    return LT.first * TLI->getLMULCost(LT.second);
 else if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
    return LT.first;

@@ -438,7 +438,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
// Mask operation additionally required extend and truncate
InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
return LT.first * (LenCost + GatherCost + ExtendCost);
return LT.first * TLI->getLMULCost(LT.second) *
(LenCost + GatherCost + ExtendCost);
Collaborator:

It seems GatherCost already considers LMUL.

Contributor Author:

You're right. I'm assuming ExtendCost still needs to depend on LMUL, though?

@simeonkr (Contributor Author):

Ping. @lukel97 I think the regression you pointed out to me should have disappeared after the latest changes.

@@ -1154,7 +1157,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
auto LT = getTypeLegalizationCost(RetTy);
unsigned VPISD = getISDForVPIntrinsicID(ICA.getID());
if (TLI->isOperationCustom(VPISD, LT.second))
return Cost * LT.first;
return Cost * LT.first * TLI->getLMULCost(LT.second);
Contributor:

Looks like only 5 of the 7 costed instructions are vector instructions here; we should ignore LMUL for the FRM swaps:

declare <vscale x 2 x float>  @llvm.vp.roundtozero.nxv2f32 (<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @f(<vscale x 2 x float> %x, <vscale x 2 x i1> %mask, i32 %avl) {
  %1 = call <vscale x 2 x float> @llvm.vp.roundtozero.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %mask, i32 %avl)
  ret <vscale x 2 x float> %1
}
f:                                      # @f
	.cfi_startproc
# %bb.0:
	slli	a0, a0, 32
	srli	a0, a0, 32
	vsetvli	zero, a0, e32, m1, ta, ma
	vfabs.v	v9, v8, v0.t # costed
	lui	a0, 307200
	fmv.w.x	fa5, a0
	vsetvli	zero, zero, e32, m1, ta, mu
	vmflt.vf	v0, v9, fa5, v0.t # costed
	vsetvli	zero, zero, e32, m1, ta, ma
	fsrmi	a0, 1 # costed, but LMUL independent
	vfcvt.x.f.v	v9, v8, v0.t # costed
	fsrm	a0 # costed, but LMUL independent
	vfcvt.f.x.v	v9, v9, v0.t # costed
	vsetvli	zero, zero, e32, m1, ta, mu
	vfsgnj.vv	v8, v9, v8, v0.t # costed
	ret

Contributor Author:

Thanks, I think it's correct now.

@lukel97 (Contributor) commented Jan 2, 2024

Ping. @lukel97 I think the regression you pointed out to me should have disappeared after the latest changes.

Thanks. I also ran this on TSVC2 with -O3 -march=rv64gv, and there don't seem to be any changes between this and 50c298f

@simeonkr (Contributor Author):

Ping

@lukel97 (Contributor) left a comment:

LGTM. I should mention that this prevents some loops from being vectorized in the llvm-test-suite (-march=rv64gv -O2). But this is probably what we want: with LMUL=2 in the loop vectorizer, we were previously costing certain operations at half of what they should be. I've attached the diff below:
upstream-scale-lmul.diff.txt

}
break;
}
// TODO: add more intrinsic
case Intrinsic::experimental_stepvector: {
unsigned Cost = 1; // vid
auto LT = getTypeLegalizationCost(RetTy);
return Cost + (LT.first - 1);
return (Cost + (LT.first - 1)) * TLI->getLMULCost(LT.second);
Contributor:

Side note not related to this PR: isn't this just

Suggested change
return (Cost + (LT.first - 1)) * TLI->getLMULCost(LT.second);
return LT.first * TLI->getLMULCost(LT.second);

@preames (Collaborator) left a comment:

Do we really have no cost-model testing for any of the places you're changing? If so, you should really fix that before landing this change.

@@ -1163,7 +1165,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
auto LT = getTypeLegalizationCost(RetTy);
if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
ICA.getID(), LT.second))
return LT.first * Entry->Cost;
return LT.first * TLI->getLMULCost(LT.second) * Entry->Cost;
Collaborator:

This line is incorrect. The LMUL is already considered in the entry in the cost table.

Contributor:

The entries in the cost table seem to be the same across various LMULs though, am I missing something here?
E.g. nxv1i64 is LMUL1, nxv2i64 is LMUL2 etc.

    {Intrinsic::bswap, MVT::nxv1i64, 31},
    {Intrinsic::bswap, MVT::nxv2i64, 31},
    {Intrinsic::bswap, MVT::nxv4i64, 31},
    {Intrinsic::bswap, MVT::nxv8i64, 31},

Contributor:

I guess we can't exactly tell what percentage of instructions that make up the cost actually need to be scaled by LMUL. Would it be safe to say though that most of them are vector instructions that read and write full vector groups?

@preames (Collaborator) commented Jan 18, 2024:

The cost table may be wrong and need updating, but the structure already accounts for LMUL scaling. We could also rethink the table (i.e. have a table for linearly scaled cases, and one for all others), but that's different from the way this patch approaches it.

(Edit: Update race, I was replying to Luke's earlier comment despite the way the web interface threads it.)

Collaborator

p.s. For forward progress, I would suggest this patch just drop the intrinsic table case and we handle that in a separate PR.

Collaborator

@preames preames Jan 19, 2024

Thinking about this further: the actual values in the table aren't LMUL-sensitive, but they do depend on element type (i.e. SEW). I went ahead and modified the table (8bf624a) to make that explicit.

(Edit - Deleted part of the comment as my mistake was in the revised reasoning, but it doesn't change the overall conclusion)

Given that, LGTM to the change as whole.

Contributor

@lukel97 lukel97 left a comment

Apropos of what @preames said, I think a good chunk of these changes are actually covered by the cost model tests; they just need to be updated. Here's the list from the premerge CI:

  LLVM :: Analysis/CostModel/RISCV/abs.ll
  LLVM :: Analysis/CostModel/RISCV/active_lane_mask.ll
  LLVM :: Analysis/CostModel/RISCV/fp-min-max-abs.ll
  LLVM :: Analysis/CostModel/RISCV/fp-sqrt-pow.ll
  LLVM :: Analysis/CostModel/RISCV/fround.ll
  LLVM :: Analysis/CostModel/RISCV/int-bit-manip.ll
  LLVM :: Analysis/CostModel/RISCV/int-min-max.ll
  LLVM :: Analysis/CostModel/RISCV/int-sat-math.ll
  LLVM :: Analysis/CostModel/RISCV/rvv-cmp.ll
  LLVM :: Analysis/CostModel/RISCV/rvv-intrinsics.ll
  LLVM :: Analysis/CostModel/RISCV/rvv-select.ll
  LLVM :: Analysis/CostModel/RISCV/rvv-shuffle.ll
  LLVM :: Analysis/CostModel/RISCV/stepvector.ll
  LLVM :: Transforms/SLPVectorizer/RISCV/ctpop.ll

lukel97 added a commit that referenced this pull request Jan 24, 2024
This is to add test coverage for a change in #73342
Contributor

@lukel97 lukel97 left a comment

Thanks, the changes in the cost model tests look good to me. Just one minor thing in the SLP test, otherwise LGTM

llvm/test/Transforms/SLPVectorizer/RISCV/ctpop.ll (outdated)
; RV64-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 3
; RV64-NEXT: [[TMP4:%.*]] = call i64 @llvm.ctpop.i64(i64 [[VECEXT_3]])
; RV64-NEXT: [[VECINS_3:%.*]] = insertelement <4 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
; RV64-NEXT: ret <4 x i64> [[VECINS_3]]
Contributor

Not for this patch, but we're overcosting the intrinsics in the table now since not all of the instructions need to be scaled by LMUL. E.g. here:

define <4 x i64> @ctpop_v4i64(<4 x i64> %v) {
  %w = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %v)
  ret <4 x i64> %w
}

gives us

	vsetivli	zero, 4, e64, m2, ta, ma
	vsrl.vi	v10, v8, 1
	lui	a0, 349525
	addiw	a0, a0, 1365
	slli	a1, a0, 32
	add	a0, a0, a1
	vand.vx	v10, v10, a0
	vsub.vv	v8, v8, v10
	lui	a0, 209715
	addiw	a0, a0, 819
	slli	a1, a0, 32
	add	a0, a0, a1
	vand.vx	v10, v8, a0
	vsrl.vi	v8, v8, 2
	vand.vx	v8, v8, a0
	vadd.vv	v8, v10, v8
	vsrl.vi	v10, v8, 4
	vadd.vv	v8, v8, v10
	lui	a0, 61681
	addiw	a0, a0, -241
	slli	a1, a0, 32
	add	a0, a0, a1
	vand.vx	v8, v8, a0
	lui	a0, 4112
	addiw	a0, a0, 257
	slli	a1, a0, 32
	add	a0, a0, a1
	vmul.vx	v8, v8, a0
	li	a0, 56
	vsrl.vx	v8, v8, a0

Only 12 of the instructions actually need to be multiplied by LMUL. We should probably look at using getRISCVInstructionCost to model such sequences of mixed scalar+vector instructions better. cc @arcbbb
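To illustrate the over-costing, here is a rough sketch of the idea (the helper names are made up, not LLVM API, and the instruction counts are approximations read off the m2 disassembly above): only the vector instructions should be multiplied by the LMUL factor, while the scalar constant-materialization code costs the same regardless of LMUL.

```cpp
#include <cassert>

// Cost of a mixed scalar/vector expansion like the ctpop sequence above:
// scalar instructions cost one unit each regardless of LMUL, while each
// vector instruction costs roughly one unit per LMUL.
int mixedCost(int ScalarInsns, int VectorInsns, int LMULCost) {
  return ScalarInsns + VectorInsns * LMULCost;
}

// What scaling the whole table entry by LMUL effectively does: it charges
// the scalar part LMUL times as well.
int uniformlyScaledCost(int ScalarInsns, int VectorInsns, int LMULCost) {
  return (ScalarInsns + VectorInsns) * LMULCost;
}
```

For any LMUL cost greater than one, the uniformly scaled estimate exceeds the mixed one by `(LMULCost - 1) * ScalarInsns`, which is the over-costing described above.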

Contributor

@lukel97 lukel97 left a comment

Reverse ping/request for other reviews?

@lukel97
Contributor

lukel97 commented Apr 4, 2024

@arcbbb is part of this now covered by your recent work?

@arcbbb
Contributor

arcbbb commented Apr 5, 2024

> @arcbbb is part of this now covered by your recent work?

Thanks for mentioning me in this thread!
The stepvector and smin/smax/umin/umax intrinsics are covered in #87301 and #87245.
And my next target would be to implement getCmpSelInstrCost with getRISCVInstructionCost.

7 participants