From eef4161ce6d8f398b9d1b6a6b801b82766ecc693 Mon Sep 17 00:00:00 2001
From: ShihPo Hung
Date: Fri, 28 Nov 2025 01:40:26 -0800
Subject: [PATCH 1/2] [TTI][RISCV] Add cost modelling for intrinsic vp.load.ff
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch is a rework of #160470 (which was reverted). With
getMemIntrinsicCost() now available, we can re-land the change and reduce
vp_load_ff boilerplate.
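For illustration only, a minimal use of the intrinsic being costed; the
element type, alignment, and function name below are assumed for the
example and are not taken from the test:

  ; Illustrative fault-only-first VP load: the intrinsic returns the loaded
  ; vector plus the number of lanes actually read (i32).
  declare { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr, <vscale x 4 x i1>, i32)

  define { <vscale x 4 x i32>, i32 } @ff_load(ptr %p, <vscale x 4 x i1> %m, i32 %evl) {
    %r = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr align 4 %p, <vscale x 4 x i1> %m, i32 %evl)
    ret { <vscale x 4 x i32>, i32 } %r
  }

With this change, such a call is costed like an ordinary vector load of the
struct's vector element whenever the type and alignment are legal for RVV;
otherwise the query falls back to the base implementation.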
---
 .../llvm/Analysis/TargetTransformInfo.h       |   3 +-
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |  13 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  16 +++
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |   4 +
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  18 +++
 .../Target/RISCV/RISCVTargetTransformInfo.h   |   4 +
 .../Analysis/CostModel/RISCV/vp-intrinsics.ll | 130 +++++++++---------
 7 files changed, 121 insertions(+), 67 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index e24e22da5681b..99525607f744a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -157,7 +157,8 @@ class MemIntrinsicCostAttributes {
         Alignment(Alignment) {}
 
   LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy,
-                                      Align Alignment, unsigned AddressSpace)
+                                      Align Alignment,
+                                      unsigned AddressSpace = 0)
       : DataTy(DataTy), IID(Id), AddressSpace(AddressSpace),
         Alignment(Alignment) {}
 
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index b1beb68feca46..4d9cfea5d1bab 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1817,7 +1817,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
         }
       }
     }
-
+    if (ICA.getID() == Intrinsic::vp_load_ff) {
+      Type *RetTy = ICA.getReturnType();
+      Type *DataTy = cast<StructType>(RetTy)->getElementType(0);
+      Align Alignment;
+      if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
+        Alignment = VPI->getPointerAlignment().valueOrOne();
+      return thisT()->getMemIntrinsicInstrCost(
+          MemIntrinsicCostAttributes(ICA.getID(), DataTy, Alignment),
+          CostKind);
+    }
     if (ICA.getID() == Intrinsic::vp_scatter) {
       if (ICA.isTypeBasedOnly()) {
         IntrinsicCostAttributes MaskedScatter(
@@ -3076,6 +3085,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::masked_compressstore:
     case Intrinsic::masked_expandload:
       return thisT()->getExpandCompressMemoryOpCost(MICA, CostKind);
+    case Intrinsic::vp_load_ff:
+      return InstructionCost::getInvalid();
     default:
       llvm_unreachable("unexpected intrinsic");
     }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index be53f51afe79f..4550e40166525 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25364,6 +25364,22 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
   return true;
 }
 
+bool RISCVTargetLowering::isLegalFirstFaultLoad(EVT DataType,
+                                                Align Alignment) const {
+  if (!Subtarget.hasVInstructions())
+    return false;
+
+  EVT ScalarType = DataType.getScalarType();
+  if (!isLegalElementTypeForRVV(ScalarType))
+    return false;
+
+  if (!Subtarget.enableUnalignedVectorMem() &&
+      Alignment < ScalarType.getStoreSize())
+    return false;
+
+  return true;
+}
+
 MachineInstr *
 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                    MachineBasicBlock::instr_iterator &MBBI,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 9b46936f195e6..69fcada6494a2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -429,6 +429,10 @@ class RISCVTargetLowering : public TargetLowering {
   /// alignment is legal.
   bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;
 
+  /// Return true if a fault-only-first load of the given result type and
+  /// alignment is legal.
+  bool isLegalFirstFaultLoad(EVT DataType, Align Alignment) const;
+
   unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
 
   bool fallBackToDAGISel(const Instruction &Inst) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 4788a428d7e64..1cc7efb0ba135 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1007,6 +1007,24 @@ InstructionCost RISCVTTIImpl::getScalarizationOverhead(
   return Cost;
 }
 
+InstructionCost
+RISCVTTIImpl::getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+                                       TTI::TargetCostKind CostKind) const {
+  Type *DataTy = MICA.getDataType();
+  Align Alignment = MICA.getAlignment();
+  switch (MICA.getID()) {
+  case Intrinsic::vp_load_ff: {
+    EVT DataTypeVT = TLI->getValueType(DL, DataTy);
+    if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
+      return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
+
+    return getMemoryOpCost(Instruction::Load, DataTy, Alignment, 0, CostKind,
+                           {TTI::OK_AnyValue, TTI::OP_None}, nullptr);
+  }
+  }
+  return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
+}
+
 InstructionCost
 RISCVTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                                     TTI::TargetCostKind CostKind) const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 5efa330b3ad71..2a73fe6255382 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -143,6 +143,10 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
 
   bool shouldConsiderVectorizationRegPressure() const override { return true; }
 
+  InstructionCost
+  getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+                           TTI::TargetCostKind CostKind) const override;
+
   InstructionCost
   getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                         TTI::TargetCostKind CostKind) const override;
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
index 71746caf35f2e..ba792d8f0955b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
@@ -836,74 +836,74 @@ define void @abs() {
   ret void
 }
 
-define void @load() {
+define void @load(ptr %src) {
 ; CHECK-LABEL: 'load'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = load <4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = load <8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = load <2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t12 = load <4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t14 = load <8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t16 = load <16 x i64>, ptr undef, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = load <vscale x 2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = load <vscale x 4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t22 = load <vscale x 8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = load <vscale x 16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = load <vscale x 2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = load <vscale x 4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = load <vscale x 8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t32 = load <vscale x 16 x i64>, ptr undef, align 128
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t12 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t14 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr undef, <2 x i1> undef, i32 undef)
-  %t1 = load <2 x i8>, ptr undef
-  %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef)
-  %t3 = load <4 x i8>, ptr undef
-  %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef)
-  %t5 = load <8 x i8>, ptr undef
-  %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef)
-  %t7 = load <16 x i8>, ptr undef
-  %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef)
-  %t9 = load <2 x i64>, ptr undef
-  %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef)
-  %t12 = load <4 x i64>, ptr undef
-  %t13 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef)
-  %t14 = load <8 x i64>, ptr undef
-  %t15 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef)
-  %t16 = load <16 x i64>, ptr undef
-  %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-  %t18 = load <vscale x 2 x i8>, ptr undef
-  %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-  %t20 = load <vscale x 4 x i8>, ptr undef
-  %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-  %t22 = load <vscale x 8 x i8>, ptr undef
-  %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-  %t24 = load <vscale x 16 x i8>, ptr undef
-  %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-  %t26 = load <vscale x 2 x i64>, ptr undef
-  %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-  %t28 = load <vscale x 4 x i64>, ptr undef
-  %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-  %t30 = load <vscale x 8 x i64>, ptr undef
-  %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-  %t32 = load <vscale x 16 x i64>, ptr undef
+  %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr %src, <2 x i1> undef, i32 undef)
+  %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+  %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr %src, <4 x i1> undef, i32 undef)
+  %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+  %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr %src, <8 x i1> undef, i32 undef)
+  %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+  %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr %src, <16 x i1> undef, i32 undef)
+  %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+  %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr %src, <2 x i1> undef, i32 undef)
+  %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+  %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr %src, <4 x i1> undef, i32 undef)
+  %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+  %t12 = call <8 x i64> @llvm.vp.load.v8i64(ptr %src, <8 x i1> undef, i32 undef)
+  %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+  %t14 = call <16 x i64> @llvm.vp.load.v16i64(ptr %src, <16 x i1> undef, i32 undef)
+  %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+  %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+  %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+  %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+  %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+  %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+  %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+  %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+  %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+  %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+  %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+  %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+  %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+  %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+  %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+  %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+  %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }

From 7d4a1defc14ecbbd08b9de78861b1ecff6d3a9a4 Mon Sep 17 00:00:00 2001
From: ShihPo Hung
Date: Sun, 30 Nov 2025 07:00:46 -0800
Subject: [PATCH 2/2] Remove hard-coded address space

---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 1cc7efb0ba135..e697b9810e824 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1018,7 +1018,8 @@ RISCVTTIImpl::getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
     if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
       return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
 
-    return getMemoryOpCost(Instruction::Load, DataTy, Alignment, 0, CostKind,
+    unsigned AS = MICA.getAddressSpace();
+    return getMemoryOpCost(Instruction::Load, DataTy, Alignment, AS, CostKind,
                            {TTI::OK_AnyValue, TTI::OP_None}, nullptr);
   }
   }