diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 3813769a73dc7..d54384c25c0d5 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -452,6 +452,11 @@ def FeatureUnalignedScalarMem
                        "true", "Has reasonably performant unaligned scalar "
                        "loads and stores">;
 
+def TuneNoOptimizedZeroStrideLoad
+    : SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
+                       "false", "Hasn't optimized (perform fewer memory operations) "
+                       "zero-stride vector load">;
+
 def TuneLUIADDIFusion
     : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
                        "true", "Enable LUI+ADDI macrofusion">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 6e3a2a38b81e5..16a0ca4c49560 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1790,6 +1790,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
   case RISCVISD::VFMV_S_F_VL:
   case RISCVISD::VMV_V_X_VL:
   case RISCVISD::VFMV_V_F_VL: {
+    // Only if we have optimized zero-stride vector load.
+    if (!Subtarget->hasOptimizedZeroStrideLoad())
+      break;
+
     // Try to match splat of a scalar load to a strided load with stride of x0.
     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 456dc00999aba..f79f9b4bdd4e7 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -101,6 +101,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool HasShortForwardBranchOpt = false;
   bool HasLUIADDIFusion = false;
   bool HasForcedAtomics = false;
+  bool HasOptimizedZeroStrideLoad = true;
   unsigned XLen = 32;
   unsigned ZvlLen = 0;
   MVT XLenVT = MVT::i32;
@@ -199,6 +200,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
   bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
   bool hasForcedAtomics() const { return HasForcedAtomics; }
+  bool hasOptimizedZeroStrideLoad() const { return HasOptimizedZeroStrideLoad; }
   MVT getXLenVT() const { return XLenVT; }
   unsigned getXLen() const { return XLen; }
   unsigned getFLen() const {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
index 81fc7329be162..879d06cfee1f3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+experimental-zvfh,+v -target-abi ilp32d -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefixes=CHECK,OPTIMIZED
 ; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+experimental-zvfh,+v -target-abi lp64d -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefixes=CHECK,OPTIMIZED
+; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+experimental-zvfh,+v,+no-optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,NOT-OPTIMIZED
+; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+experimental-zvfh,+v,+no-optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,NOT-OPTIMIZED
 
 define <vscale x 8 x half> @vsplat_nxv8f16(half %f) {
 ; CHECK-LABEL: vsplat_nxv8f16:
@@ -72,11 +76,18 @@ define <vscale x 8 x double> @vsplat_zero_nxv8f64() {
 
 ; Test that we fold this to a vlse with 0 stride.
 define <vscale x 8 x float> @vsplat_load_nxv8f32(float* %ptr) {
-; CHECK-LABEL: vsplat_load_nxv8f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vlse32.v v8, (a0), zero
-; CHECK-NEXT:    ret
+; OPTIMIZED-LABEL: vsplat_load_nxv8f32:
+; OPTIMIZED:       # %bb.0:
+; OPTIMIZED-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; OPTIMIZED-NEXT:    vlse32.v v8, (a0), zero
+; OPTIMIZED-NEXT:    ret
+;
+; NOT-OPTIMIZED-LABEL: vsplat_load_nxv8f32:
+; NOT-OPTIMIZED:       # %bb.0:
+; NOT-OPTIMIZED-NEXT:    flw ft0, 0(a0)
+; NOT-OPTIMIZED-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; NOT-OPTIMIZED-NEXT:    vfmv.v.f v8, ft0
+; NOT-OPTIMIZED-NEXT:    ret
   %f = load float, float* %ptr
   %head = insertelement <vscale x 8 x float> poison, float %f, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
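Illustrative only, not part of the patch: a minimal standalone sketch (the file name splat-load.ll and the reduced -mattr list are assumptions) that reproduces the splat-of-a-scalar-load pattern from vsplats-fp.ll above. With the default subtarget the splat should select a zero-stride vlse32.v; with +no-optimized-zero-stride-load it should instead fall back to a scalar flw feeding vfmv.v.f, matching the NOT-OPTIMIZED checks in the test.

; splat-load.ll (hypothetical reproducer): compare codegen with and without the
; new tune feature on a scalar load splatted across a scalable vector.
; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+v -target-abi lp64d -verify-machineinstrs -o - < %s
; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+v,+no-optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs -o - < %s

define <vscale x 8 x float> @vsplat_load_nxv8f32(float* %ptr) {
  %f = load float, float* %ptr
  %head = insertelement <vscale x 8 x float> poison, float %f, i32 0
  %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x float> %splat
}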