From 9225aa95ba8e05a60d1edd0b5e5310c5e7dc552b Mon Sep 17 00:00:00 2001 From: Shaoce SUN Date: Sun, 12 Oct 2025 00:37:22 +0800 Subject: [PATCH 1/7] pre-commit --- .../RISCV/GlobalISel/store-fp-zero-to-x0.ll | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll new file mode 100644 index 0000000000000..4939fe11c5394 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+f -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV64 + +define void @zero_f32(ptr %i) { +; RV32-LABEL: zero_f32: +; RV32: # %bb.0: # %entry +; RV32-NEXT: fmv.w.x fa5, zero +; RV32-NEXT: fsw fa5, 0(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: zero_f32: +; RV64: # %bb.0: # %entry +; RV64-NEXT: fmv.w.x fa5, zero +; RV64-NEXT: fsw fa5, 0(a0) +; RV64-NEXT: ret +entry: + store float 0.000000e+00, ptr %i, align 4 + ret void +} + + +define void @zero_f64(ptr %i) { +; RV32-LABEL: zero_f64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: lui a1, %hi(.LCPI1_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI1_0) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: sw a2, 0(a0) +; RV32-NEXT: sw a1, 4(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: zero_f64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: fmv.d.x fa5, zero +; RV64-NEXT: fsd fa5, 0(a0) +; RV64-NEXT: ret +entry: + store double 0.000000e+00, ptr %i, align 8 + ret void +} From 9b25edd6384d722e1980fff61d51c0999d7b9d62 Mon Sep 17 00:00:00 2001 From: Shaoce SUN Date: Sun, 12 Oct 2025 00:39:15 +0800 Subject: [PATCH 2/7] [RISCV][GISel] Fold `G_FCONSTANT` 0.0 store into `sw x0` --- .../Target/RISCV/GISel/RISCVInstructionSelector.cpp | 13 +++++++++++++ .../CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll | 9 +++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 53633eac3d2c3..54050242b1854 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -1049,6 +1049,19 @@ void RISCVInstructionSelector::preISelLower(MachineInstr &MI, MRI->setType(DstReg, sXLen); break; } + case TargetOpcode::G_STORE: { + Register SrcReg = MI.getOperand(0).getReg(); + MachineInstr *Def = MRI->getVRegDef(SrcReg); + if (Def && Def->getOpcode() == TargetOpcode::G_FCONSTANT) { + if (Def->getOperand(1).getFPImm()->getValueAPF().isPosZero()) { + MI.getOperand(0).setReg(RISCV::X0); + + if (MRI->use_nodbg_empty(SrcReg)) + Def->eraseFromParent(); + } + } + break; + } } } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll index 4939fe11c5394..d9a6e4b5ec53a 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll @@ -7,14 +7,12 @@ define void @zero_f32(ptr %i) { ; RV32-LABEL: zero_f32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: fmv.w.x fa5, zero -; RV32-NEXT: fsw fa5, 0(a0) +; RV32-NEXT: sw zero, 0(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: zero_f32: ; RV64: # 
%bb.0: # %entry -; RV64-NEXT: fmv.w.x fa5, zero -; RV64-NEXT: fsw fa5, 0(a0) +; RV64-NEXT: sw zero, 0(a0) ; RV64-NEXT: ret entry: store float 0.000000e+00, ptr %i, align 4 @@ -35,8 +33,7 @@ define void @zero_f64(ptr %i) { ; ; RV64-LABEL: zero_f64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.d.x fa5, zero -; RV64-NEXT: fsd fa5, 0(a0) +; RV64-NEXT: sd zero, 0(a0) ; RV64-NEXT: ret entry: store double 0.000000e+00, ptr %i, align 8 From 74e65af91c4ce352b22faebc8030b46e00d325e4 Mon Sep 17 00:00:00 2001 From: Shaoce SUN Date: Sun, 12 Oct 2025 18:15:26 +0800 Subject: [PATCH 3/7] only add tests --- .../RISCV/GISel/RISCVInstructionSelector.cpp | 13 -- .../RISCV/GlobalISel/store-fp-zero-to-x0.ll | 189 +++++++++++++++++- 2 files changed, 180 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 54050242b1854..53633eac3d2c3 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -1049,19 +1049,6 @@ void RISCVInstructionSelector::preISelLower(MachineInstr &MI, MRI->setType(DstReg, sXLen); break; } - case TargetOpcode::G_STORE: { - Register SrcReg = MI.getOperand(0).getReg(); - MachineInstr *Def = MRI->getVRegDef(SrcReg); - if (Def && Def->getOpcode() == TargetOpcode::G_FCONSTANT) { - if (Def->getOperand(1).getFPImm()->getValueAPF().isPosZero()) { - MI.getOperand(0).setReg(RISCV::X0); - - if (MRI->use_nodbg_empty(SrcReg)) - Def->eraseFromParent(); - } - } - break; - } } } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll index d9a6e4b5ec53a..37a026ef06821 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll @@ -1,21 +1,57 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -global-isel -mattr=+f -verify-machineinstrs < %s \ +; RUN: llc -global-isel -mtriple=riscv32 -global-isel -mattr=+f,+zfh < %s \ ; RUN: | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \ +; RUN: llc -global-isel -mtriple=riscv64 -global-isel -mattr=+d,+zfh < %s \ ; RUN: | FileCheck %s --check-prefix=RV64 +define void @zero_f16(ptr %i) { +; RV32-LABEL: zero_f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: fmv.h.x fa5, zero +; RV32-NEXT: fsh fa5, 0(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: zero_f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: fmv.h.x fa5, zero +; RV64-NEXT: fsh fa5, 0(a0) +; RV64-NEXT: ret +entry: + store half 0.0, ptr %i, align 4 + ret void +} + +define void @zero_bf16(ptr %i) { +; RV32-LABEL: zero_bf16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: fmv.h.x fa5, zero +; RV32-NEXT: fsh fa5, 0(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: zero_bf16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: fmv.h.x fa5, zero +; RV64-NEXT: fsh fa5, 0(a0) +; RV64-NEXT: ret +entry: + store bfloat 0.0, ptr %i, align 4 + ret void +} + define void @zero_f32(ptr %i) { ; RV32-LABEL: zero_f32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: sw zero, 0(a0) +; RV32-NEXT: fmv.w.x fa5, zero +; RV32-NEXT: fsw fa5, 0(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: zero_f32: ; RV64: # %bb.0: # %entry -; RV64-NEXT: sw zero, 0(a0) +; RV64-NEXT: fmv.w.x fa5, zero +; RV64-NEXT: fsw fa5, 0(a0) ; RV64-NEXT: ret entry: - store float 0.000000e+00, ptr %i, align 4 + store float 0.0, ptr %i, align 4 ret void } @@ -23,8 +59,8 @@ 
entry: define void @zero_f64(ptr %i) { ; RV32-LABEL: zero_f64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lui a1, %hi(.LCPI1_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI1_0) +; RV32-NEXT: lui a1, %hi(.LCPI3_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI3_0) ; RV32-NEXT: lw a2, 0(a1) ; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: sw a2, 0(a0) @@ -33,9 +69,144 @@ define void @zero_f64(ptr %i) { ; ; RV64-LABEL: zero_f64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: sd zero, 0(a0) +; RV64-NEXT: fmv.d.x fa5, zero +; RV64-NEXT: fsd fa5, 0(a0) +; RV64-NEXT: ret +entry: + store double 0.0, ptr %i, align 8 + ret void +} + +define void @zero_v1f32(ptr %i) { +; RV32-LABEL: zero_v1f32: +; RV32: # %bb.0: # %entry +; RV32-NEXT: fmv.w.x fa5, zero +; RV32-NEXT: fsw fa5, 0(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: zero_v1f32: +; RV64: # %bb.0: # %entry +; RV64-NEXT: fmv.w.x fa5, zero +; RV64-NEXT: fsw fa5, 0(a0) +; RV64-NEXT: ret +entry: + store <1 x float> , ptr %i, align 8 + ret void +} + +define void @zero_v2f32(ptr %i) { +; RV32-LABEL: zero_v2f32: +; RV32: # %bb.0: # %entry +; RV32-NEXT: fmv.w.x fa5, zero +; RV32-NEXT: fsw fa5, 0(a0) +; RV32-NEXT: fsw fa5, 4(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: zero_v2f32: +; RV64: # %bb.0: # %entry +; RV64-NEXT: fmv.w.x fa5, zero +; RV64-NEXT: fsw fa5, 0(a0) +; RV64-NEXT: fsw fa5, 4(a0) +; RV64-NEXT: ret +entry: + store <2 x float> , ptr %i, align 8 + ret void +} + +define void @zero_v4f32(ptr %i) { +; RV32-LABEL: zero_v4f32: +; RV32: # %bb.0: # %entry +; RV32-NEXT: fmv.w.x fa5, zero +; RV32-NEXT: fsw fa5, 0(a0) +; RV32-NEXT: fsw fa5, 4(a0) +; RV32-NEXT: fsw fa5, 8(a0) +; RV32-NEXT: fsw fa5, 12(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: zero_v4f32: +; RV64: # %bb.0: # %entry +; RV64-NEXT: fmv.w.x fa5, zero +; RV64-NEXT: fsw fa5, 0(a0) +; RV64-NEXT: fsw fa5, 4(a0) +; RV64-NEXT: fsw fa5, 8(a0) +; RV64-NEXT: fsw fa5, 12(a0) +; RV64-NEXT: ret +entry: + store <4 x float> , ptr %i, align 8 + ret void +} + +define void @zero_v1f64(ptr %i) { +; RV32-LABEL: zero_v1f64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: lui a1, %hi(.LCPI7_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI7_0) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: sw a2, 0(a0) +; RV32-NEXT: sw a1, 4(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: zero_v1f64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: fmv.d.x fa5, zero +; RV64-NEXT: fsd fa5, 0(a0) +; RV64-NEXT: ret +entry: + store <1 x double> , ptr %i, align 8 + ret void +} + +define void @zero_v2f64(ptr %i) { +; RV32-LABEL: zero_v2f64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: lui a1, %hi(.LCPI8_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: sw a2, 0(a0) +; RV32-NEXT: sw a1, 4(a0) +; RV32-NEXT: sw a2, 8(a0) +; RV32-NEXT: sw a1, 12(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: zero_v2f64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: fmv.d.x fa5, zero +; RV64-NEXT: fsd fa5, 0(a0) +; RV64-NEXT: fsd fa5, 8(a0) +; RV64-NEXT: ret +entry: + store <2 x double> , ptr %i, align 8 + ret void +} + +define void @zero_v4f64(ptr %i) { +; RV32-LABEL: zero_v4f64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: lui a1, %hi(.LCPI9_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI9_0) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: sw a2, 0(a0) +; RV32-NEXT: sw a1, 4(a0) +; RV32-NEXT: sw a2, 8(a0) +; RV32-NEXT: sw a1, 12(a0) +; RV32-NEXT: sw a2, 16(a0) +; RV32-NEXT: sw a1, 20(a0) +; RV32-NEXT: sw a2, 24(a0) +; RV32-NEXT: sw a1, 28(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: zero_v4f64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: fmv.d.x fa5, zero +; RV64-NEXT: 
fsd fa5, 0(a0) +; RV64-NEXT: fsd fa5, 8(a0) +; RV64-NEXT: fsd fa5, 16(a0) +; RV64-NEXT: fsd fa5, 24(a0) ; RV64-NEXT: ret entry: - store double 0.000000e+00, ptr %i, align 8 + store <4 x double> , ptr %i, align 8 ret void } From 1946132eda359ae8f5d6f7c3574e918f03be251e Mon Sep 17 00:00:00 2001 From: Shaoce SUN Date: Sun, 12 Oct 2025 18:31:15 +0800 Subject: [PATCH 4/7] remove duplicate option --- llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll index 37a026ef06821..52bcd653a8480 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=riscv32 -global-isel -mattr=+f,+zfh < %s \ +; RUN: llc -global-isel -mtriple=riscv32 -mattr=+f,+zfh < %s \ ; RUN: | FileCheck %s --check-prefix=RV32 -; RUN: llc -global-isel -mtriple=riscv64 -global-isel -mattr=+d,+zfh < %s \ +; RUN: llc -global-isel -mtriple=riscv64 -mattr=+d,+zfh < %s \ ; RUN: | FileCheck %s --check-prefix=RV64 define void @zero_f16(ptr %i) { From 1dbeb86852a38789a1338690d3ad8e486f0e99ee Mon Sep 17 00:00:00 2001 From: Shaoce SUN Date: Mon, 13 Oct 2025 00:31:35 +0800 Subject: [PATCH 5/7] update --- .../GISel/RISCVPostLegalizerCombiner.cpp | 58 ++++++++++++++- .../RISCV/GlobalISel/store-fp-zero-to-x0.ll | 72 ++++++++----------- 2 files changed, 85 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp index 67b510dc80f1e..802acc8ff239c 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/FormatVariadic.h" #define GET_GICOMBINER_DEPS #include "RISCVGenPostLegalizeGICombiner.inc" @@ -98,6 +99,8 @@ class RISCVPostLegalizerCombiner : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; + bool combineFPZeroStore(MachineFunction &MF, const RISCVSubtarget &STI); + private: RISCVPostLegalizerCombinerImplRuleConfig RuleConfig; }; @@ -122,6 +125,54 @@ RISCVPostLegalizerCombiner::RISCVPostLegalizerCombiner() report_fatal_error("Invalid rule identifier"); } +/// Try to fold: +/// G_STORE (G_FCONSTANT +0.0), addr +/// into: +/// G_STORE (G_CONSTANT 0 [XLEN]), addr +bool RISCVPostLegalizerCombiner::combineFPZeroStore(MachineFunction &MF, + const RISCVSubtarget &STI) { + bool Changed = false; + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (MI.getOpcode() != TargetOpcode::G_STORE) + continue; + + Register SrcReg = MI.getOperand(0).getReg(); + if (!SrcReg.isVirtual()) + continue; + + MachineInstr *Def = MRI.getVRegDef(SrcReg); + if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT) + continue; + + auto *CFP = Def->getOperand(1).getFPImm(); + if (!CFP || !CFP->getValueAPF().isPosZero()) + continue; + + // Use XLEN-wide integer zero + MachineIRBuilder MIB(MI); + const unsigned XLen = STI.getXLen(); + auto Zero = MIB.buildConstant(LLT::scalar(XLen), 0); + 
MI.getOperand(0).setReg(Zero.getReg(0)); + + LLT ValTy = MRI.getType(SrcReg); + if (MRI.use_nodbg_empty(SrcReg)) + Def->eraseFromParent(); + + [[maybe_unused]] unsigned ValBits = ValTy.getSizeInBits(); + LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero " + "(XLEN={1}, ValBits={2}) : \n\t{3}\n", + DEBUG_TYPE, XLen, ValBits, MI)); + + Changed = true; + } + } + + return Changed; +} + bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { if (MF.getProperties().hasFailedISel()) return false; @@ -147,7 +198,12 @@ bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { F.hasMinSize()); RISCVPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig, ST, MDT, LI); - return Impl.combineMachineInstrs(); + + bool TableCombChanged = Impl.combineMachineInstrs(); + + bool LocalChanged = combineFPZeroStore(MF, ST); + + return TableCombChanged || LocalChanged; } char RISCVPostLegalizerCombiner::ID = 0; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll index 52bcd653a8480..1323bfc1aefbc 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll @@ -7,14 +7,12 @@ define void @zero_f16(ptr %i) { ; RV32-LABEL: zero_f16: ; RV32: # %bb.0: # %entry -; RV32-NEXT: fmv.h.x fa5, zero -; RV32-NEXT: fsh fa5, 0(a0) +; RV32-NEXT: sh zero, 0(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: zero_f16: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.h.x fa5, zero -; RV64-NEXT: fsh fa5, 0(a0) +; RV64-NEXT: sh zero, 0(a0) ; RV64-NEXT: ret entry: store half 0.0, ptr %i, align 4 @@ -24,14 +22,12 @@ entry: define void @zero_bf16(ptr %i) { ; RV32-LABEL: zero_bf16: ; RV32: # %bb.0: # %entry -; RV32-NEXT: fmv.h.x fa5, zero -; RV32-NEXT: fsh fa5, 0(a0) +; RV32-NEXT: sh zero, 0(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: zero_bf16: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.h.x fa5, zero -; RV64-NEXT: fsh fa5, 0(a0) +; RV64-NEXT: sh zero, 0(a0) ; RV64-NEXT: ret entry: store bfloat 0.0, ptr %i, align 4 @@ -41,14 +37,12 @@ entry: define void @zero_f32(ptr %i) { ; RV32-LABEL: zero_f32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: fmv.w.x fa5, zero -; RV32-NEXT: fsw fa5, 0(a0) +; RV32-NEXT: sw zero, 0(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: zero_f32: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.w.x fa5, zero -; RV64-NEXT: fsw fa5, 0(a0) +; RV64-NEXT: sw zero, 0(a0) ; RV64-NEXT: ret entry: store float 0.0, ptr %i, align 4 @@ -69,8 +63,7 @@ define void @zero_f64(ptr %i) { ; ; RV64-LABEL: zero_f64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.d.x fa5, zero -; RV64-NEXT: fsd fa5, 0(a0) +; RV64-NEXT: sd zero, 0(a0) ; RV64-NEXT: ret entry: store double 0.0, ptr %i, align 8 @@ -80,14 +73,12 @@ entry: define void @zero_v1f32(ptr %i) { ; RV32-LABEL: zero_v1f32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: fmv.w.x fa5, zero -; RV32-NEXT: fsw fa5, 0(a0) +; RV32-NEXT: sw zero, 0(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: zero_v1f32: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.w.x fa5, zero -; RV64-NEXT: fsw fa5, 0(a0) +; RV64-NEXT: sw zero, 0(a0) ; RV64-NEXT: ret entry: store <1 x float> , ptr %i, align 8 @@ -97,16 +88,14 @@ entry: define void @zero_v2f32(ptr %i) { ; RV32-LABEL: zero_v2f32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: fmv.w.x fa5, zero -; RV32-NEXT: fsw fa5, 0(a0) -; RV32-NEXT: fsw fa5, 4(a0) +; RV32-NEXT: sw zero, 0(a0) +; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: zero_v2f32: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.w.x fa5, zero 
-; RV64-NEXT: fsw fa5, 0(a0) -; RV64-NEXT: fsw fa5, 4(a0) +; RV64-NEXT: sw zero, 0(a0) +; RV64-NEXT: sw zero, 4(a0) ; RV64-NEXT: ret entry: store <2 x float> , ptr %i, align 8 @@ -116,20 +105,18 @@ entry: define void @zero_v4f32(ptr %i) { ; RV32-LABEL: zero_v4f32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: fmv.w.x fa5, zero -; RV32-NEXT: fsw fa5, 0(a0) -; RV32-NEXT: fsw fa5, 4(a0) -; RV32-NEXT: fsw fa5, 8(a0) -; RV32-NEXT: fsw fa5, 12(a0) +; RV32-NEXT: sw zero, 0(a0) +; RV32-NEXT: sw zero, 4(a0) +; RV32-NEXT: sw zero, 8(a0) +; RV32-NEXT: sw zero, 12(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: zero_v4f32: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.w.x fa5, zero -; RV64-NEXT: fsw fa5, 0(a0) -; RV64-NEXT: fsw fa5, 4(a0) -; RV64-NEXT: fsw fa5, 8(a0) -; RV64-NEXT: fsw fa5, 12(a0) +; RV64-NEXT: sw zero, 0(a0) +; RV64-NEXT: sw zero, 4(a0) +; RV64-NEXT: sw zero, 8(a0) +; RV64-NEXT: sw zero, 12(a0) ; RV64-NEXT: ret entry: store <4 x float> , ptr %i, align 8 @@ -149,8 +136,7 @@ define void @zero_v1f64(ptr %i) { ; ; RV64-LABEL: zero_v1f64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.d.x fa5, zero -; RV64-NEXT: fsd fa5, 0(a0) +; RV64-NEXT: sd zero, 0(a0) ; RV64-NEXT: ret entry: store <1 x double> , ptr %i, align 8 @@ -172,9 +158,8 @@ define void @zero_v2f64(ptr %i) { ; ; RV64-LABEL: zero_v2f64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.d.x fa5, zero -; RV64-NEXT: fsd fa5, 0(a0) -; RV64-NEXT: fsd fa5, 8(a0) +; RV64-NEXT: sd zero, 0(a0) +; RV64-NEXT: sd zero, 8(a0) ; RV64-NEXT: ret entry: store <2 x double> , ptr %i, align 8 @@ -200,11 +185,10 @@ define void @zero_v4f64(ptr %i) { ; ; RV64-LABEL: zero_v4f64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.d.x fa5, zero -; RV64-NEXT: fsd fa5, 0(a0) -; RV64-NEXT: fsd fa5, 8(a0) -; RV64-NEXT: fsd fa5, 16(a0) -; RV64-NEXT: fsd fa5, 24(a0) +; RV64-NEXT: sd zero, 0(a0) +; RV64-NEXT: sd zero, 8(a0) +; RV64-NEXT: sd zero, 16(a0) +; RV64-NEXT: sd zero, 24(a0) ; RV64-NEXT: ret entry: store <4 x double> , ptr %i, align 8 From 2d948587b93bf9e68ed8a32f19078a624bfb038e Mon Sep 17 00:00:00 2001 From: Shaoce SUN Date: Tue, 14 Oct 2025 21:21:41 +0800 Subject: [PATCH 6/7] Use tablegen --- .../GISel/RISCVPostLegalizerCombiner.cpp | 101 ++++++++---------- llvm/lib/Target/RISCV/RISCVCombine.td | 11 +- 2 files changed, 55 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp index 802acc8ff239c..dc2ded4064e06 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp @@ -43,6 +43,50 @@ namespace { #include "RISCVGenPostLegalizeGICombiner.inc" #undef GET_GICOMBINER_TYPES +/// Match: G_STORE (G_FCONSTANT +0.0), addr +/// Return the source vreg in MatchInfo if matched. 
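+/// Illustrative sketch only (gMIR with made-up vreg names, not taken from an
+/// actual test), assuming RV32 so XLEN = 32:
+///   %c:_(s32) = G_FCONSTANT float 0.000000e+00
+///   G_STORE %c(s32), %p(p0) :: (store (s32))
+/// is rewritten by applyFoldFPZeroStore below into:
+///   %z:_(s32) = G_CONSTANT i32 0
+///   G_STORE %z(s32), %p(p0) :: (store (s32))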
+bool matchFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI, + Register &MatchInfo) { + if (MI.getOpcode() != TargetOpcode::G_STORE) + return false; + + Register SrcReg = MI.getOperand(0).getReg(); + if (!SrcReg.isVirtual()) + return false; + + MachineInstr *Def = MRI.getVRegDef(SrcReg); + if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT) + return false; + + auto *CFP = Def->getOperand(1).getFPImm(); + if (!CFP || !CFP->getValueAPF().isPosZero()) + return false; + + MatchInfo = SrcReg; + return true; +} + +/// Apply: rewrite to G_STORE (G_CONSTANT 0 [XLEN]), addr +void applyFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, const RISCVSubtarget &STI, + Register &MatchInfo) { + const unsigned XLen = STI.getXLen(); + + auto Zero = B.buildConstant(LLT::scalar(XLen), 0); + MI.getOperand(0).setReg(Zero.getReg(0)); + + MachineInstr *Def = MRI.getVRegDef(MatchInfo); + if (Def && MRI.use_nodbg_empty(MatchInfo)) + Def->eraseFromParent(); + +#ifndef NDEBUG + unsigned ValBits = MRI.getType(MatchInfo).getSizeInBits(); + LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero " + "(XLEN={1}, ValBits={2}):\n {3}\n", + DEBUG_TYPE, XLen, ValBits, MI)); +#endif +} + class RISCVPostLegalizerCombinerImpl : public Combiner { protected: const CombinerHelper Helper; @@ -99,8 +143,6 @@ class RISCVPostLegalizerCombiner : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; - bool combineFPZeroStore(MachineFunction &MF, const RISCVSubtarget &STI); - private: RISCVPostLegalizerCombinerImplRuleConfig RuleConfig; }; @@ -125,54 +167,6 @@ RISCVPostLegalizerCombiner::RISCVPostLegalizerCombiner() report_fatal_error("Invalid rule identifier"); } -/// Try to fold: -/// G_STORE (G_FCONSTANT +0.0), addr -/// into: -/// G_STORE (G_CONSTANT 0 [XLEN]), addr -bool RISCVPostLegalizerCombiner::combineFPZeroStore(MachineFunction &MF, - const RISCVSubtarget &STI) { - bool Changed = false; - MachineRegisterInfo &MRI = MF.getRegInfo(); - - for (auto &MBB : MF) { - for (auto &MI : MBB) { - if (MI.getOpcode() != TargetOpcode::G_STORE) - continue; - - Register SrcReg = MI.getOperand(0).getReg(); - if (!SrcReg.isVirtual()) - continue; - - MachineInstr *Def = MRI.getVRegDef(SrcReg); - if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT) - continue; - - auto *CFP = Def->getOperand(1).getFPImm(); - if (!CFP || !CFP->getValueAPF().isPosZero()) - continue; - - // Use XLEN-wide integer zero - MachineIRBuilder MIB(MI); - const unsigned XLen = STI.getXLen(); - auto Zero = MIB.buildConstant(LLT::scalar(XLen), 0); - MI.getOperand(0).setReg(Zero.getReg(0)); - - LLT ValTy = MRI.getType(SrcReg); - if (MRI.use_nodbg_empty(SrcReg)) - Def->eraseFromParent(); - - [[maybe_unused]] unsigned ValBits = ValTy.getSizeInBits(); - LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero " - "(XLEN={1}, ValBits={2}) : \n\t{3}\n", - DEBUG_TYPE, XLen, ValBits, MI)); - - Changed = true; - } - } - - return Changed; -} - bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { if (MF.getProperties().hasFailedISel()) return false; @@ -198,12 +192,7 @@ bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { F.hasMinSize()); RISCVPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig, ST, MDT, LI); - - bool TableCombChanged = Impl.combineMachineInstrs(); - - bool LocalChanged = combineFPZeroStore(MF, ST); - - return TableCombChanged || LocalChanged; 
+ return Impl.combineMachineInstrs(); } char RISCVPostLegalizerCombiner::ID = 0; diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td index 995dd0c5d82eb..75b71b580cbbe 100644 --- a/llvm/lib/Target/RISCV/RISCVCombine.td +++ b/llvm/lib/Target/RISCV/RISCVCombine.td @@ -19,11 +19,20 @@ def RISCVO0PreLegalizerCombiner: GICombiner< "RISCVO0PreLegalizerCombinerImpl", [optnone_combines]> { } +// Rule: fold store (fp +0.0) -> store (int zero [XLEN]) +def fp_zero_store_matchdata : GIDefMatchData<"Register">; +def fold_fp_zero_store : GICombineRule< + (defs root:$root, fp_zero_store_matchdata:$matchinfo), + (match (G_STORE $src, $addr):$root, + [{ return matchFoldFPZeroStore(*${root}, MRI, ${matchinfo}); }]), + (apply [{ applyFoldFPZeroStore(*${root}, MRI, B, STI, ${matchinfo}); }])>; + // Post-legalization combines which are primarily optimizations. // TODO: Add more combines. def RISCVPostLegalizerCombiner : GICombiner<"RISCVPostLegalizerCombinerImpl", [sub_to_add, combines_for_extload, redundant_and, identity_combines, shift_immed_chain, - commute_constant_to_rhs, simplify_neg_minmax]> { + commute_constant_to_rhs, simplify_neg_minmax, + fold_fp_zero_store]> { } From cb4c37d975ef735bb91204df55f07c8998b7d0e1 Mon Sep 17 00:00:00 2001 From: Shaoce SUN Date: Wed, 15 Oct 2025 11:14:33 +0800 Subject: [PATCH 7/7] add test for f64 on rv32 --- .../GISel/RISCVPostLegalizerCombiner.cpp | 8 +- llvm/lib/Target/RISCV/RISCVCombine.td | 2 +- .../RISCV/GlobalISel/store-fp-zero-to-x0.ll | 388 ++++++++++++------ 3 files changed, 264 insertions(+), 134 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp index dc2ded4064e06..f2b216be1db15 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp @@ -46,7 +46,7 @@ namespace { /// Match: G_STORE (G_FCONSTANT +0.0), addr /// Return the source vreg in MatchInfo if matched. bool matchFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI, - Register &MatchInfo) { + const RISCVSubtarget &STI, Register &MatchInfo) { if (MI.getOpcode() != TargetOpcode::G_STORE) return false; @@ -62,6 +62,12 @@ bool matchFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI, if (!CFP || !CFP->getValueAPF().isPosZero()) return false; + unsigned ValBits = MRI.getType(SrcReg).getSizeInBits(); + if ((ValBits == 16 && !STI.hasStdExtZfh()) || + (ValBits == 32 && !STI.hasStdExtF()) || + (ValBits == 64 && (!STI.hasStdExtD() || !STI.is64Bit()))) + return false; + MatchInfo = SrcReg; return true; } diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td index 75b71b580cbbe..a06b60d8cce07 100644 --- a/llvm/lib/Target/RISCV/RISCVCombine.td +++ b/llvm/lib/Target/RISCV/RISCVCombine.td @@ -24,7 +24,7 @@ def fp_zero_store_matchdata : GIDefMatchData<"Register">; def fold_fp_zero_store : GICombineRule< (defs root:$root, fp_zero_store_matchdata:$matchinfo), (match (G_STORE $src, $addr):$root, - [{ return matchFoldFPZeroStore(*${root}, MRI, ${matchinfo}); }]), + [{ return matchFoldFPZeroStore(*${root}, MRI, STI, ${matchinfo}); }]), (apply [{ applyFoldFPZeroStore(*${root}, MRI, B, STI, ${matchinfo}); }])>; // Post-legalization combines which are primarily optimizations. 
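The subtarget check added above is easiest to see on the 64-bit case. A minimal
sketch of the gMIR the matcher now rejects, assuming riscv32 with +d (vreg names
are illustrative, not taken from a test):

    %c:_(s64) = G_FCONSTANT double 0.000000e+00
    G_STORE %c(s64), %p(p0) :: (store (s64))

Here ValBits is 64 but STI.is64Bit() is false, so matchFoldFPZeroStore returns
false and the store keeps its FP operand; the RV32D check lines in the test
update below accordingly keep the fcvt.d.w/fsd sequence instead of integer
stores of x0.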
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll index 1323bfc1aefbc..bc79c6f650291 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll @@ -1,49 +1,83 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=riscv32 -mattr=+f,+zfh < %s \ -; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: | FileCheck %s --check-prefix=RV32F +; RUN: llc -global-isel -mtriple=riscv32 -mattr=+d,+zfh < %s \ +; RUN: | FileCheck %s --check-prefix=RV32D +; RUN: llc -global-isel -mtriple=riscv64 -mattr=+f,+zfh < %s \ +; RUN: | FileCheck %s --check-prefix=RV64F ; RUN: llc -global-isel -mtriple=riscv64 -mattr=+d,+zfh < %s \ -; RUN: | FileCheck %s --check-prefix=RV64 +; RUN: | FileCheck %s --check-prefix=RV64D define void @zero_f16(ptr %i) { -; RV32-LABEL: zero_f16: -; RV32: # %bb.0: # %entry -; RV32-NEXT: sh zero, 0(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: zero_f16: -; RV64: # %bb.0: # %entry -; RV64-NEXT: sh zero, 0(a0) -; RV64-NEXT: ret +; RV32F-LABEL: zero_f16: +; RV32F: # %bb.0: # %entry +; RV32F-NEXT: sh zero, 0(a0) +; RV32F-NEXT: ret +; +; RV32D-LABEL: zero_f16: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: sh zero, 0(a0) +; RV32D-NEXT: ret +; +; RV64F-LABEL: zero_f16: +; RV64F: # %bb.0: # %entry +; RV64F-NEXT: sh zero, 0(a0) +; RV64F-NEXT: ret +; +; RV64D-LABEL: zero_f16: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: sh zero, 0(a0) +; RV64D-NEXT: ret entry: store half 0.0, ptr %i, align 4 ret void } define void @zero_bf16(ptr %i) { -; RV32-LABEL: zero_bf16: -; RV32: # %bb.0: # %entry -; RV32-NEXT: sh zero, 0(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: zero_bf16: -; RV64: # %bb.0: # %entry -; RV64-NEXT: sh zero, 0(a0) -; RV64-NEXT: ret +; RV32F-LABEL: zero_bf16: +; RV32F: # %bb.0: # %entry +; RV32F-NEXT: sh zero, 0(a0) +; RV32F-NEXT: ret +; +; RV32D-LABEL: zero_bf16: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: sh zero, 0(a0) +; RV32D-NEXT: ret +; +; RV64F-LABEL: zero_bf16: +; RV64F: # %bb.0: # %entry +; RV64F-NEXT: sh zero, 0(a0) +; RV64F-NEXT: ret +; +; RV64D-LABEL: zero_bf16: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: sh zero, 0(a0) +; RV64D-NEXT: ret entry: store bfloat 0.0, ptr %i, align 4 ret void } define void @zero_f32(ptr %i) { -; RV32-LABEL: zero_f32: -; RV32: # %bb.0: # %entry -; RV32-NEXT: sw zero, 0(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: zero_f32: -; RV64: # %bb.0: # %entry -; RV64-NEXT: sw zero, 0(a0) -; RV64-NEXT: ret +; RV32F-LABEL: zero_f32: +; RV32F: # %bb.0: # %entry +; RV32F-NEXT: sw zero, 0(a0) +; RV32F-NEXT: ret +; +; RV32D-LABEL: zero_f32: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: sw zero, 0(a0) +; RV32D-NEXT: ret +; +; RV64F-LABEL: zero_f32: +; RV64F: # %bb.0: # %entry +; RV64F-NEXT: sw zero, 0(a0) +; RV64F-NEXT: ret +; +; RV64D-LABEL: zero_f32: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: sw zero, 0(a0) +; RV64D-NEXT: ret entry: store float 0.0, ptr %i, align 4 ret void @@ -51,145 +85,235 @@ entry: define void @zero_f64(ptr %i) { -; RV32-LABEL: zero_f64: -; RV32: # %bb.0: # %entry -; RV32-NEXT: lui a1, %hi(.LCPI3_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI3_0) -; RV32-NEXT: lw a2, 0(a1) -; RV32-NEXT: lw a1, 4(a1) -; RV32-NEXT: sw a2, 0(a0) -; RV32-NEXT: sw a1, 4(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: zero_f64: -; RV64: # %bb.0: # %entry -; RV64-NEXT: sd zero, 0(a0) -; RV64-NEXT: ret +; RV32F-LABEL: zero_f64: +; RV32F: # %bb.0: # %entry +; RV32F-NEXT: lui a1, %hi(.LCPI3_0) +; 
RV32F-NEXT: addi a1, a1, %lo(.LCPI3_0) +; RV32F-NEXT: lw a2, 0(a1) +; RV32F-NEXT: lw a1, 4(a1) +; RV32F-NEXT: sw a2, 0(a0) +; RV32F-NEXT: sw a1, 4(a0) +; RV32F-NEXT: ret +; +; RV32D-LABEL: zero_f64: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: fcvt.d.w fa5, zero +; RV32D-NEXT: fsd fa5, 0(a0) +; RV32D-NEXT: ret +; +; RV64F-LABEL: zero_f64: +; RV64F: # %bb.0: # %entry +; RV64F-NEXT: sd zero, 0(a0) +; RV64F-NEXT: ret +; +; RV64D-LABEL: zero_f64: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: sd zero, 0(a0) +; RV64D-NEXT: ret entry: store double 0.0, ptr %i, align 8 ret void } define void @zero_v1f32(ptr %i) { -; RV32-LABEL: zero_v1f32: -; RV32: # %bb.0: # %entry -; RV32-NEXT: sw zero, 0(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: zero_v1f32: -; RV64: # %bb.0: # %entry -; RV64-NEXT: sw zero, 0(a0) -; RV64-NEXT: ret +; RV32F-LABEL: zero_v1f32: +; RV32F: # %bb.0: # %entry +; RV32F-NEXT: sw zero, 0(a0) +; RV32F-NEXT: ret +; +; RV32D-LABEL: zero_v1f32: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: sw zero, 0(a0) +; RV32D-NEXT: ret +; +; RV64F-LABEL: zero_v1f32: +; RV64F: # %bb.0: # %entry +; RV64F-NEXT: sw zero, 0(a0) +; RV64F-NEXT: ret +; +; RV64D-LABEL: zero_v1f32: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: sw zero, 0(a0) +; RV64D-NEXT: ret entry: store <1 x float> , ptr %i, align 8 ret void } define void @zero_v2f32(ptr %i) { -; RV32-LABEL: zero_v2f32: -; RV32: # %bb.0: # %entry -; RV32-NEXT: sw zero, 0(a0) -; RV32-NEXT: sw zero, 4(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: zero_v2f32: -; RV64: # %bb.0: # %entry -; RV64-NEXT: sw zero, 0(a0) -; RV64-NEXT: sw zero, 4(a0) -; RV64-NEXT: ret +; RV32F-LABEL: zero_v2f32: +; RV32F: # %bb.0: # %entry +; RV32F-NEXT: sw zero, 0(a0) +; RV32F-NEXT: sw zero, 4(a0) +; RV32F-NEXT: ret +; +; RV32D-LABEL: zero_v2f32: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: sw zero, 0(a0) +; RV32D-NEXT: sw zero, 4(a0) +; RV32D-NEXT: ret +; +; RV64F-LABEL: zero_v2f32: +; RV64F: # %bb.0: # %entry +; RV64F-NEXT: sw zero, 0(a0) +; RV64F-NEXT: sw zero, 4(a0) +; RV64F-NEXT: ret +; +; RV64D-LABEL: zero_v2f32: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: sw zero, 0(a0) +; RV64D-NEXT: sw zero, 4(a0) +; RV64D-NEXT: ret entry: store <2 x float> , ptr %i, align 8 ret void } define void @zero_v4f32(ptr %i) { -; RV32-LABEL: zero_v4f32: -; RV32: # %bb.0: # %entry -; RV32-NEXT: sw zero, 0(a0) -; RV32-NEXT: sw zero, 4(a0) -; RV32-NEXT: sw zero, 8(a0) -; RV32-NEXT: sw zero, 12(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: zero_v4f32: -; RV64: # %bb.0: # %entry -; RV64-NEXT: sw zero, 0(a0) -; RV64-NEXT: sw zero, 4(a0) -; RV64-NEXT: sw zero, 8(a0) -; RV64-NEXT: sw zero, 12(a0) -; RV64-NEXT: ret +; RV32F-LABEL: zero_v4f32: +; RV32F: # %bb.0: # %entry +; RV32F-NEXT: sw zero, 0(a0) +; RV32F-NEXT: sw zero, 4(a0) +; RV32F-NEXT: sw zero, 8(a0) +; RV32F-NEXT: sw zero, 12(a0) +; RV32F-NEXT: ret +; +; RV32D-LABEL: zero_v4f32: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: sw zero, 0(a0) +; RV32D-NEXT: sw zero, 4(a0) +; RV32D-NEXT: sw zero, 8(a0) +; RV32D-NEXT: sw zero, 12(a0) +; RV32D-NEXT: ret +; +; RV64F-LABEL: zero_v4f32: +; RV64F: # %bb.0: # %entry +; RV64F-NEXT: sw zero, 0(a0) +; RV64F-NEXT: sw zero, 4(a0) +; RV64F-NEXT: sw zero, 8(a0) +; RV64F-NEXT: sw zero, 12(a0) +; RV64F-NEXT: ret +; +; RV64D-LABEL: zero_v4f32: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: sw zero, 0(a0) +; RV64D-NEXT: sw zero, 4(a0) +; RV64D-NEXT: sw zero, 8(a0) +; RV64D-NEXT: sw zero, 12(a0) +; RV64D-NEXT: ret entry: store <4 x float> , ptr %i, align 8 ret void } define void @zero_v1f64(ptr %i) { -; RV32-LABEL: zero_v1f64: -; RV32: # %bb.0: # %entry -; 
RV32-NEXT: lui a1, %hi(.LCPI7_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI7_0) -; RV32-NEXT: lw a2, 0(a1) -; RV32-NEXT: lw a1, 4(a1) -; RV32-NEXT: sw a2, 0(a0) -; RV32-NEXT: sw a1, 4(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: zero_v1f64: -; RV64: # %bb.0: # %entry -; RV64-NEXT: sd zero, 0(a0) -; RV64-NEXT: ret +; RV32F-LABEL: zero_v1f64: +; RV32F: # %bb.0: # %entry +; RV32F-NEXT: lui a1, %hi(.LCPI7_0) +; RV32F-NEXT: addi a1, a1, %lo(.LCPI7_0) +; RV32F-NEXT: lw a2, 0(a1) +; RV32F-NEXT: lw a1, 4(a1) +; RV32F-NEXT: sw a2, 0(a0) +; RV32F-NEXT: sw a1, 4(a0) +; RV32F-NEXT: ret +; +; RV32D-LABEL: zero_v1f64: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: fcvt.d.w fa5, zero +; RV32D-NEXT: fsd fa5, 0(a0) +; RV32D-NEXT: ret +; +; RV64F-LABEL: zero_v1f64: +; RV64F: # %bb.0: # %entry +; RV64F-NEXT: sd zero, 0(a0) +; RV64F-NEXT: ret +; +; RV64D-LABEL: zero_v1f64: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: sd zero, 0(a0) +; RV64D-NEXT: ret entry: store <1 x double> , ptr %i, align 8 ret void } define void @zero_v2f64(ptr %i) { -; RV32-LABEL: zero_v2f64: -; RV32: # %bb.0: # %entry -; RV32-NEXT: lui a1, %hi(.LCPI8_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0) -; RV32-NEXT: lw a2, 0(a1) -; RV32-NEXT: lw a1, 4(a1) -; RV32-NEXT: sw a2, 0(a0) -; RV32-NEXT: sw a1, 4(a0) -; RV32-NEXT: sw a2, 8(a0) -; RV32-NEXT: sw a1, 12(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: zero_v2f64: -; RV64: # %bb.0: # %entry -; RV64-NEXT: sd zero, 0(a0) -; RV64-NEXT: sd zero, 8(a0) -; RV64-NEXT: ret +; RV32F-LABEL: zero_v2f64: +; RV32F: # %bb.0: # %entry +; RV32F-NEXT: lui a1, %hi(.LCPI8_0) +; RV32F-NEXT: addi a1, a1, %lo(.LCPI8_0) +; RV32F-NEXT: lw a2, 0(a1) +; RV32F-NEXT: lw a1, 4(a1) +; RV32F-NEXT: sw a2, 0(a0) +; RV32F-NEXT: sw a1, 4(a0) +; RV32F-NEXT: sw a2, 8(a0) +; RV32F-NEXT: sw a1, 12(a0) +; RV32F-NEXT: ret +; +; RV32D-LABEL: zero_v2f64: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: fcvt.d.w fa5, zero +; RV32D-NEXT: fsd fa5, 0(a0) +; RV32D-NEXT: fsd fa5, 8(a0) +; RV32D-NEXT: ret +; +; RV64F-LABEL: zero_v2f64: +; RV64F: # %bb.0: # %entry +; RV64F-NEXT: sd zero, 0(a0) +; RV64F-NEXT: sd zero, 8(a0) +; RV64F-NEXT: ret +; +; RV64D-LABEL: zero_v2f64: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: sd zero, 0(a0) +; RV64D-NEXT: sd zero, 8(a0) +; RV64D-NEXT: ret entry: store <2 x double> , ptr %i, align 8 ret void } define void @zero_v4f64(ptr %i) { -; RV32-LABEL: zero_v4f64: -; RV32: # %bb.0: # %entry -; RV32-NEXT: lui a1, %hi(.LCPI9_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI9_0) -; RV32-NEXT: lw a2, 0(a1) -; RV32-NEXT: lw a1, 4(a1) -; RV32-NEXT: sw a2, 0(a0) -; RV32-NEXT: sw a1, 4(a0) -; RV32-NEXT: sw a2, 8(a0) -; RV32-NEXT: sw a1, 12(a0) -; RV32-NEXT: sw a2, 16(a0) -; RV32-NEXT: sw a1, 20(a0) -; RV32-NEXT: sw a2, 24(a0) -; RV32-NEXT: sw a1, 28(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: zero_v4f64: -; RV64: # %bb.0: # %entry -; RV64-NEXT: sd zero, 0(a0) -; RV64-NEXT: sd zero, 8(a0) -; RV64-NEXT: sd zero, 16(a0) -; RV64-NEXT: sd zero, 24(a0) -; RV64-NEXT: ret +; RV32F-LABEL: zero_v4f64: +; RV32F: # %bb.0: # %entry +; RV32F-NEXT: lui a1, %hi(.LCPI9_0) +; RV32F-NEXT: addi a1, a1, %lo(.LCPI9_0) +; RV32F-NEXT: lw a2, 0(a1) +; RV32F-NEXT: lw a1, 4(a1) +; RV32F-NEXT: sw a2, 0(a0) +; RV32F-NEXT: sw a1, 4(a0) +; RV32F-NEXT: sw a2, 8(a0) +; RV32F-NEXT: sw a1, 12(a0) +; RV32F-NEXT: sw a2, 16(a0) +; RV32F-NEXT: sw a1, 20(a0) +; RV32F-NEXT: sw a2, 24(a0) +; RV32F-NEXT: sw a1, 28(a0) +; RV32F-NEXT: ret +; +; RV32D-LABEL: zero_v4f64: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: fcvt.d.w fa5, zero +; RV32D-NEXT: fsd fa5, 0(a0) +; RV32D-NEXT: fsd fa5, 8(a0) +; RV32D-NEXT: fsd fa5, 
16(a0) +; RV32D-NEXT: fsd fa5, 24(a0) +; RV32D-NEXT: ret +; +; RV64F-LABEL: zero_v4f64: +; RV64F: # %bb.0: # %entry +; RV64F-NEXT: sd zero, 0(a0) +; RV64F-NEXT: sd zero, 8(a0) +; RV64F-NEXT: sd zero, 16(a0) +; RV64F-NEXT: sd zero, 24(a0) +; RV64F-NEXT: ret +; +; RV64D-LABEL: zero_v4f64: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: sd zero, 0(a0) +; RV64D-NEXT: sd zero, 8(a0) +; RV64D-NEXT: sd zero, 16(a0) +; RV64D-NEXT: sd zero, 24(a0) +; RV64D-NEXT: ret entry: store <4 x double> , ptr %i, align 8 ret void