diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 526675a682d86..b0453fc57c053 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -131,6 +131,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, case RISCV::PseudoCCMAXU: case RISCV::PseudoCCMIN: case RISCV::PseudoCCMINU: + case RISCV::PseudoCCMUL: case RISCV::PseudoCCADDW: case RISCV::PseudoCCSUBW: case RISCV::PseudoCCSLL: @@ -237,6 +238,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, case RISCV::PseudoCCMIN: NewOpc = RISCV::MIN; break; case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break; case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break; + case RISCV::PseudoCCMUL: NewOpc = RISCV::MUL; break; case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break; case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break; case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break; diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index cfee6ab22d4ff..5b72334f58d45 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1856,6 +1856,11 @@ def TuneShortForwardBranchIMinMax "true", "Enable short forward branch optimization for min,max instructions in Zbb", [TuneShortForwardBranchOpt]>; +def TuneShortForwardBranchIMul + : SubtargetFeature<"short-forward-branch-i-mul", "HasShortForwardBranchIMul", + "true", "Enable short forward branch optimization for mul instruction", + [TuneShortForwardBranchOpt]>; + // Some subtargets require a S2V transfer buffer to move scalars into vectors. // FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure. def TuneNoSinkSplatOperands diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 3a7013d9efae6..b43951729b706 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1703,6 +1703,7 @@ unsigned getPredicatedOpcode(unsigned Opcode) { case RISCV::MAXU: return RISCV::PseudoCCMAXU; case RISCV::MIN: return RISCV::PseudoCCMIN; case RISCV::MINU: return RISCV::PseudoCCMINU; + case RISCV::MUL: return RISCV::PseudoCCMUL; case RISCV::ADDI: return RISCV::PseudoCCADDI; case RISCV::SLLI: return RISCV::PseudoCCSLLI; @@ -1754,6 +1755,9 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg, MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU)) return nullptr; + if (!STI.hasShortForwardBranchIMul() && MI->getOpcode() == RISCV::MUL) + return nullptr; + // Check if MI can be predicated and folded into the CCMOV. if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END) return nullptr; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td index 5a67a5aaba293..494b1c9f98839 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td @@ -110,6 +110,7 @@ def PseudoCCMAX : SFBALU_rr; def PseudoCCMIN : SFBALU_rr; def PseudoCCMAXU : SFBALU_rr; def PseudoCCMINU : SFBALU_rr; +def PseudoCCMUL : SFBALU_rr; def PseudoCCADDI : SFBALU_ri; def PseudoCCANDI : SFBALU_ri; diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 988d0490afeb6..cf44af608542c 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -137,6 +137,7 @@ ; CHECK-NEXT: shifted-zextw-fusion - Enable SLLI+SRLI to be fused when computing (shifted) word zero extension. ; CHECK-NEXT: shlcofideleg - 'Shlcofideleg' (Delegating LCOFI Interrupts to VS-mode). ; CHECK-NEXT: short-forward-branch-i-minmax - Enable short forward branch optimization for min,max instructions in Zbb. +; CHECK-NEXT: short-forward-branch-i-mul - Enable short forward branch optimization for mul instruction. ; CHECK-NEXT: short-forward-branch-opt - Enable short forward branch optimization. ; CHECK-NEXT: shtvala - 'Shtvala' (htval provides all needed values). ; CHECK-NEXT: shvsatpa - 'Shvsatpa' (vsatp supports all modes supported by satp). diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-mul.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-mul.ll new file mode 100644 index 0000000000000..3f780fddafcce --- /dev/null +++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-mul.ll @@ -0,0 +1,156 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefixes=RV32I-M +; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefixes=RV64I-M +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+short-forward-branch-opt | \ +; RUN: FileCheck %s --check-prefixes=RV32I-SFB-M +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+short-forward-branch-opt | \ +; RUN: FileCheck %s --check-prefixes=RV64I-SFB-M +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+short-forward-branch-i-mul | \ +; RUN: FileCheck %s --check-prefixes=RV32I-SFBIMul-M +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+short-forward-branch-i-mul | \ +; RUN: FileCheck %s --check-prefixes=RV64I-SFBIMul-M + +define i32 @select_example_mul_i32(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { +; RV32I-M-LABEL: select_example_mul_i32: +; RV32I-M: # %bb.0: # %entry +; RV32I-M-NEXT: beqz a2, .LBB0_2 +; RV32I-M-NEXT: # %bb.1: +; RV32I-M-NEXT: mul a1, a0, a3 +; RV32I-M-NEXT: .LBB0_2: # %entry +; RV32I-M-NEXT: mv a0, a1 +; RV32I-M-NEXT: ret +; +; RV64I-M-LABEL: select_example_mul_i32: +; RV64I-M: # %bb.0: # %entry +; RV64I-M-NEXT: beqz a2, .LBB0_2 +; RV64I-M-NEXT: # %bb.1: +; RV64I-M-NEXT: mulw a1, a0, a3 +; RV64I-M-NEXT: .LBB0_2: # %entry +; RV64I-M-NEXT: mv a0, a1 +; RV64I-M-NEXT: ret +; +; RV32I-SFB-M-LABEL: select_example_mul_i32: +; RV32I-SFB-M: # %bb.0: # %entry +; RV32I-SFB-M-NEXT: mul a0, a0, a3 +; RV32I-SFB-M-NEXT: bnez a2, .LBB0_2 +; RV32I-SFB-M-NEXT: # %bb.1: # %entry +; RV32I-SFB-M-NEXT: mv a0, a1 +; RV32I-SFB-M-NEXT: .LBB0_2: # %entry +; RV32I-SFB-M-NEXT: ret +; +; RV64I-SFB-M-LABEL: select_example_mul_i32: +; RV64I-SFB-M: # %bb.0: # %entry +; RV64I-SFB-M-NEXT: mulw a0, a0, a3 +; RV64I-SFB-M-NEXT: bnez a2, .LBB0_2 +; RV64I-SFB-M-NEXT: # %bb.1: # %entry +; RV64I-SFB-M-NEXT: mv a0, a1 +; RV64I-SFB-M-NEXT: .LBB0_2: # %entry +; RV64I-SFB-M-NEXT: ret +; +; RV32I-SFBIMul-M-LABEL: select_example_mul_i32: +; RV32I-SFBIMul-M: # %bb.0: # %entry +; RV32I-SFBIMul-M-NEXT: beqz a2, .LBB0_2 +; RV32I-SFBIMul-M-NEXT: # %bb.1: # %entry +; RV32I-SFBIMul-M-NEXT: mul a1, a0, a3 +; RV32I-SFBIMul-M-NEXT: .LBB0_2: # %entry +; RV32I-SFBIMul-M-NEXT: mv a0, a1 +; RV32I-SFBIMul-M-NEXT: ret +; +; RV64I-SFBIMul-M-LABEL: select_example_mul_i32: +; RV64I-SFBIMul-M: # %bb.0: # %entry +; RV64I-SFBIMul-M-NEXT: mulw a0, a0, a3 +; RV64I-SFBIMul-M-NEXT: bnez a2, .LBB0_2 +; RV64I-SFBIMul-M-NEXT: # %bb.1: # %entry +; RV64I-SFBIMul-M-NEXT: mv a0, a1 +; RV64I-SFBIMul-M-NEXT: .LBB0_2: # %entry +; RV64I-SFBIMul-M-NEXT: ret +entry: + %res = mul i32 %a, %y + %sel = select i1 %x, i32 %res, i32 %b + ret i32 %sel +} + +define i64 @select_example_mul_i64(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { +; RV32I-M-LABEL: select_example_mul_i64: +; RV32I-M: # %bb.0: # %entry +; RV32I-M-NEXT: beqz a4, .LBB1_2 +; RV32I-M-NEXT: # %bb.1: +; RV32I-M-NEXT: mul a2, a0, a6 +; RV32I-M-NEXT: mulhu a3, a0, a5 +; RV32I-M-NEXT: mul a1, a1, a5 +; RV32I-M-NEXT: add a2, a3, a2 +; RV32I-M-NEXT: add a3, a2, a1 +; RV32I-M-NEXT: mul a2, a0, a5 +; RV32I-M-NEXT: .LBB1_2: # %entry +; RV32I-M-NEXT: mv a0, a2 +; RV32I-M-NEXT: mv a1, a3 +; RV32I-M-NEXT: ret +; +; RV64I-M-LABEL: select_example_mul_i64: +; RV64I-M: # %bb.0: # %entry +; RV64I-M-NEXT: beqz a2, .LBB1_2 +; RV64I-M-NEXT: # %bb.1: +; RV64I-M-NEXT: mul a1, a0, a3 +; RV64I-M-NEXT: .LBB1_2: # %entry +; RV64I-M-NEXT: mv a0, a1 +; RV64I-M-NEXT: ret +; +; RV32I-SFB-M-LABEL: select_example_mul_i64: +; RV32I-SFB-M: # %bb.0: # %entry +; RV32I-SFB-M-NEXT: mul a6, a0, a6 +; RV32I-SFB-M-NEXT: mulhu a7, a0, a5 +; RV32I-SFB-M-NEXT: mul a1, a1, a5 +; RV32I-SFB-M-NEXT: mul a0, a0, a5 +; RV32I-SFB-M-NEXT: add a6, a7, a6 +; RV32I-SFB-M-NEXT: beqz a4, .LBB1_2 +; RV32I-SFB-M-NEXT: # %bb.1: # %entry +; RV32I-SFB-M-NEXT: add a3, a6, a1 +; RV32I-SFB-M-NEXT: .LBB1_2: # %entry +; RV32I-SFB-M-NEXT: bnez a4, .LBB1_4 +; RV32I-SFB-M-NEXT: # %bb.3: # %entry +; RV32I-SFB-M-NEXT: mv a0, a2 +; RV32I-SFB-M-NEXT: .LBB1_4: # %entry +; RV32I-SFB-M-NEXT: mv a1, a3 +; RV32I-SFB-M-NEXT: ret +; +; RV64I-SFB-M-LABEL: select_example_mul_i64: +; RV64I-SFB-M: # %bb.0: # %entry +; RV64I-SFB-M-NEXT: mul a0, a0, a3 +; RV64I-SFB-M-NEXT: bnez a2, .LBB1_2 +; RV64I-SFB-M-NEXT: # %bb.1: # %entry +; RV64I-SFB-M-NEXT: mv a0, a1 +; RV64I-SFB-M-NEXT: .LBB1_2: # %entry +; RV64I-SFB-M-NEXT: ret +; +; RV32I-SFBIMul-M-LABEL: select_example_mul_i64: +; RV32I-SFBIMul-M: # %bb.0: # %entry +; RV32I-SFBIMul-M-NEXT: mul a6, a0, a6 +; RV32I-SFBIMul-M-NEXT: mulhu a7, a0, a5 +; RV32I-SFBIMul-M-NEXT: mul a1, a1, a5 +; RV32I-SFBIMul-M-NEXT: add a6, a7, a6 +; RV32I-SFBIMul-M-NEXT: beqz a4, .LBB1_2 +; RV32I-SFBIMul-M-NEXT: # %bb.1: # %entry +; RV32I-SFBIMul-M-NEXT: add a3, a6, a1 +; RV32I-SFBIMul-M-NEXT: .LBB1_2: # %entry +; RV32I-SFBIMul-M-NEXT: beqz a4, .LBB1_4 +; RV32I-SFBIMul-M-NEXT: # %bb.3: # %entry +; RV32I-SFBIMul-M-NEXT: mul a2, a0, a5 +; RV32I-SFBIMul-M-NEXT: .LBB1_4: # %entry +; RV32I-SFBIMul-M-NEXT: mv a0, a2 +; RV32I-SFBIMul-M-NEXT: mv a1, a3 +; RV32I-SFBIMul-M-NEXT: ret +; +; RV64I-SFBIMul-M-LABEL: select_example_mul_i64: +; RV64I-SFBIMul-M: # %bb.0: # %entry +; RV64I-SFBIMul-M-NEXT: beqz a2, .LBB1_2 +; RV64I-SFBIMul-M-NEXT: # %bb.1: # %entry +; RV64I-SFBIMul-M-NEXT: mul a1, a0, a3 +; RV64I-SFBIMul-M-NEXT: .LBB1_2: # %entry +; RV64I-SFBIMul-M-NEXT: mv a0, a1 +; RV64I-SFBIMul-M-NEXT: ret +entry: + %res = mul i64 %a, %y + %sel = select i1 %x, i64 %res, i64 %b + ret i64 %sel +} +