-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Use short forward branch for ISD::ABS. #72958
Conversation
We can use short forward branch to conditionally negate if the value is negative.
@llvm/pr-subscribers-backend-risc-v — Author: Craig Topper (topperc). Changes: We can use short forward branch to conditionally negate if the value is negative. Patch is 38.55 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/72958.diff — 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 26190337eb3bd1b..d3df513813f270c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -396,9 +396,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
}
- if (!RV64LegalI32 && Subtarget.is64Bit())
+ if (!RV64LegalI32 && Subtarget.is64Bit() &&
+ !Subtarget.hasShortForwardBranchOpt())
setOperationAction(ISD::ABS, MVT::i32, Custom);
+ // We can use PseudoCCSUB to implement ABS.
+ if (Subtarget.hasShortForwardBranchOpt())
+ setOperationAction(ISD::ABS, XLenVT, Legal);
+
if (!Subtarget.hasVendorXTHeadCondMov())
setOperationAction(ISD::SELECT, XLenVT, Custom);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index af34e026bed1488..0f1d1d4cb23cee3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1242,6 +1242,10 @@ def anyext_oneuse : unop_oneuse<anyext>;
def ext_oneuse : unop_oneuse<ext>;
def fpext_oneuse : unop_oneuse<any_fpextend>;
+def 33signbits_node : PatLeaf<(i64 GPR:$src), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N, 0)) > 32;
+}]>;
+
/// Simple arithmetic operations
def : PatGprGpr<add, ADD>;
@@ -2002,6 +2006,15 @@ def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
(AddiPairImmSmall AddiPair:$rs2))>;
}
+let Predicates = [HasShortForwardBranchOpt] in
+def : Pat<(XLenVT (abs GPR:$rs1)),
+ (PseudoCCSUB (XLenVT GPR:$rs1), (XLenVT X0), /* COND_LT */ 2,
+ (XLenVT GPR:$rs1), (XLenVT X0), (XLenVT GPR:$rs1))>;
+let Predicates = [HasShortForwardBranchOpt, IsRV64] in
+def : Pat<(sext_inreg (abs 33signbits_node:$rs1), i32),
+ (PseudoCCSUBW (i64 GPR:$rs1), (i64 X0), /* COND_LT */ 2,
+ (XLenVT GPR:$rs1), (i64 X0), (i64 GPR:$rs1))>;
+
//===----------------------------------------------------------------------===//
// Experimental RV64 i32 legalization patterns.
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index a91f726eb06df2e..725b8fd6eeea6b3 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -2,9 +2,11 @@
; RUN: llc -mtriple=riscv64 -mattr=+c -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=NOSFB %s
; RUN: llc -mtriple=riscv64 -mcpu=sifive-u74 -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=SFB,NOZICOND %s
+; RUN: | FileCheck -check-prefixes=SFB,NOZICOND,RV64SFB %s
; RUN: llc -mtriple=riscv64 -mcpu=sifive-u74 -mattr=+experimental-zicond \
; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=SFB,ZICOND %s
+; RUN: llc -mtriple=riscv32 -mcpu=sifive-e76 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=SFB,NOZICOND,RV32SFB %s
; The sifive-7-series can predicate a mv.
@@ -65,18 +67,44 @@ define signext i32 @test3(i32 signext %v, i32 signext %w, i32 signext %x, i32 si
; NOSFB-NEXT: addw a0, a1, a2
; NOSFB-NEXT: ret
;
-; SFB-LABEL: test3:
-; SFB: # %bb.0:
-; SFB-NEXT: bnez a4, .LBB2_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: mv a0, a1
-; SFB-NEXT: .LBB2_2:
-; SFB-NEXT: beqz a4, .LBB2_4
-; SFB-NEXT: # %bb.3:
-; SFB-NEXT: mv a2, a3
-; SFB-NEXT: .LBB2_4:
-; SFB-NEXT: addw a0, a0, a2
-; SFB-NEXT: ret
+; RV64SFB-LABEL: test3:
+; RV64SFB: # %bb.0:
+; RV64SFB-NEXT: bnez a4, .LBB2_2
+; RV64SFB-NEXT: # %bb.1:
+; RV64SFB-NEXT: mv a0, a1
+; RV64SFB-NEXT: .LBB2_2:
+; RV64SFB-NEXT: beqz a4, .LBB2_4
+; RV64SFB-NEXT: # %bb.3:
+; RV64SFB-NEXT: mv a2, a3
+; RV64SFB-NEXT: .LBB2_4:
+; RV64SFB-NEXT: addw a0, a0, a2
+; RV64SFB-NEXT: ret
+;
+; ZICOND-LABEL: test3:
+; ZICOND: # %bb.0:
+; ZICOND-NEXT: bnez a4, .LBB2_2
+; ZICOND-NEXT: # %bb.1:
+; ZICOND-NEXT: mv a0, a1
+; ZICOND-NEXT: .LBB2_2:
+; ZICOND-NEXT: beqz a4, .LBB2_4
+; ZICOND-NEXT: # %bb.3:
+; ZICOND-NEXT: mv a2, a3
+; ZICOND-NEXT: .LBB2_4:
+; ZICOND-NEXT: addw a0, a0, a2
+; ZICOND-NEXT: ret
+;
+; RV32SFB-LABEL: test3:
+; RV32SFB: # %bb.0:
+; RV32SFB-NEXT: bnez a4, .LBB2_2
+; RV32SFB-NEXT: # %bb.1:
+; RV32SFB-NEXT: mv a0, a1
+; RV32SFB-NEXT: .LBB2_2:
+; RV32SFB-NEXT: beqz a4, .LBB2_4
+; RV32SFB-NEXT: # %bb.3:
+; RV32SFB-NEXT: mv a2, a3
+; RV32SFB-NEXT: .LBB2_4:
+; RV32SFB-NEXT: add a0, a0, a2
+; RV32SFB-NEXT: ret
%c = icmp eq i32 %z, 0
%a = select i1 %c, i32 %w, i32 %v
%b = select i1 %c, i32 %x, i32 %y
@@ -413,31 +441,83 @@ define void @sextw_removal_ccor(i1 %c, i32 signext %arg, i32 signext %arg1, i32
; NOSFB-NEXT: addi sp, sp, 32
; NOSFB-NEXT: ret
;
-; SFB-LABEL: sextw_removal_ccor:
-; SFB: # %bb.0: # %bb
-; SFB-NEXT: addi sp, sp, -32
-; SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; SFB-NEXT: mv s0, a3
-; SFB-NEXT: andi a0, a0, 1
-; SFB-NEXT: mv s1, a2
-; SFB-NEXT: beqz a0, .LBB15_4
-; SFB-NEXT: # %bb.3: # %bb
-; SFB-NEXT: or s0, a3, a1
-; SFB-NEXT: .LBB15_4: # %bb
-; SFB-NEXT: .LBB15_1: # %bb2
-; SFB-NEXT: # =>This Inner Loop Header: Depth=1
-; SFB-NEXT: mv a0, s0
-; SFB-NEXT: call bar@plt
-; SFB-NEXT: sllw s0, s0, s1
-; SFB-NEXT: bnez a0, .LBB15_1
-; SFB-NEXT: # %bb.2: # %bb7
-; SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; SFB-NEXT: addi sp, sp, 32
-; SFB-NEXT: ret
+; RV64SFB-LABEL: sextw_removal_ccor:
+; RV64SFB: # %bb.0: # %bb
+; RV64SFB-NEXT: addi sp, sp, -32
+; RV64SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64SFB-NEXT: mv s0, a3
+; RV64SFB-NEXT: andi a0, a0, 1
+; RV64SFB-NEXT: mv s1, a2
+; RV64SFB-NEXT: beqz a0, .LBB15_4
+; RV64SFB-NEXT: # %bb.3: # %bb
+; RV64SFB-NEXT: or s0, a3, a1
+; RV64SFB-NEXT: .LBB15_4: # %bb
+; RV64SFB-NEXT: .LBB15_1: # %bb2
+; RV64SFB-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64SFB-NEXT: mv a0, s0
+; RV64SFB-NEXT: call bar@plt
+; RV64SFB-NEXT: sllw s0, s0, s1
+; RV64SFB-NEXT: bnez a0, .LBB15_1
+; RV64SFB-NEXT: # %bb.2: # %bb7
+; RV64SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64SFB-NEXT: addi sp, sp, 32
+; RV64SFB-NEXT: ret
+;
+; ZICOND-LABEL: sextw_removal_ccor:
+; ZICOND: # %bb.0: # %bb
+; ZICOND-NEXT: addi sp, sp, -32
+; ZICOND-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; ZICOND-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZICOND-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; ZICOND-NEXT: mv s0, a3
+; ZICOND-NEXT: andi a0, a0, 1
+; ZICOND-NEXT: mv s1, a2
+; ZICOND-NEXT: beqz a0, .LBB15_4
+; ZICOND-NEXT: # %bb.3: # %bb
+; ZICOND-NEXT: or s0, a3, a1
+; ZICOND-NEXT: .LBB15_4: # %bb
+; ZICOND-NEXT: .LBB15_1: # %bb2
+; ZICOND-NEXT: # =>This Inner Loop Header: Depth=1
+; ZICOND-NEXT: mv a0, s0
+; ZICOND-NEXT: call bar@plt
+; ZICOND-NEXT: sllw s0, s0, s1
+; ZICOND-NEXT: bnez a0, .LBB15_1
+; ZICOND-NEXT: # %bb.2: # %bb7
+; ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZICOND-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZICOND-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; ZICOND-NEXT: addi sp, sp, 32
+; ZICOND-NEXT: ret
+;
+; RV32SFB-LABEL: sextw_removal_ccor:
+; RV32SFB: # %bb.0: # %bb
+; RV32SFB-NEXT: addi sp, sp, -16
+; RV32SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32SFB-NEXT: mv s0, a3
+; RV32SFB-NEXT: andi a0, a0, 1
+; RV32SFB-NEXT: mv s1, a2
+; RV32SFB-NEXT: beqz a0, .LBB15_4
+; RV32SFB-NEXT: # %bb.3: # %bb
+; RV32SFB-NEXT: or s0, a3, a1
+; RV32SFB-NEXT: .LBB15_4: # %bb
+; RV32SFB-NEXT: .LBB15_1: # %bb2
+; RV32SFB-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32SFB-NEXT: mv a0, s0
+; RV32SFB-NEXT: call bar@plt
+; RV32SFB-NEXT: sll s0, s0, s1
+; RV32SFB-NEXT: bnez a0, .LBB15_1
+; RV32SFB-NEXT: # %bb.2: # %bb7
+; RV32SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32SFB-NEXT: addi sp, sp, 16
+; RV32SFB-NEXT: ret
bb:
%sel = select i1 %c, i32 %arg, i32 0
%or = or i32 %sel, %arg2
@@ -480,31 +560,83 @@ define void @sextw_removal_ccaddw(i1 %c, i32 signext %arg, i32 signext %arg1, i3
; NOSFB-NEXT: addi sp, sp, 32
; NOSFB-NEXT: ret
;
-; SFB-LABEL: sextw_removal_ccaddw:
-; SFB: # %bb.0: # %bb
-; SFB-NEXT: addi sp, sp, -32
-; SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; SFB-NEXT: mv s0, a1
-; SFB-NEXT: andi a0, a0, 1
-; SFB-NEXT: mv s1, a2
-; SFB-NEXT: beqz a0, .LBB16_4
-; SFB-NEXT: # %bb.3: # %bb
-; SFB-NEXT: addw s0, a1, a3
-; SFB-NEXT: .LBB16_4: # %bb
-; SFB-NEXT: .LBB16_1: # %bb2
-; SFB-NEXT: # =>This Inner Loop Header: Depth=1
-; SFB-NEXT: mv a0, s0
-; SFB-NEXT: call bar@plt
-; SFB-NEXT: sllw s0, s0, s1
-; SFB-NEXT: bnez a0, .LBB16_1
-; SFB-NEXT: # %bb.2: # %bb7
-; SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; SFB-NEXT: addi sp, sp, 32
-; SFB-NEXT: ret
+; RV64SFB-LABEL: sextw_removal_ccaddw:
+; RV64SFB: # %bb.0: # %bb
+; RV64SFB-NEXT: addi sp, sp, -32
+; RV64SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64SFB-NEXT: mv s0, a1
+; RV64SFB-NEXT: andi a0, a0, 1
+; RV64SFB-NEXT: mv s1, a2
+; RV64SFB-NEXT: beqz a0, .LBB16_4
+; RV64SFB-NEXT: # %bb.3: # %bb
+; RV64SFB-NEXT: addw s0, a1, a3
+; RV64SFB-NEXT: .LBB16_4: # %bb
+; RV64SFB-NEXT: .LBB16_1: # %bb2
+; RV64SFB-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64SFB-NEXT: mv a0, s0
+; RV64SFB-NEXT: call bar@plt
+; RV64SFB-NEXT: sllw s0, s0, s1
+; RV64SFB-NEXT: bnez a0, .LBB16_1
+; RV64SFB-NEXT: # %bb.2: # %bb7
+; RV64SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64SFB-NEXT: addi sp, sp, 32
+; RV64SFB-NEXT: ret
+;
+; ZICOND-LABEL: sextw_removal_ccaddw:
+; ZICOND: # %bb.0: # %bb
+; ZICOND-NEXT: addi sp, sp, -32
+; ZICOND-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; ZICOND-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZICOND-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; ZICOND-NEXT: mv s0, a1
+; ZICOND-NEXT: andi a0, a0, 1
+; ZICOND-NEXT: mv s1, a2
+; ZICOND-NEXT: beqz a0, .LBB16_4
+; ZICOND-NEXT: # %bb.3: # %bb
+; ZICOND-NEXT: addw s0, a1, a3
+; ZICOND-NEXT: .LBB16_4: # %bb
+; ZICOND-NEXT: .LBB16_1: # %bb2
+; ZICOND-NEXT: # =>This Inner Loop Header: Depth=1
+; ZICOND-NEXT: mv a0, s0
+; ZICOND-NEXT: call bar@plt
+; ZICOND-NEXT: sllw s0, s0, s1
+; ZICOND-NEXT: bnez a0, .LBB16_1
+; ZICOND-NEXT: # %bb.2: # %bb7
+; ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZICOND-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZICOND-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; ZICOND-NEXT: addi sp, sp, 32
+; ZICOND-NEXT: ret
+;
+; RV32SFB-LABEL: sextw_removal_ccaddw:
+; RV32SFB: # %bb.0: # %bb
+; RV32SFB-NEXT: addi sp, sp, -16
+; RV32SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32SFB-NEXT: mv s0, a1
+; RV32SFB-NEXT: andi a0, a0, 1
+; RV32SFB-NEXT: mv s1, a2
+; RV32SFB-NEXT: beqz a0, .LBB16_4
+; RV32SFB-NEXT: # %bb.3: # %bb
+; RV32SFB-NEXT: add s0, a1, a3
+; RV32SFB-NEXT: .LBB16_4: # %bb
+; RV32SFB-NEXT: .LBB16_1: # %bb2
+; RV32SFB-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32SFB-NEXT: mv a0, s0
+; RV32SFB-NEXT: call bar@plt
+; RV32SFB-NEXT: sll s0, s0, s1
+; RV32SFB-NEXT: bnez a0, .LBB16_1
+; RV32SFB-NEXT: # %bb.2: # %bb7
+; RV32SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32SFB-NEXT: addi sp, sp, 16
+; RV32SFB-NEXT: ret
bb:
%sel = select i1 %c, i32 %arg2, i32 0
%or = add i32 %sel, %arg
@@ -531,14 +663,32 @@ define i32 @select_sllw(i32 %A, i32 %B, i32 %C, i1 zeroext %cond) {
; NOSFB-NEXT: mv a0, a2
; NOSFB-NEXT: ret
;
-; SFB-LABEL: select_sllw:
-; SFB: # %bb.0: # %entry
-; SFB-NEXT: bnez a3, .LBB17_2
-; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: sllw a2, a0, a1
-; SFB-NEXT: .LBB17_2: # %entry
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: ret
+; RV64SFB-LABEL: select_sllw:
+; RV64SFB: # %bb.0: # %entry
+; RV64SFB-NEXT: bnez a3, .LBB17_2
+; RV64SFB-NEXT: # %bb.1: # %entry
+; RV64SFB-NEXT: sllw a2, a0, a1
+; RV64SFB-NEXT: .LBB17_2: # %entry
+; RV64SFB-NEXT: mv a0, a2
+; RV64SFB-NEXT: ret
+;
+; ZICOND-LABEL: select_sllw:
+; ZICOND: # %bb.0: # %entry
+; ZICOND-NEXT: bnez a3, .LBB17_2
+; ZICOND-NEXT: # %bb.1: # %entry
+; ZICOND-NEXT: sllw a2, a0, a1
+; ZICOND-NEXT: .LBB17_2: # %entry
+; ZICOND-NEXT: mv a0, a2
+; ZICOND-NEXT: ret
+;
+; RV32SFB-LABEL: select_sllw:
+; RV32SFB: # %bb.0: # %entry
+; RV32SFB-NEXT: bnez a3, .LBB17_2
+; RV32SFB-NEXT: # %bb.1: # %entry
+; RV32SFB-NEXT: sll a2, a0, a1
+; RV32SFB-NEXT: .LBB17_2: # %entry
+; RV32SFB-NEXT: mv a0, a2
+; RV32SFB-NEXT: ret
entry:
%0 = shl i32 %A, %B
%1 = select i1 %cond, i32 %C, i32 %0
@@ -555,14 +705,32 @@ define i32 @select_srlw(i32 %A, i32 %B, i32 %C, i1 zeroext %cond) {
; NOSFB-NEXT: mv a0, a2
; NOSFB-NEXT: ret
;
-; SFB-LABEL: select_srlw:
-; SFB: # %bb.0: # %entry
-; SFB-NEXT: bnez a3, .LBB18_2
-; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: srlw a2, a0, a1
-; SFB-NEXT: .LBB18_2: # %entry
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: ret
+; RV64SFB-LABEL: select_srlw:
+; RV64SFB: # %bb.0: # %entry
+; RV64SFB-NEXT: bnez a3, .LBB18_2
+; RV64SFB-NEXT: # %bb.1: # %entry
+; RV64SFB-NEXT: srlw a2, a0, a1
+; RV64SFB-NEXT: .LBB18_2: # %entry
+; RV64SFB-NEXT: mv a0, a2
+; RV64SFB-NEXT: ret
+;
+; ZICOND-LABEL: select_srlw:
+; ZICOND: # %bb.0: # %entry
+; ZICOND-NEXT: bnez a3, .LBB18_2
+; ZICOND-NEXT: # %bb.1: # %entry
+; ZICOND-NEXT: srlw a2, a0, a1
+; ZICOND-NEXT: .LBB18_2: # %entry
+; ZICOND-NEXT: mv a0, a2
+; ZICOND-NEXT: ret
+;
+; RV32SFB-LABEL: select_srlw:
+; RV32SFB: # %bb.0: # %entry
+; RV32SFB-NEXT: bnez a3, .LBB18_2
+; RV32SFB-NEXT: # %bb.1: # %entry
+; RV32SFB-NEXT: srl a2, a0, a1
+; RV32SFB-NEXT: .LBB18_2: # %entry
+; RV32SFB-NEXT: mv a0, a2
+; RV32SFB-NEXT: ret
entry:
%0 = lshr i32 %A, %B
%1 = select i1 %cond, i32 %C, i32 %0
@@ -579,14 +747,32 @@ define i32 @select_sraw(i32 %A, i32 %B, i32 %C, i1 zeroext %cond) {
; NOSFB-NEXT: mv a0, a2
; NOSFB-NEXT: ret
;
-; SFB-LABEL: select_sraw:
-; SFB: # %bb.0: # %entry
-; SFB-NEXT: bnez a3, .LBB19_2
-; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: sraw a2, a0, a1
-; SFB-NEXT: .LBB19_2: # %entry
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: ret
+; RV64SFB-LABEL: select_sraw:
+; RV64SFB: # %bb.0: # %entry
+; RV64SFB-NEXT: bnez a3, .LBB19_2
+; RV64SFB-NEXT: # %bb.1: # %entry
+; RV64SFB-NEXT: sraw a2, a0, a1
+; RV64SFB-NEXT: .LBB19_2: # %entry
+; RV64SFB-NEXT: mv a0, a2
+; RV64SFB-NEXT: ret
+;
+; ZICOND-LABEL: select_sraw:
+; ZICOND: # %bb.0: # %entry
+; ZICOND-NEXT: bnez a3, .LBB19_2
+; ZICOND-NEXT: # %bb.1: # %entry
+; ZICOND-NEXT: sraw a2, a0, a1
+; ZICOND-NEXT: .LBB19_2: # %entry
+; ZICOND-NEXT: mv a0, a2
+; ZICOND-NEXT: ret
+;
+; RV32SFB-LABEL: select_sraw:
+; RV32SFB: # %bb.0: # %entry
+; RV32SFB-NEXT: bnez a3, .LBB19_2
+; RV32SFB-NEXT: # %bb.1: # %entry
+; RV32SFB-NEXT: sra a2, a0, a1
+; RV32SFB-NEXT: .LBB19_2: # %entry
+; RV32SFB-NEXT: mv a0, a2
+; RV32SFB-NEXT: ret
entry:
%0 = ashr i32 %A, %B
%1 = select i1 %cond, i32 %C, i32 %0
@@ -603,14 +789,51 @@ define i64 @select_sll(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; NOSFB-NEXT: mv a0, a2
; NOSFB-NEXT: ret
;
-; SFB-LABEL: select_sll:
-; SFB: # %bb.0: # %entry
-; SFB-NEXT: bnez a3, .LBB20_2
-; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: sll a2, a0, a1
-; SFB-NEXT: .LBB20_2: # %entry
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: ret
+; RV64SFB-LABEL: select_sll:
+; RV64SFB: # %bb.0: # %entry
+; RV64SFB-NEXT: bnez a3, .LBB20_2
+; RV64SFB-NEXT: # %bb.1: # %entry
+; RV64SFB-NEXT: sll a2, a0, a1
+; RV64SFB-NEXT: .LBB20_2: # %entry
+; RV64SFB-NEXT: mv a0, a2
+; RV64SFB-NEXT: ret
+;
+; ZICOND-LABEL: select_sll:
+; ZICOND: # %bb.0: # %entry
+; ZICOND-NEXT: bnez a3, .LBB20_2
+; ZICOND-NEXT: # %bb.1: # %entry
+; ZICOND-NEXT: sll a2, a0, a1
+; ZICOND-NEXT: .LBB20_2: # %entry
+; ZICOND-NEXT: mv a0, a2
+; ZICOND-NEXT: ret
+;
+; RV32SFB-LABEL: select_sll:
+; RV32SFB: # %bb.0: # %entry
+; RV32SFB-NEXT: sll a3, a0, a2
+; RV32SFB-NEXT: not a7, a2
+; RV32SFB-NEXT: srli a0, a0, 1
+; RV32SFB-NEXT: sll t0, a1, a2
+; RV32SFB-NEXT: srl a0, a0, a7
+; RV32SFB-NEXT: addi a2, a2, -32
+; RV32SFB-NEXT: mv a1, a3
+; RV32SFB-NEXT: bgez a2, .LBB20_2
+; RV32SFB-NEXT: # %bb.1: # %entry
+; RV32SFB-NEXT: or a1, t0, a0
+; RV32SFB-NEXT: .LBB20_2: # %entry
+; RV32SFB-NEXT: bltz a2, .LBB20_4
+; RV32SFB-NEXT: # %bb.3: # %entry
+; RV32SFB-NEXT: li a3, 0
+; RV32SFB-NEXT: .LBB20_4: # %entry
+; RV32SFB-NEXT: beqz a6, .LBB20_6
+; RV32SFB-NEXT: # %bb.5: # %entry
+; RV32SFB-NEXT: mv a1, a5
+; RV32SFB-NEXT: .LBB20_6: # %entry
+; RV32SFB-NEXT: beqz a6, .LBB20_8
+; RV32SFB-NEXT: # %bb.7: # %entry
+; RV32SFB-NEXT: mv a3, a4
+; RV32SFB-NEXT: .LBB20_8: # %entry
+; RV32SFB-NEXT: mv a0, a3
+; RV32SFB-NEXT: ret
entry:
%0 = shl i64 %A, %B
%1 = select i1 %cond, i64 %C, i64 %0
@@ -627,14 +850,51 @@ define i64 @select_srl(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; NOSFB-NEXT: mv a0, a2
; NOSFB-NEXT: ret
;
-; SFB-LABEL: select_srl:
-; SFB: # %bb.0: # %entry
-; SFB-NEXT: bnez a3, .LBB21_2
-; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: srl a2, a0, a1
-; SFB-NEXT: .LBB21_2: # %entry
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: ret
+; RV64SFB-LABEL: select_srl:
+; RV64SFB: # %bb.0: # %entry
+; RV64SFB-NEXT: bnez a3, .LBB21_2
+; RV64SFB-NEXT: # %bb.1: # %entry
+; RV64SFB-NEXT: srl a2, a0, a1
+; RV64SFB-NEXT: .LBB21_2: # %entry
+; RV64SFB-NEXT: mv a0, a2
+; RV64SFB-NEXT: ret
+;
+; ZICOND-LABEL: select_srl:
+; ZICOND: # %bb.0: # %entry
+; ZICOND-NEXT: bnez a3, .LBB21_2
+; ZICOND-NEXT: # %bb.1: # %entry
+; ZICOND-NEXT: srl a2, a0, a1
+; ZICOND-NEXT: .LBB21_2: # %entry
+; ZICOND-NEXT: mv a0, a2
+; ZICOND-NEXT: ret
+;
+; RV32SFB-LABEL: select_srl:
+; RV32SFB: # %bb.0: # %entry
+; RV32SFB-NEXT: srl a3, a1, a2
+; RV32SFB-NEXT: not a7, a2
+; RV32SFB-NEXT: slli a1, a1, 1
+; RV32SFB-NEXT: srl t0, a0, a2
+; RV32SFB-NEXT: sll a1, a1, a7
+; RV32SFB-NEXT: addi a2, a2, -32
+; RV32SFB-NEXT: mv a0, a3
+; RV32SFB-NEXT: bgez a2, .LBB21_2
+; RV32SFB-NEXT: # %bb.1: # %entry
+; RV32SFB-NEXT: or a0, t0, a1
+; RV32SFB-NEXT: .LBB21_2: # %entry
+; RV32SFB-NEXT: bltz a2, .LBB21_4
+; RV32SFB-NEXT: # %bb.3: # %entry
+; RV32SFB-NEXT: li a3, 0
+; RV32SFB-NEXT: .LBB21_4: # %entry
+; RV32SFB-NEXT: beqz a6, .LBB21_6
+; RV32SFB-NEXT: # %bb.5: # %entry
+; RV32SFB-NEXT: mv a0, a4
+; RV32SFB-NEXT: .LBB21_6: # %entry
+; RV32SFB-NEXT: beqz a6, .LBB21_8
+; RV32SFB-NEXT: # %bb.7: # %entry
+; RV32SFB-NEXT: mv a3, a5
+; RV32SFB-NEXT: .LBB21_8: # %entry
+; RV32SFB-NEXT: mv...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Committed as 7a6fd49.
We can use short forward branch to conditionally negate if the value is negative.