diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d0dc1ea888140..0136d91ef2983 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3259,6 +3259,14 @@ class TargetLoweringBase {
     return false;
   }
 
+  // Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x))
+  // A) where y has a single bit set?
+  virtual bool shouldFoldSelectWithSingleBitTest(EVT VT,
+                                                 const APInt &AndMask) const {
+    unsigned ShCt = AndMask.getBitWidth() - 1;
+    return !shouldAvoidTransformToShift(VT, ShCt);
+  }
+
   /// Does this target require the clearing of high-order bits in a register
   /// passed to the fp16 to fp conversion library function.
   virtual bool shouldKeepZExtForFP16Conv() const { return false; }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2fc9a2866c32d..83a1a8b3181cc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27252,8 +27252,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
     if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
       // Shift the tested bit over the sign bit.
       const APInt &AndMask = ConstAndRHS->getAPIntValue();
-      unsigned ShCt = AndMask.getBitWidth() - 1;
-      if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+      if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
+        unsigned ShCt = AndMask.getBitWidth() - 1;
         SDValue ShlAmt =
             DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
                             getShiftAmountTy(AndLHS.getValueType()));
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 05d0d5f964a83..fef0f96670b7c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19695,6 +19695,13 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
     return SDValue();
   return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
 }
+
+bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
+    EVT VT, const APInt &AndMask) const {
+  if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
+    return AndMask.ugt(1024);
+  return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
+}
 
 namespace llvm::RISCVVIntrinsicsTable {
 
 #define GET_RISCVVIntrinsicsTable_IMPL
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 8f3ff4be22a2d..e10db7d441ef2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -959,6 +959,9 @@ class RISCVTargetLowering : public TargetLowering {
 
   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                         SmallVectorImpl<SDNode *> &Created) const override;
+
+  bool shouldFoldSelectWithSingleBitTest(EVT VT,
+                                         const APInt &AndMask) const override;
 };
 
 namespace RISCV {
diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll
index c405b0ae17178..b9912c6ccfb98 100644
--- a/llvm/test/CodeGen/RISCV/condops.ll
+++ b/llvm/test/CodeGen/RISCV/condops.ll
@@ -3533,3 +3533,103 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
 }
 
 declare void @bat(i16 signext)
+
+define i64 @single_bit(i64 %x) {
+; RV32I-LABEL: single_bit:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    slli a2, a0, 21
+; RV32I-NEXT:    srai a2, a2, 31
+; RV32I-NEXT:    and a0, a2, a0
+; RV32I-NEXT:    and a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: single_bit:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    slli a1, a0, 53
+; RV64I-NEXT:    srai a1, a1, 63
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64XVENTANACONDOPS-LABEL: single_bit:
+; RV64XVENTANACONDOPS:       # %bb.0: # %entry
+; RV64XVENTANACONDOPS-NEXT:    andi a1, a0, 1024
+; RV64XVENTANACONDOPS-NEXT:    vt.maskc a0, a0, a1
+; RV64XVENTANACONDOPS-NEXT:    ret
+;
+; RV64XTHEADCONDMOV-LABEL: single_bit:
+; RV64XTHEADCONDMOV:       # %bb.0: # %entry
+; RV64XTHEADCONDMOV-NEXT:    slli a1, a0, 53
+; RV64XTHEADCONDMOV-NEXT:    srai a1, a1, 63
+; RV64XTHEADCONDMOV-NEXT:    and a0, a1, a0
+; RV64XTHEADCONDMOV-NEXT:    ret
+;
+; RV32ZICOND-LABEL: single_bit:
+; RV32ZICOND:       # %bb.0: # %entry
+; RV32ZICOND-NEXT:    andi a2, a0, 1024
+; RV32ZICOND-NEXT:    czero.eqz a0, a0, a2
+; RV32ZICOND-NEXT:    czero.eqz a1, a1, a2
+; RV32ZICOND-NEXT:    ret
+;
+; RV64ZICOND-LABEL: single_bit:
+; RV64ZICOND:       # %bb.0: # %entry
+; RV64ZICOND-NEXT:    andi a1, a0, 1024
+; RV64ZICOND-NEXT:    czero.eqz a0, a0, a1
+; RV64ZICOND-NEXT:    ret
+entry:
+  %and = and i64 %x, 1024
+  %tobool.not = icmp eq i64 %and, 0
+  %cond = select i1 %tobool.not, i64 0, i64 %x
+  ret i64 %cond
+}
+
+; Test folding a select with a single-bit test into (and (sra (shl x))).
+define i64 @single_bit2(i64 %x) {
+; RV32I-LABEL: single_bit2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    slli a2, a0, 20
+; RV32I-NEXT:    srai a2, a2, 31
+; RV32I-NEXT:    and a0, a2, a0
+; RV32I-NEXT:    and a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: single_bit2:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    slli a1, a0, 52
+; RV64I-NEXT:    srai a1, a1, 63
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64XVENTANACONDOPS-LABEL: single_bit2:
+; RV64XVENTANACONDOPS:       # %bb.0: # %entry
+; RV64XVENTANACONDOPS-NEXT:    slli a1, a0, 52
+; RV64XVENTANACONDOPS-NEXT:    srai a1, a1, 63
+; RV64XVENTANACONDOPS-NEXT:    and a0, a1, a0
+; RV64XVENTANACONDOPS-NEXT:    ret
+;
+; RV64XTHEADCONDMOV-LABEL: single_bit2:
+; RV64XTHEADCONDMOV:       # %bb.0: # %entry
+; RV64XTHEADCONDMOV-NEXT:    slli a1, a0, 52
+; RV64XTHEADCONDMOV-NEXT:    srai a1, a1, 63
+; RV64XTHEADCONDMOV-NEXT:    and a0, a1, a0
+; RV64XTHEADCONDMOV-NEXT:    ret
+;
+; RV32ZICOND-LABEL: single_bit2:
+; RV32ZICOND:       # %bb.0: # %entry
+; RV32ZICOND-NEXT:    slli a2, a0, 20
+; RV32ZICOND-NEXT:    srai a2, a2, 31
+; RV32ZICOND-NEXT:    and a0, a2, a0
+; RV32ZICOND-NEXT:    and a1, a2, a1
+; RV32ZICOND-NEXT:    ret
+;
+; RV64ZICOND-LABEL: single_bit2:
+; RV64ZICOND:       # %bb.0: # %entry
+; RV64ZICOND-NEXT:    slli a1, a0, 52
+; RV64ZICOND-NEXT:    srai a1, a1, 63
+; RV64ZICOND-NEXT:    and a0, a1, a0
+; RV64ZICOND-NEXT:    ret
+entry:
+  %and = and i64 %x, 2048
+  %tobool.not = icmp eq i64 %and, 0
+  %cond = select i1 %tobool.not, i64 0, i64 %x
+  ret i64 %cond
+}
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index d4a6e9e9dbb46..7fa27a307757d 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -1,26 +1,42 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK32,RV32IM %s
-; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK64,RV64IM %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+xventanacondops -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK64,RV64IMXVTCONDOPS %s
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK32,CHECKZICOND,RV32IMZICOND %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK64,CHECKZICOND,RV64IMZICOND %s
+; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32IM %s
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64IM %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+xventanacondops -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64IMXVTCONDOPS %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECKZICOND,RV32IMZICOND %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECKZICOND,RV64IMZICOND %s
 
 define i16 @select_xor_1(i16 %A, i8 %cond) {
-; CHECK32-LABEL: select_xor_1:
-; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    slli a1, a1, 31
-; CHECK32-NEXT:    srai a1, a1, 31
-; CHECK32-NEXT:    andi a1, a1, 43
-; CHECK32-NEXT:    xor a0, a0, a1
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: select_xor_1:
-; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    slli a1, a1, 63
-; CHECK64-NEXT:    srai a1, a1, 63
-; CHECK64-NEXT:    andi a1, a1, 43
-; CHECK64-NEXT:    xor a0, a0, a1
-; CHECK64-NEXT:    ret
+; RV32IM-LABEL: select_xor_1:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a1, a1, 31
+; RV32IM-NEXT:    srai a1, a1, 31
+; RV32IM-NEXT:    andi a1, a1, 43
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_xor_1:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    slli a1, a1, 63
+; RV64IM-NEXT:    srai a1, a1, 63
+; RV64IM-NEXT:    andi a1, a1, 43
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_xor_1:
+; RV64IMXVTCONDOPS:       # %bb.0: # %entry
+; RV64IMXVTCONDOPS-NEXT:    andi a1, a1, 1
+; RV64IMXVTCONDOPS-NEXT:    li a2, 43
+; RV64IMXVTCONDOPS-NEXT:    vt.maskc a1, a2, a1
+; RV64IMXVTCONDOPS-NEXT:    xor a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT:    ret
+;
+; CHECKZICOND-LABEL: select_xor_1:
+; CHECKZICOND:       # %bb.0: # %entry
+; CHECKZICOND-NEXT:    andi a1, a1, 1
+; CHECKZICOND-NEXT:    li a2, 43
+; CHECKZICOND-NEXT:    czero.eqz a1, a2, a1
+; CHECKZICOND-NEXT:    xor a0, a0, a1
+; CHECKZICOND-NEXT:    ret
 entry:
  %and = and i8 %cond, 1
  %cmp10 = icmp eq i8 %and, 0
@@ -72,21 +88,35 @@ entry:
 }
 
 define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
-; CHECK32-LABEL: select_xor_2:
-; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    slli a2, a2, 31
-; CHECK32-NEXT:    srai a2, a2, 31
-; CHECK32-NEXT:    and a1, a2, a1
-; CHECK32-NEXT:    xor a0, a0, a1
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: select_xor_2:
-; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    slli a2, a2, 63
-; CHECK64-NEXT:    srai a2, a2, 63
-; CHECK64-NEXT:    and a1, a2, a1
-; CHECK64-NEXT:    xor a0, a0, a1
-; CHECK64-NEXT:    ret
+; RV32IM-LABEL: select_xor_2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a2, a2, 31
+; RV32IM-NEXT:    srai a2, a2, 31
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    xor a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_xor_2:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    slli a2, a2, 63
+; RV64IM-NEXT:    srai a2, a2, 63
+; RV64IM-NEXT:    and a1, a2, a1
+; RV64IM-NEXT:    xor a0, a0, a1
+; RV64IM-NEXT:    ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_xor_2:
+; RV64IMXVTCONDOPS:       # %bb.0: # %entry
+; RV64IMXVTCONDOPS-NEXT:    andi a2, a2, 1
+; RV64IMXVTCONDOPS-NEXT:    vt.maskc a1, a1, a2
+; RV64IMXVTCONDOPS-NEXT:    xor a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT:    ret
+;
+; CHECKZICOND-LABEL: select_xor_2:
+; CHECKZICOND:       # %bb.0: # %entry
+; CHECKZICOND-NEXT:    andi a2, a2, 1
+; CHECKZICOND-NEXT:    czero.eqz a1, a1, a2
+; CHECKZICOND-NEXT:    xor a0, a0, a1
+; CHECKZICOND-NEXT:    ret
 entry:
  %and = and i8 %cond, 1
  %cmp10 = icmp eq i8 %and, 0
@@ -296,21 +326,35 @@ entry:
 }
 
 define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
-; CHECK32-LABEL: select_or:
-; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    slli a2, a2, 31
-; CHECK32-NEXT:    srai a2, a2, 31
-; CHECK32-NEXT:    and a1, a2, a1
-; CHECK32-NEXT:    or a0, a0, a1
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: select_or:
-; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    slli a2, a2, 63
-; CHECK64-NEXT:    srai a2, a2, 63
-; CHECK64-NEXT:    and a1, a2, a1
-; CHECK64-NEXT:    or a0, a0, a1
-; CHECK64-NEXT:    ret
+; RV32IM-LABEL: select_or:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a2, a2, 31
+; RV32IM-NEXT:    srai a2, a2, 31
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_or:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    slli a2, a2, 63
+; RV64IM-NEXT:    srai a2, a2, 63
+; RV64IM-NEXT:    and a1, a2, a1
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_or:
+; RV64IMXVTCONDOPS:       # %bb.0: # %entry
+; RV64IMXVTCONDOPS-NEXT:    andi a2, a2, 1
+; RV64IMXVTCONDOPS-NEXT:    vt.maskc a1, a1, a2
+; RV64IMXVTCONDOPS-NEXT:    or a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT:    ret
+;
+; CHECKZICOND-LABEL: select_or:
+; CHECKZICOND:       # %bb.0: # %entry
+; CHECKZICOND-NEXT:    andi a2, a2, 1
+; CHECKZICOND-NEXT:    czero.eqz a1, a1, a2
+; CHECKZICOND-NEXT:    or a0, a0, a1
+; CHECKZICOND-NEXT:    ret
 entry:
  %and = and i8 %cond, 1
  %cmp10 = icmp eq i8 %and, 0
@@ -360,21 +404,35 @@ entry:
 }
 
 define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
-; CHECK32-LABEL: select_or_1:
-; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    slli a2, a2, 31
-; CHECK32-NEXT:    srai a2, a2, 31
-; CHECK32-NEXT:    and a1, a2, a1
-; CHECK32-NEXT:    or a0, a0, a1
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: select_or_1:
-; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    slli a2, a2, 63
-; CHECK64-NEXT:    srai a2, a2, 63
-; CHECK64-NEXT:    and a1, a2, a1
-; CHECK64-NEXT:    or a0, a0, a1
-; CHECK64-NEXT:    ret
+; RV32IM-LABEL: select_or_1:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a2, a2, 31
+; RV32IM-NEXT:    srai a2, a2, 31
+; RV32IM-NEXT:    and a1, a2, a1
+; RV32IM-NEXT:    or a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_or_1:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    slli a2, a2, 63
+; RV64IM-NEXT:    srai a2, a2, 63
+; RV64IM-NEXT:    and a1, a2, a1
+; RV64IM-NEXT:    or a0, a0, a1
+; RV64IM-NEXT:    ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_or_1:
+; RV64IMXVTCONDOPS:       # %bb.0: # %entry
+; RV64IMXVTCONDOPS-NEXT:    andi a2, a2, 1
+; RV64IMXVTCONDOPS-NEXT:    vt.maskc a1, a1, a2
+; RV64IMXVTCONDOPS-NEXT:    or a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT:    ret
+;
+; CHECKZICOND-LABEL: select_or_1:
+; CHECKZICOND:       # %bb.0: # %entry
+; CHECKZICOND-NEXT:    andi a2, a2, 1
+; CHECKZICOND-NEXT:    czero.eqz a1, a1, a2
+; CHECKZICOND-NEXT:    or a0, a0, a1
+; CHECKZICOND-NEXT:    ret
 entry:
  %and = and i32 %cond, 1
  %cmp10 = icmp eq i32 %and, 0
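
Note (reviewer illustration, not part of the patch): the combine guarded by the new hook rewrites a single-bit test feeding a select into a shift pair. For a mask y = 1 << k, the select (x & y) == 0 ? 0 : A becomes ((x << (w-1-k)) arithmetically shifted right by w-1) & A: the shl moves the tested bit into the sign position, and the arithmetic shift smears it into an all-ones or all-zero mask, which matches the slli/srai/and sequences in the checks above (mask 1024, k = 10, gives slli by 53 on RV64 and by 21 in the RV32 pair lowering). A minimal standalone C++ sketch of that identity for i64, assuming the usual two's-complement arithmetic right shift; foldedSelect is a hypothetical name, not an LLVM API:

#include <cassert>
#include <cstdint>

// (x & (1 << k)) == 0 ? 0 : a, computed branchlessly the way the
// DAG combine does it: shift bit k into the sign bit, then smear it.
static uint64_t foldedSelect(uint64_t x, unsigned k, uint64_t a) {
  int64_t smeared = (int64_t)(x << (63 - k)) >> 63; // all-ones iff bit k set
  return (uint64_t)smeared & a;
}

int main() {
  const uint64_t a = 0xdeadbeef;
  for (unsigned k = 0; k < 64; ++k) {
    const uint64_t y = 1ULL << k;
    const uint64_t samples[] = {0, y, ~y, ~0ULL, 0x123456789abcdef0ULL};
    for (uint64_t x : samples) {
      const uint64_t expected = (x & y) == 0 ? 0 : a;
      assert(foldedSelect(x, k, a) == expected);
    }
  }
  return 0;
}

The threshold in the RISC-V override lines up with the ANDI immediate range: ANDI takes a 12-bit signed immediate, and 1024 is the largest single-bit mask that fits, so AndMask.ugt(1024) keeps the two-instruction andi + czero.eqz/vt.maskc form for small masks (single_bit above) and falls back to the slli/srai/and pair only once the mask no longer encodes (single_bit2, mask 2048).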