diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp index dc00fdfc2146a..403bd727684f6 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -438,6 +438,39 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { return Res; } +InstSeq generateTwoRegInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures, + unsigned &ShiftAmt, unsigned &AddOpc) { + int64_t LoVal = SignExtend64<32>(Val); + if (LoVal == 0) + return RISCVMatInt::InstSeq(); + + // Subtract the LoVal to emulate the effect of the final ADD. + uint64_t Tmp = (uint64_t)Val - (uint64_t)LoVal; + assert(Tmp != 0); + + // Use trailing zero counts to figure how far we need to shift LoVal to line + // up with the remaining constant. + // TODO: This algorithm assumes all non-zero bits in the low 32 bits of the + // final constant come from LoVal. + unsigned TzLo = llvm::countr_zero((uint64_t)LoVal); + unsigned TzHi = llvm::countr_zero(Tmp); + assert(TzLo < 32 && TzHi >= 32); + ShiftAmt = TzHi - TzLo; + AddOpc = RISCV::ADD; + + if (Tmp == ((uint64_t)LoVal << ShiftAmt)) + return RISCVMatInt::generateInstSeq(LoVal, ActiveFeatures); + + // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)). 
+ if (ActiveFeatures[RISCV::FeatureStdExtZba] && Lo_32(Val) == Hi_32(Val)) { + ShiftAmt = 32; + AddOpc = RISCV::ADD_UW; + return RISCVMatInt::generateInstSeq(LoVal, ActiveFeatures); + } + + return RISCVMatInt::InstSeq(); +} + int getIntMatCost(const APInt &Val, unsigned Size, const FeatureBitset &ActiveFeatures, bool CompressionCost) { bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit]; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h index ae7b8d402184d..072b30f2a0648 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h @@ -48,6 +48,14 @@ using InstSeq = SmallVector<Inst, 8>; // instruction selection. InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures); +// Helper to generate an instruction sequence that can materialize the given +// immediate value into a register using an additional temporary register. This +// handles cases where the constant can be generated by (ADD (SLLI X, C), X) or +// (ADD_UW (SLLI X, C), X). The sequence to generate X is returned. ShiftAmt +// is the shift amount for the SLLI and AddOpc indicates ADD or ADD_UW. +InstSeq generateTwoRegInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures, + unsigned &ShiftAmt, unsigned &AddOpc); + // Helper to estimate the number of instructions required to materialise the // given immediate value into a register. This estimate does not account for // `Val` possibly fitting into an immediate, and so may over-estimate. 
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index e90b8d49a0968..16faaa8eeb6bc 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -195,29 +195,23 @@ static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); - // See if we can create this constant as (ADD (SLLI X, 32), X) where X is at + // See if we can create this constant as (ADD (SLLI X, C), X) where X is at // worst an LUI+ADDIW. This will require an extra register, but avoids a // constant pool. // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where // low and high 32 bits are the same and bit 31 and 63 are set. if (Seq.size() > 3) { - int64_t LoVal = SignExtend64<32>(Imm); - int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32); - if (LoVal == HiVal || - (Subtarget.hasStdExtZba() && Lo_32(Imm) == Hi_32(Imm))) { - RISCVMatInt::InstSeq SeqLo = - RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits()); - if ((SeqLo.size() + 2) < Seq.size()) { - SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo); - - SDValue SLLI = SDValue( - CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo, - CurDAG->getTargetConstant(32, DL, VT)), - 0); - // Prefer ADD when possible. - unsigned AddOpc = (LoVal == HiVal) ? 
RISCV::ADD : RISCV::ADD_UW; - return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0); - } + unsigned ShiftAmt, AddOpc; + RISCVMatInt::InstSeq SeqLo = RISCVMatInt::generateTwoRegInstSeq( + Imm, Subtarget.getFeatureBits(), ShiftAmt, AddOpc); + if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) { + SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo); + + SDValue SLLI = SDValue( + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo, + CurDAG->getTargetConstant(ShiftAmt, DL, VT)), + 0); + return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0); } } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a044aa7f4a8d2..e47168e10d8dc 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4972,24 +4972,22 @@ static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) return Op; - // Special case. See if we can build the constant as (ADD (SLLI X, 32), X) do + // Optimizations below are disabled for opt size. If we're optimizing for + // size, use a constant pool. + if (DAG.shouldOptForSize()) + return SDValue(); + + // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do // that if it will avoid a constant pool. // It will require an extra temporary register though. // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where // low and high 32 bits are the same and bit 31 and 63 are set. 
- if (!DAG.shouldOptForSize()) { - int64_t LoVal = SignExtend64<32>(Imm); - int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32); - if (LoVal == HiVal || - (Subtarget.hasStdExtZba() && Lo_32(Imm) == Hi_32(Imm))) { - RISCVMatInt::InstSeq SeqLo = - RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits()); - if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) - return Op; - } - } + unsigned ShiftAmt, AddOpc; + RISCVMatInt::InstSeq SeqLo = RISCVMatInt::generateTwoRegInstSeq( + Imm, Subtarget.getFeatureBits(), ShiftAmt, AddOpc); + if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) + return Op; - // Expand to a constant pool using the default expansion code. return SDValue(); } diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll index ae66af7267975..738318e4bd677 100644 --- a/llvm/test/CodeGen/RISCV/imm.ll +++ b/llvm/test/CodeGen/RISCV/imm.ll @@ -1157,60 +1157,44 @@ define i64 @imm_2reg_1() nounwind { ; RV32I-NEXT: lui a1, 983040 ; RV32I-NEXT: ret ; -; RV64-NOPOOL-LABEL: imm_2reg_1: -; RV64-NOPOOL: # %bb.0: -; RV64-NOPOOL-NEXT: lui a0, 1048430 -; RV64-NOPOOL-NEXT: addiw a0, a0, 1493 -; RV64-NOPOOL-NEXT: slli a0, a0, 13 -; RV64-NOPOOL-NEXT: addi a0, a0, -1921 -; RV64-NOPOOL-NEXT: srli a0, a0, 4 -; RV64-NOPOOL-NEXT: not a0, a0 -; RV64-NOPOOL-NEXT: ret -; -; RV64I-POOL-LABEL: imm_2reg_1: -; RV64I-POOL: # %bb.0: -; RV64I-POOL-NEXT: lui a0, %hi(.LCPI27_0) -; RV64I-POOL-NEXT: ld a0, %lo(.LCPI27_0)(a0) -; RV64I-POOL-NEXT: ret +; RV64I-LABEL: imm_2reg_1: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a0, 74565 +; RV64I-NEXT: addiw a0, a0, 1656 +; RV64I-NEXT: slli a1, a0, 57 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret ; ; RV64IZBA-LABEL: imm_2reg_1: ; RV64IZBA: # %bb.0: -; RV64IZBA-NEXT: lui a0, 1048430 -; RV64IZBA-NEXT: addiw a0, a0, 1493 -; RV64IZBA-NEXT: slli a0, a0, 13 -; RV64IZBA-NEXT: addi a0, a0, -1921 -; RV64IZBA-NEXT: srli a0, a0, 4 -; RV64IZBA-NEXT: not a0, a0 +; RV64IZBA-NEXT: lui a0, 
74565 +; RV64IZBA-NEXT: addiw a0, a0, 1656 +; RV64IZBA-NEXT: slli a1, a0, 57 +; RV64IZBA-NEXT: add a0, a0, a1 ; RV64IZBA-NEXT: ret ; ; RV64IZBB-LABEL: imm_2reg_1: ; RV64IZBB: # %bb.0: -; RV64IZBB-NEXT: lui a0, 1048430 -; RV64IZBB-NEXT: addiw a0, a0, 1493 -; RV64IZBB-NEXT: slli a0, a0, 13 -; RV64IZBB-NEXT: addi a0, a0, -1921 -; RV64IZBB-NEXT: srli a0, a0, 4 -; RV64IZBB-NEXT: not a0, a0 +; RV64IZBB-NEXT: lui a0, 74565 +; RV64IZBB-NEXT: addiw a0, a0, 1656 +; RV64IZBB-NEXT: slli a1, a0, 57 +; RV64IZBB-NEXT: add a0, a0, a1 ; RV64IZBB-NEXT: ret ; ; RV64IZBS-LABEL: imm_2reg_1: ; RV64IZBS: # %bb.0: -; RV64IZBS-NEXT: lui a0, 1048430 -; RV64IZBS-NEXT: addiw a0, a0, 1493 -; RV64IZBS-NEXT: slli a0, a0, 13 -; RV64IZBS-NEXT: addi a0, a0, -1921 -; RV64IZBS-NEXT: srli a0, a0, 4 -; RV64IZBS-NEXT: not a0, a0 +; RV64IZBS-NEXT: lui a0, 74565 +; RV64IZBS-NEXT: addiw a0, a0, 1656 +; RV64IZBS-NEXT: slli a1, a0, 57 +; RV64IZBS-NEXT: add a0, a0, a1 ; RV64IZBS-NEXT: ret ; ; RV64IXTHEADBB-LABEL: imm_2reg_1: ; RV64IXTHEADBB: # %bb.0: -; RV64IXTHEADBB-NEXT: lui a0, 1048430 -; RV64IXTHEADBB-NEXT: addiw a0, a0, 1493 -; RV64IXTHEADBB-NEXT: slli a0, a0, 13 -; RV64IXTHEADBB-NEXT: addi a0, a0, -1921 -; RV64IXTHEADBB-NEXT: srli a0, a0, 4 -; RV64IXTHEADBB-NEXT: not a0, a0 +; RV64IXTHEADBB-NEXT: lui a0, 74565 +; RV64IXTHEADBB-NEXT: addiw a0, a0, 1656 +; RV64IXTHEADBB-NEXT: slli a1, a0, 57 +; RV64IXTHEADBB-NEXT: add a0, a0, a1 ; RV64IXTHEADBB-NEXT: ret ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678 } @@ -1703,40 +1687,28 @@ define i64 @imm_neg_9223372034778874949() { ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: ret ; -; RV64-NOPOOL-LABEL: imm_neg_9223372034778874949: -; RV64-NOPOOL: # %bb.0: -; RV64-NOPOOL-NEXT: lui a0, 1048329 -; RV64-NOPOOL-NEXT: addiw a0, a0, -1911 -; RV64-NOPOOL-NEXT: slli a0, a0, 12 -; RV64-NOPOOL-NEXT: addi a0, a0, -1911 -; RV64-NOPOOL-NEXT: srli a0, a0, 1 -; RV64-NOPOOL-NEXT: not a0, a0 -; RV64-NOPOOL-NEXT: ret -; -; RV64I-POOL-LABEL: imm_neg_9223372034778874949: -; 
RV64I-POOL: # %bb.0: -; RV64I-POOL-NEXT: lui a0, %hi(.LCPI38_0) -; RV64I-POOL-NEXT: ld a0, %lo(.LCPI38_0)(a0) -; RV64I-POOL-NEXT: ret +; RV64I-LABEL: imm_neg_9223372034778874949: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a0, 506812 +; RV64I-NEXT: addiw a0, a0, -1093 +; RV64I-NEXT: slli a1, a0, 63 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret ; ; RV64IZBA-LABEL: imm_neg_9223372034778874949: ; RV64IZBA: # %bb.0: -; RV64IZBA-NEXT: lui a0, 1048329 -; RV64IZBA-NEXT: addiw a0, a0, -1911 -; RV64IZBA-NEXT: slli a0, a0, 12 -; RV64IZBA-NEXT: addi a0, a0, -1911 -; RV64IZBA-NEXT: srli a0, a0, 1 -; RV64IZBA-NEXT: not a0, a0 +; RV64IZBA-NEXT: lui a0, 506812 +; RV64IZBA-NEXT: addiw a0, a0, -1093 +; RV64IZBA-NEXT: slli a1, a0, 63 +; RV64IZBA-NEXT: add a0, a0, a1 ; RV64IZBA-NEXT: ret ; ; RV64IZBB-LABEL: imm_neg_9223372034778874949: ; RV64IZBB: # %bb.0: -; RV64IZBB-NEXT: lui a0, 1048329 -; RV64IZBB-NEXT: addiw a0, a0, -1911 -; RV64IZBB-NEXT: slli a0, a0, 12 -; RV64IZBB-NEXT: addi a0, a0, -1911 -; RV64IZBB-NEXT: srli a0, a0, 1 -; RV64IZBB-NEXT: not a0, a0 +; RV64IZBB-NEXT: lui a0, 506812 +; RV64IZBB-NEXT: addiw a0, a0, -1093 +; RV64IZBB-NEXT: slli a1, a0, 63 +; RV64IZBB-NEXT: add a0, a0, a1 ; RV64IZBB-NEXT: ret ; ; RV64IZBS-LABEL: imm_neg_9223372034778874949: @@ -1748,12 +1720,10 @@ define i64 @imm_neg_9223372034778874949() { ; ; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949: ; RV64IXTHEADBB: # %bb.0: -; RV64IXTHEADBB-NEXT: lui a0, 1048329 -; RV64IXTHEADBB-NEXT: addiw a0, a0, -1911 -; RV64IXTHEADBB-NEXT: slli a0, a0, 12 -; RV64IXTHEADBB-NEXT: addi a0, a0, -1911 -; RV64IXTHEADBB-NEXT: srli a0, a0, 1 -; RV64IXTHEADBB-NEXT: not a0, a0 +; RV64IXTHEADBB-NEXT: lui a0, 506812 +; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093 +; RV64IXTHEADBB-NEXT: slli a1, a0, 63 +; RV64IXTHEADBB-NEXT: add a0, a0, a1 ; RV64IXTHEADBB-NEXT: ret ret i64 -9223372034778874949 ; 0x800000007bbbbbbb } @@ -1932,29 +1902,26 @@ define i64 @imm_9223372034904144827() { ; ; RV64I-LABEL: imm_9223372034904144827: ; RV64I: # 
%bb.0: -; RV64I-NEXT: lui a0, 1048343 -; RV64I-NEXT: addiw a0, a0, 1911 -; RV64I-NEXT: slli a0, a0, 12 -; RV64I-NEXT: addi a0, a0, 1911 -; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: lui a0, 572348 +; RV64I-NEXT: addiw a0, a0, -1093 +; RV64I-NEXT: slli a1, a0, 63 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IZBA-LABEL: imm_9223372034904144827: ; RV64IZBA: # %bb.0: -; RV64IZBA-NEXT: lui a0, 1048343 -; RV64IZBA-NEXT: addiw a0, a0, 1911 -; RV64IZBA-NEXT: slli a0, a0, 12 -; RV64IZBA-NEXT: addi a0, a0, 1911 -; RV64IZBA-NEXT: srli a0, a0, 1 +; RV64IZBA-NEXT: lui a0, 572348 +; RV64IZBA-NEXT: addiw a0, a0, -1093 +; RV64IZBA-NEXT: slli a1, a0, 63 +; RV64IZBA-NEXT: add a0, a0, a1 ; RV64IZBA-NEXT: ret ; ; RV64IZBB-LABEL: imm_9223372034904144827: ; RV64IZBB: # %bb.0: -; RV64IZBB-NEXT: lui a0, 1048343 -; RV64IZBB-NEXT: addiw a0, a0, 1911 -; RV64IZBB-NEXT: slli a0, a0, 12 -; RV64IZBB-NEXT: addi a0, a0, 1911 -; RV64IZBB-NEXT: srli a0, a0, 1 +; RV64IZBB-NEXT: lui a0, 572348 +; RV64IZBB-NEXT: addiw a0, a0, -1093 +; RV64IZBB-NEXT: slli a1, a0, 63 +; RV64IZBB-NEXT: add a0, a0, a1 ; RV64IZBB-NEXT: ret ; ; RV64IZBS-LABEL: imm_9223372034904144827: @@ -1966,11 +1933,10 @@ define i64 @imm_9223372034904144827() { ; ; RV64IXTHEADBB-LABEL: imm_9223372034904144827: ; RV64IXTHEADBB: # %bb.0: -; RV64IXTHEADBB-NEXT: lui a0, 1048343 -; RV64IXTHEADBB-NEXT: addiw a0, a0, 1911 -; RV64IXTHEADBB-NEXT: slli a0, a0, 12 -; RV64IXTHEADBB-NEXT: addi a0, a0, 1911 -; RV64IXTHEADBB-NEXT: srli a0, a0, 1 +; RV64IXTHEADBB-NEXT: lui a0, 572348 +; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093 +; RV64IXTHEADBB-NEXT: slli a1, a0, 63 +; RV64IXTHEADBB-NEXT: add a0, a0, a1 ; RV64IXTHEADBB-NEXT: ret ret i64 9223372034904144827 ; 0x7fffffff8bbbbbbb } diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll index e8d3ec1b128ec..07a4c093f06e6 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll @@ -61,11 +61,10 @@ 
define i64 @orcb64_knownbits(i64 %a) nounwind { ; RV64ZBB-NEXT: lui a1, 65535 ; RV64ZBB-NEXT: slli a1, a1, 12 ; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: lui a1, 131073 -; RV64ZBB-NEXT: slli a1, a1, 13 -; RV64ZBB-NEXT: addi a1, a1, 1 -; RV64ZBB-NEXT: slli a1, a1, 20 -; RV64ZBB-NEXT: addi a1, a1, 8 +; RV64ZBB-NEXT: lui a1, 256 +; RV64ZBB-NEXT: addiw a1, a1, 8 +; RV64ZBB-NEXT: slli a2, a1, 42 +; RV64ZBB-NEXT: add a1, a1, a2 ; RV64ZBB-NEXT: or a0, a0, a1 ; RV64ZBB-NEXT: orc.b a0, a0 ; RV64ZBB-NEXT: ret