diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 1b9a5fdf30bf43..a73422ab952640 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2041,15 +2041,62 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
 /// SHXADD we are trying to match.
 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
                                        SDValue &Val) {
+  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+    SDValue N0 = N.getOperand(0);
+
+    bool LeftShift = N0.getOpcode() == ISD::SHL;
+    if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
+        isa<ConstantSDNode>(N0.getOperand(1))) {
+      uint64_t Mask = N.getConstantOperandVal(1);
+      unsigned C2 = N0.getConstantOperandVal(1);
+
+      unsigned XLen = Subtarget->getXLen();
+      if (LeftShift)
+        Mask &= maskTrailingZeros<uint64_t>(C2);
+      else
+        Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
+
+      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
+      // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
+      // followed by a SHXADD with c3 for the X amount.
+      if (isShiftedMask_64(Mask)) {
+        unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
+        unsigned Trailing = countTrailingZeros(Mask);
+        if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
+          SDLoc DL(N);
+          EVT VT = N.getValueType();
+          Val = SDValue(CurDAG->getMachineNode(
+                            RISCV::SRLI, DL, VT, N0.getOperand(0),
+                            CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
+                        0);
+          return true;
+        }
+        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
+        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
+        // followed by a SHXADD using c3 for the X amount.
+        if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
+          SDLoc DL(N);
+          EVT VT = N.getValueType();
+          Val = SDValue(
+              CurDAG->getMachineNode(
+                  RISCV::SRLI, DL, VT, N0.getOperand(0),
+                  CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
+              0);
+          return true;
+        }
+      }
+    }
+  }
+
   bool LeftShift = N.getOpcode() == ISD::SHL;
   if ((LeftShift || N.getOpcode() == ISD::SRL) &&
       isa<ConstantSDNode>(N.getOperand(1))) {
-    unsigned C1 = N.getConstantOperandVal(1);
     SDValue N0 = N.getOperand(0);
     if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
         isa<ConstantSDNode>(N0.getOperand(1))) {
       uint64_t Mask = N0.getConstantOperandVal(1);
       if (isShiftedMask_64(Mask)) {
+        unsigned C1 = N.getConstantOperandVal(1);
         unsigned XLen = Subtarget->getXLen();
         unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
         unsigned Trailing = countTrailingZeros(Mask);
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 1b92377865bcf4..4e8f5ad33992a8 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1444,13 +1444,20 @@ define i64 @srliw_4_sh3add(i64* %0, i32 signext %1) {
 }
 
 define signext i32 @srli_1_sh2add(i32* %0, i64 %1) {
-; CHECK-LABEL: srli_1_sh2add:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    andi a1, a1, -4
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    lw a0, 0(a0)
-; CHECK-NEXT:    ret
+; RV64I-LABEL: srli_1_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_1_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
   %3 = lshr i64 %1, 1
   %4 = getelementptr inbounds i32, i32* %0, i64 %3
   %5 = load i32, i32* %4, align 4
@@ -1458,13 +1465,20 @@ define signext i32 @srli_1_sh2add(i32* %0, i64 %1) {
 }
 
 define i64 @srli_2_sh3add(i64* %0, i64 %1) {
-; CHECK-LABEL: srli_2_sh3add:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    andi a1, a1, -8
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ld a0, 0(a0)
-; CHECK-NEXT:    ret
+; RV64I-LABEL: srli_2_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_2_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 2
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
   %3 = lshr i64 %1, 2
   %4 = getelementptr inbounds i64, i64* %0, i64 %3
   %5 = load i64, i64* %4, align 8
@@ -1472,13 +1486,20 @@ define i64 @srli_2_sh3add(i64* %0, i64 %1) {
 }
 
 define signext i16 @srli_2_sh1add(i16* %0, i64 %1) {
-; CHECK-LABEL: srli_2_sh1add:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    srli a1, a1, 1
-; CHECK-NEXT:    andi a1, a1, -2
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    lh a0, 0(a0)
-; CHECK-NEXT:    ret
+; RV64I-LABEL: srli_2_sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_2_sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 2
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
   %3 = lshr i64 %1, 2
   %4 = getelementptr inbounds i16, i16* %0, i64 %3
   %5 = load i16, i16* %4, align 2
@@ -1486,13 +1507,20 @@ define signext i16 @srli_2_sh1add(i16* %0, i64 %1) {
 }
 
 define signext i32 @srli_3_sh2add(i32* %0, i64 %1) {
-; CHECK-LABEL: srli_3_sh2add:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    srli a1, a1, 1
-; CHECK-NEXT:    andi a1, a1, -4
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    lw a0, 0(a0)
-; CHECK-NEXT:    ret
+; RV64I-LABEL: srli_3_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_3_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 3
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
   %3 = lshr i64 %1, 3
   %4 = getelementptr inbounds i32, i32* %0, i64 %3
   %5 = load i32, i32* %4, align 4
@@ -1500,13 +1528,20 @@ define signext i32 @srli_3_sh2add(i32* %0, i64 %1) {
 }
 
 define i64 @srli_4_sh3add(i64* %0, i64 %1) {
-; CHECK-LABEL: srli_4_sh3add:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    srli a1, a1, 1
-; CHECK-NEXT:    andi a1, a1, -8
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ld a0, 0(a0)
-; CHECK-NEXT:    ret
+; RV64I-LABEL: srli_4_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_4_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 4
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
   %3 = lshr i64 %1, 4
   %4 = getelementptr inbounds i64, i64* %0, i64 %3
   %5 = load i64, i64* %4, align 8
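
Note (not part of the patch): below is a minimal standalone sketch of the shifted-mask arithmetic the new selectSHXADDOp path relies on, checked against the srli_1_sh2add and srli_2_sh1add cases in the test diff above. The helper names srliAmountForShl and srliAmountForSrl are hypothetical, C++20 <bit> stands in for llvm/Support/MathExtras.h, and XLen is assumed to be 64.

// Standalone illustration of the shifted-mask reasoning in the patch.
#include <bit>
#include <cassert>
#include <cstdint>
#include <optional>

// A "shifted mask" is a single contiguous run of ones, e.g. 0x0ff0.
static bool isShiftedMask64(uint64_t V) {
  if (V == 0)
    return false;
  uint64_t Ones = V >> std::countr_zero(V); // strip trailing zeros
  return (Ones & (Ones + 1)) == 0;          // remaining bits are a low mask
}

// (and (shl y, c2), c1): if the mask of reachable bits has no leading zeros
// and c3 == ShAmt trailing zeros with c2 < c3, the SHXADD index equals
// (y >> (c3 - c2)), so a single SRLI by c3-c2 feeds the shNadd.
static std::optional<unsigned> srliAmountForShl(uint64_t C1, unsigned C2,
                                                unsigned ShAmt) {
  assert(C2 > 0 && C2 < 64);
  uint64_t Mask = C1 & ~((uint64_t(1) << C2) - 1); // maskTrailingZeros(C2)
  if (!isShiftedMask64(Mask))
    return std::nullopt;
  unsigned Leading = std::countl_zero(Mask);
  unsigned Trailing = std::countr_zero(Mask);
  if (Leading == 0 && C2 < Trailing && Trailing == ShAmt)
    return Trailing - C2; // SRLI by c3-c2
  return std::nullopt;
}

// (and (srl y, c2), c1): with c2 leading zeros and c3 == ShAmt trailing
// zeros in the mask, the SHXADD index equals (y >> (c2 + c3)).
static std::optional<unsigned> srliAmountForSrl(uint64_t C1, unsigned C2,
                                                unsigned ShAmt) {
  assert(C2 > 0 && C2 < 64);
  uint64_t Mask = C1 & ((uint64_t(1) << (64 - C2)) - 1); // maskTrailingOnes(64-C2)
  if (!isShiftedMask64(Mask))
    return std::nullopt;
  unsigned Leading = std::countl_zero(Mask);
  unsigned Trailing = std::countr_zero(Mask);
  if (Leading == C2 && Trailing == ShAmt)
    return Leading + Trailing; // SRLI by c2+c3
  return std::nullopt;
}

int main() {
  // srli_1_sh2add: (and (shl x, 1), -4) used as a sh2add index -> srli x, 1.
  assert(srliAmountForShl(uint64_t(-4), /*C2=*/1, /*ShAmt=*/2) == 1u);
  // srli_2_sh1add: (and (srl x, 1), -2) used as a sh1add index -> srli x, 2
  // (the unreachable top bit of the -2 constant is masked off internally).
  assert(srliAmountForSrl(uint64_t(-2), /*C2=*/1, /*ShAmt=*/1) == 2u);
}

Both shift-amount formulas match the comments in the patch: SRLI by c3-c2 for the shl form and by c2+c3 for the srl form.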