Skip to content

Commit

Permalink
[DAG][ARM][MIPS][RISCV] Improve funnel shift promotion to use 'double…
Browse files Browse the repository at this point in the history
… shift' patterns

Based on a discussion on D88783, if we're promoting a funnel shift to a width at least twice the size of the original type, then we can use the 'double shift' patterns (shifting the concatenated sources).

Differential Revision: https://reviews.llvm.org/D89139
  • Loading branch information
RKSimon committed Oct 12, 2020
1 parent 1968a61 commit c252200
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 55 deletions.
41 changes: 29 additions & 12 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1129,27 +1129,44 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
SDValue Lo = GetPromotedInteger(N->getOperand(1));
SDValue Amount = GetPromotedInteger(N->getOperand(2));

unsigned OldBits = N->getOperand(0).getScalarValueSizeInBits();
unsigned NewBits = Hi.getScalarValueSizeInBits();

// Shift Lo up to occupy the upper bits of the promoted type.
SDLoc DL(N);
EVT OldVT = N->getOperand(0).getValueType();
EVT VT = Lo.getValueType();
Lo = DAG.getNode(ISD::SHL, DL, VT, Lo,
DAG.getConstant(NewBits - OldBits, DL, VT));
unsigned Opcode = N->getOpcode();
bool IsFSHR = Opcode == ISD::FSHR;
unsigned OldBits = OldVT.getScalarSizeInBits();
unsigned NewBits = VT.getScalarSizeInBits();

// Amount has to be interpreted modulo the old bit width.
Amount =
DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT));

unsigned Opcode = N->getOpcode();
if (Opcode == ISD::FSHR) {
// Increase Amount to shift the result into the lower bits of the promoted
// type.
Amount = DAG.getNode(ISD::ADD, DL, VT, Amount,
DAG.getConstant(NewBits - OldBits, DL, VT));
// If the promoted type is twice the size (or more), then we use the
// traditional funnel 'double' shift codegen. This isn't necessary if the
// shift amount is constant.
// fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
// fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) &&
!TLI.isOperationLegalOrCustom(Opcode, VT)) {
SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift);
Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount);
if (!IsFSHR)
Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift);
return Res;
}

// Shift Lo up to occupy the upper bits of the promoted type.
SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT);
Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset);

// Increase Amount to shift the result into the lower bits of the promoted
// type.
if (IsFSHR)
Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset);

return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount);
}

Expand Down
23 changes: 7 additions & 16 deletions llvm/test/CodeGen/ARM/funnel-shift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,10 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshl_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: and r2, r2, #15
; CHECK-NEXT: mov r3, #31
; CHECK-NEXT: lsl r1, r1, #16
; CHECK-NEXT: bic r3, r3, r2
; CHECK-NEXT: lsl r0, r0, r2
; CHECK-NEXT: lsr r1, r1, #1
; CHECK-NEXT: orr r0, r0, r1, lsr r3
; CHECK-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-NEXT: and r1, r2, #15
; CHECK-NEXT: lsl r0, r0, r1
; CHECK-NEXT: lsr r0, r0, #16
; CHECK-NEXT: bx lr
%f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
ret i16 %f
Expand Down Expand Up @@ -188,15 +185,9 @@ define i8 @fshl_i8_const_fold() {
define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshr_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r3, #1
; CHECK-NEXT: lsl r0, r0, #1
; CHECK-NEXT: bfi r2, r3, #4, #28
; CHECK-NEXT: mov r3, #31
; CHECK-NEXT: bic r3, r3, r2
; CHECK-NEXT: and r2, r2, #31
; CHECK-NEXT: lsl r1, r1, #16
; CHECK-NEXT: lsl r0, r0, r3
; CHECK-NEXT: orr r0, r0, r1, lsr r2
; CHECK-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-NEXT: and r1, r2, #15
; CHECK-NEXT: lsr r0, r0, r1
; CHECK-NEXT: bx lr
%f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
ret i16 %f
Expand Down
25 changes: 10 additions & 15 deletions llvm/test/CodeGen/Mips/funnel-shift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,13 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshl_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: andi $1, $6, 15
; CHECK-NEXT: sllv $2, $4, $1
; CHECK-NEXT: sll $3, $5, 16
; CHECK-NEXT: srl $3, $3, 1
; CHECK-NEXT: not $1, $1
; CHECK-NEXT: andi $1, $1, 31
; CHECK-NEXT: srlv $1, $3, $1
; CHECK-NEXT: andi $1, $5, 65535
; CHECK-NEXT: sll $2, $4, 16
; CHECK-NEXT: or $1, $2, $1
; CHECK-NEXT: andi $2, $6, 15
; CHECK-NEXT: sllv $1, $1, $2
; CHECK-NEXT: jr $ra
; CHECK-NEXT: or $2, $2, $1
; CHECK-NEXT: srl $2, $1, 16
%f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
ret i16 %f
}
Expand Down Expand Up @@ -288,15 +286,12 @@ define i8 @fshl_i8_const_fold() {
define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshr_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sll $1, $5, 16
; CHECK-NEXT: andi $1, $5, 65535
; CHECK-NEXT: sll $2, $4, 16
; CHECK-NEXT: or $1, $2, $1
; CHECK-NEXT: andi $2, $6, 15
; CHECK-NEXT: ori $3, $2, 16
; CHECK-NEXT: srlv $1, $1, $3
; CHECK-NEXT: sll $3, $4, 1
; CHECK-NEXT: xori $2, $2, 15
; CHECK-NEXT: sllv $2, $3, $2
; CHECK-NEXT: jr $ra
; CHECK-NEXT: or $2, $2, $1
; CHECK-NEXT: srlv $2, $1, $2
%f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
ret i16 %f
}
Expand Down
21 changes: 9 additions & 12 deletions llvm/test/CodeGen/RISCV/rv64Zbt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,13 @@ declare i32 @llvm.fshl.i32(i32, i32, i32)
define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
; RV64I-LABEL: fshl_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a2, a2, 31
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: not a2, a2
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: srli a1, a1, 1
; RV64I-NEXT: srl a1, a1, a2
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: andi a1, a2, 31
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: srai a0, a0, 32
; RV64I-NEXT: ret
;
; RV64IB-LABEL: fshl_i32:
Expand Down Expand Up @@ -162,14 +161,12 @@ declare i32 @llvm.fshr.i32(i32, i32, i32)
define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
; RV64I-LABEL: fshr_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: andi a2, a2, 31
; RV64I-NEXT: ori a3, a2, 32
; RV64I-NEXT: srl a1, a1, a3
; RV64I-NEXT: slli a0, a0, 1
; RV64I-NEXT: xori a2, a2, 31
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: andi a1, a2, 31
; RV64I-NEXT: srl a0, a0, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ret
;
Expand Down

0 comments on commit c252200

Please sign in to comment.