[DAG] MatchRotate - support rotate-by-constant of illegal types
Patch to fix some of the regressions in D77804.

By folding to rotate/funnel-shift by constant amounts for illegal types, we prevent SimplifyDemandedBits from destroying the patterns prematurely, allowing us to use the rotate/funnel-shift legalization that was added in D112443.

Differential Revision: https://reviews.llvm.org/D113192
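
As an illustration of the kind of pattern this affects, consider a rotate-by-constant written as shifts of a type that is illegal for the target, e.g. i64 on a 32-bit target. The sketch below is hypothetical (modelled on the grev16 tests updated in this commit), not a test from the patch itself:

define i64 @rotl_i64_16(i64 %a) {
  ; the shift amounts sum to the bit width (16 + 48 = 64), so this is rotl(%a, 16)
  %shl = shl i64 %a, 16
  %shr = lshr i64 %a, 48
  %or = or i64 %shl, %shr
  ret i64 %or
}

Before this change, MatchRotate gave up whenever the target had no rotate/funnel-shift support for the type, so SimplifyDemandedBits could split the shl/lshr pair before type legalization had a chance to lower it as a rotate.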
Simon Pilgrim committed Nov 19, 2021
1 parent bf55b9f commit 812e64e
Showing 13 changed files with 367 additions and 382 deletions.
25 changes: 18 additions & 7 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6981,11 +6981,13 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
EVT VT = LHS.getValueType();

// The target must have at least one rotate/funnel flavor.
// We still try to match rotate by constant pre-legalization.
// TODO: Support pre-legalization funnel-shift by constant.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
bool HasFSHL = hasOperation(ISD::FSHL, VT);
bool HasFSHR = hasOperation(ISD::FSHR, VT);
if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();

// Check for truncated rotate.
@@ -7038,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.

// TODO: Support pre-legalization funnel-shift by constant.
bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
if (!IsRotate && !(HasFSHL || HasFSHR))
return SDValue(); // Requires funnel shift support.
@@ -7066,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Res;
if (IsRotate && (HasROTL || HasROTR))
Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
HasROTL ? LHSShiftAmt : RHSShiftAmt);
else
Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
bool UseROTL = !LegalOperations || HasROTL;
Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
UseROTL ? LHSShiftAmt : RHSShiftAmt);
} else {
bool UseFSHL = !LegalOperations || HasFSHL;
Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
}

// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -7095,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return Res;
}

// Even pre-legalization, we can't easily rotate/funnel-shift by a variable
// shift.
if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();

// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
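The pre-legalization TODOs above cover the funnel-shift case: when the two shifted operands differ, the matched pattern is a funnel shift rather than a rotate, and that case still requires FSHL/FSHR support from the target. For contrast with the rotate sketch above, a hypothetical funnel-shift pattern:

define i64 @fshl_i64_16(i64 %a, i64 %b) {
  ; distinct shifted operands, so this is fshl(%a, %b, 16) rather than a rotate
  %shl = shl i64 %a, 16
  %shr = lshr i64 %b, 48
  %or = or i64 %shl, %shr
  ret i64 %or
}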
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/ARM/rotate.ll
@@ -7,8 +7,8 @@ define <2 x i64> @testcase(<2 x i64>* %in) {
; CHECK-LABEL: testcase:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: vshl.i64 q9, q8, #56
; CHECK-NEXT: vshr.u64 q8, q8, #8
; CHECK-NEXT: vshr.u64 q9, q8, #8
; CHECK-NEXT: vshl.i64 q8, q8, #56
; CHECK-NEXT: vorr q0, q8, q9
; CHECK-NEXT: bx lr
%1 = load <2 x i64>, <2 x i64>* %in
25 changes: 9 additions & 16 deletions llvm/test/CodeGen/PowerPC/vector-rotates.ll
@@ -110,23 +110,16 @@ define <2 x i64> @rotl_v2i64(<2 x i64> %a) {
;
; CHECK-P7-LABEL: rotl_v2i64:
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addi r3, r1, -48
; CHECK-P7-NEXT: stxvd2x vs34, 0, r3
; CHECK-P7-NEXT: ld r3, -40(r1)
; CHECK-P7-NEXT: sldi r4, r3, 53
; CHECK-P7-NEXT: rldicl r3, r3, 53, 11
; CHECK-P7-NEXT: std r4, -8(r1)
; CHECK-P7-NEXT: ld r4, -48(r1)
; CHECK-P7-NEXT: sldi r5, r4, 41
; CHECK-P7-NEXT: rldicl r4, r4, 41, 23
; CHECK-P7-NEXT: std r5, -16(r1)
; CHECK-P7-NEXT: addi r5, r1, -16
; CHECK-P7-NEXT: lxvw4x vs0, 0, r5
; CHECK-P7-NEXT: std r3, -24(r1)
; CHECK-P7-NEXT: addi r3, r1, -32
; CHECK-P7-NEXT: std r4, -32(r1)
; CHECK-P7-NEXT: lxvw4x vs1, 0, r3
; CHECK-P7-NEXT: xxlor vs34, vs0, vs1
; CHECK-P7-NEXT: stxvd2x vs34, 0, r3
; CHECK-P7-NEXT: ld r3, -24(r1)
; CHECK-P7-NEXT: rotldi r3, r3, 53
; CHECK-P7-NEXT: std r3, -8(r1)
; CHECK-P7-NEXT: ld r3, -32(r1)
; CHECK-P7-NEXT: rotldi r3, r3, 41
; CHECK-P7-NEXT: std r3, -16(r1)
; CHECK-P7-NEXT: addi r3, r1, -16
; CHECK-P7-NEXT: lxvd2x vs34, 0, r3
; CHECK-P7-NEXT: blr
entry:
%b = shl <2 x i64> %a, <i64 41, i64 53>
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -1642,9 +1642,9 @@ define i64 @grev8_i64(i64 %a) nounwind {
define i32 @grev16_i32(i32 %a) nounwind {
; RV32I-LABEL: grev16_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 16
; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: ret
;
; RV32ZBP-LABEL: grev16_i32:
@@ -2045,12 +2045,12 @@ define signext i32 @grev16_i32_fshr(i32 signext %a) nounwind {
define i64 @grev16_i64(i64 %a) nounwind {
; RV32I-LABEL: grev16_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a2, a1, 16
; RV32I-NEXT: srli a3, a0, 16
; RV32I-NEXT: srli a2, a0, 16
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: srli a1, a1, 16
; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: srli a2, a1, 16
; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: ret
;
; RV32ZBP-LABEL: grev16_i64:
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -1853,14 +1853,14 @@ define i64 @grev8_i64(i64 %a) nounwind {
define signext i32 @grev16_i32(i32 signext %a) nounwind {
; RV64I-LABEL: grev16_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slliw a1, a0, 16
; RV64I-NEXT: srliw a0, a0, 16
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: slliw a0, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBP-LABEL: grev16_i32:
; RV64ZBP: # %bb.0:
; RV64ZBP-NEXT: greviw a0, a0, 16
; RV64ZBP-NEXT: roriw a0, a0, 16
; RV64ZBP-NEXT: ret
%shl = shl i32 %a, 16
%shr = lshr i32 %a, 16
@@ -1935,9 +1935,9 @@ define i64 @grev16_i64(i64 %a) nounwind {
define i64 @grev32(i64 %a) nounwind {
; RV64I-LABEL: grev32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: srli a1, a0, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBP-LABEL: grev32:
28 changes: 12 additions & 16 deletions llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -367,19 +367,17 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV32-NEXT: neg a4, a2
; RV32-NEXT: neg a5, a0
; RV32-NEXT: sw a5, 0(s0)
; RV32-NEXT: andi a4, a4, 7
; RV32-NEXT: sb a4, 12(s0)
; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: sub a0, a0, a3
; RV32-NEXT: sw a0, 4(s0)
; RV32-NEXT: slli a0, a2, 2
; RV32-NEXT: srli a2, a4, 30
; RV32-NEXT: sub a2, a2, a0
; RV32-NEXT: andi a2, a2, 7
; RV32-NEXT: sb a2, 12(s0)
; RV32-NEXT: srli a2, a1, 31
; RV32-NEXT: srli a0, a1, 31
; RV32-NEXT: andi a1, a1, 1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: or a1, a2, a1
; RV32-NEXT: sub a0, a1, a0
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: slli a1, a2, 2
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: sw a0, 8(s0)
; RV32-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
@@ -541,19 +539,17 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV32M-NEXT: neg a4, a2
; RV32M-NEXT: neg a5, a0
; RV32M-NEXT: sw a5, 0(s0)
; RV32M-NEXT: andi a4, a4, 7
; RV32M-NEXT: sb a4, 12(s0)
; RV32M-NEXT: slli a3, a3, 1
; RV32M-NEXT: sub a0, a0, a3
; RV32M-NEXT: sw a0, 4(s0)
; RV32M-NEXT: slli a0, a2, 2
; RV32M-NEXT: srli a2, a4, 30
; RV32M-NEXT: sub a2, a2, a0
; RV32M-NEXT: andi a2, a2, 7
; RV32M-NEXT: sb a2, 12(s0)
; RV32M-NEXT: srli a2, a1, 31
; RV32M-NEXT: srli a0, a1, 31
; RV32M-NEXT: andi a1, a1, 1
; RV32M-NEXT: slli a1, a1, 1
; RV32M-NEXT: or a1, a2, a1
; RV32M-NEXT: sub a0, a1, a0
; RV32M-NEXT: or a0, a0, a1
; RV32M-NEXT: slli a1, a2, 2
; RV32M-NEXT: sub a0, a0, a1
; RV32M-NEXT: sw a0, 8(s0)
; RV32M-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
; RV32M-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/rotate-extract-vector.ll
@@ -132,18 +132,18 @@ define <4 x i32> @vrolw_extract_mul_with_mask(<4 x i32> %i) nounwind {
define <32 x i16> @illegal_no_extract_mul(<32 x i16> %i) nounwind {
; X86-LABEL: illegal_no_extract_mul:
; X86: # %bb.0:
; X86-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm1
; X86-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
; X86-NEXT: vpsrlw $10, %zmm0, %zmm0
; X86-NEXT: vporq %zmm0, %zmm1, %zmm0
; X86-NEXT: vpsrlw $10, %zmm0, %zmm1
; X86-NEXT: vpsllw $6, %zmm0, %zmm0
; X86-NEXT: vporq %zmm1, %zmm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: illegal_no_extract_mul:
; X64: # %bb.0:
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; X64-NEXT: vpsrlw $10, %zmm0, %zmm0
; X64-NEXT: vporq %zmm0, %zmm1, %zmm0
; X64-NEXT: vpsrlw $10, %zmm0, %zmm1
; X64-NEXT: vpsllw $6, %zmm0, %zmm0
; X64-NEXT: vporq %zmm1, %zmm0, %zmm0
; X64-NEXT: retq
%lhs_mul = mul <32 x i16> %i, <i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640, i16 640>
%rhs_mul = mul <32 x i16> %i, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
31 changes: 12 additions & 19 deletions llvm/test/CodeGen/X86/rotate-extract.ll
@@ -12,13 +12,13 @@
define i64 @rolq_extract_shl(i64 %i) nounwind {
; X86-LABEL: rolq_extract_shl:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: leal (,%edx,8), %eax
; X86-NEXT: shldl $10, %ecx, %edx
; X86-NEXT: shll $10, %ecx
; X86-NEXT: shrl $25, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shldl $3, %edx, %ecx
; X86-NEXT: shll $3, %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: shldl $7, %ecx, %eax
; X86-NEXT: shrdl $25, %ecx, %edx
; X86-NEXT: retl
;
; X64-LABEL: rolq_extract_shl:
@@ -105,21 +105,14 @@ define i8 @rolb_extract_udiv(i8 %i) nounwind {
define i64 @rolq_extract_mul_with_mask(i64 %i) nounwind {
; X86-LABEL: rolq_extract_mul_with_mask:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shll $7, %ecx
; X86-NEXT: leal (%ecx,%ecx,8), %ecx
; X86-NEXT: movl $9, %edx
; X86-NEXT: mull %edx
; X86-NEXT: leal (%esi,%esi,8), %eax
; X86-NEXT: addl %edx, %eax
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: shrl $25, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: leal (%eax,%eax,8), %ecx
; X86-NEXT: movl $9, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: shrdl $25, %eax, %edx
; X86-NEXT: movzbl %dl, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: rolq_extract_mul_with_mask:
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/X86/rotate.ll
@@ -113,11 +113,11 @@ define i64 @rotr64(i64 %A, i8 %Amt) nounwind {
define i64 @rotli64(i64 %A) nounwind {
; X86-LABEL: rotli64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: shldl $5, %eax, %edx
; X86-NEXT: shldl $5, %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shldl $5, %edx, %eax
; X86-NEXT: shldl $5, %ecx, %edx
; X86-NEXT: retl
;
; X64-LABEL: rotli64:
@@ -155,11 +155,11 @@ define i64 @rotri64(i64 %A) nounwind {
define i64 @rotl1_64(i64 %A) nounwind {
; X86-LABEL: rotl1_64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: shldl $1, %eax, %edx
; X86-NEXT: shldl $1, %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shldl $1, %edx, %eax
; X86-NEXT: shldl $1, %ecx, %edx
; X86-NEXT: retl
;
; X64-LABEL: rotl1_64:
@@ -569,11 +569,11 @@ define void @rotr1_64_mem(i64* %Aptr) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shldl $31, %ecx, %esi
; X86-NEXT: shldl $31, %edx, %ecx
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: shldl $31, %edx, %esi
; X86-NEXT: shldl $31, %ecx, %edx
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/rotate2.ll
@@ -5,11 +5,11 @@
define i64 @test1(i64 %x) nounwind {
; X86-LABEL: test1:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: shldl $9, %eax, %edx
; X86-NEXT: shldl $9, %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shldl $9, %edx, %eax
; X86-NEXT: shldl $9, %ecx, %edx
; X86-NEXT: retl
;
; X64-LABEL: test1:
