[X86] Disable shouldFoldConstantShiftPairToMask for scalar shifts on AMD targets (PR40758)

D61068 handled vector shifts; this patch does the same for scalars, where there is a similar number of pipes for shifts as for bit ops - this is true almost entirely for AMD targets, where the scalar ALUs are well balanced.

This combine avoids the AND immediate mask, which usually means we reduce the encoding size.

Some tests show use of a (slow, scaled) LEA instead of SHL in some cases, but that's due to the particular shift immediates - shift+mask generates these just as easily.
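
For illustration, a minimal sketch of the trade-off, abbreviated from the test_i32_shl_lshr_2 case in the shift-mask.ll update below (register moves and kill markers omitted):

; IR input: a logical shift pair with different shift amounts.
define i32 @test_i32_shl_lshr_2(i32 %a0) {
  %1 = lshr i32 %a0, 5
  %2 = shl i32 %1, 3
  ret i32 %2
}
; Default x86-64 lowering folds the pair into one shift plus an AND immediate (X64-MASK):
;   shrl $2, %eax
;   andl $-8, %eax
; With fast-scalar-shift-masks (AMD targets), the shift pair is kept (X64-SHIFT):
;   shrl $5, %edi
;   leal (,%rdi,8), %eax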

Differential Revision: https://reviews.llvm.org/D61830

llvm-svn: 360684
RKSimon committed May 14, 2019
1 parent 3b91701 commit c2d9cfd
Showing 4 changed files with 132 additions and 58 deletions.
16 changes: 13 additions & 3 deletions llvm/lib/Target/X86/X86.td
@@ -427,6 +427,11 @@ def FeatureFastHorizontalOps
"Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
"normal vector instructions with shuffles", [FeatureSSE3]>;

def FeatureFastScalarShiftMasks
: SubtargetFeature<
"fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
"Prefer a left/right scalar logical shift pair over a shift+and pair">;

def FeatureFastVectorShiftMasks
: SubtargetFeature<
"fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
@@ -784,6 +789,7 @@ def ProcessorFeatures {
FeatureSlowSHLD,
FeatureLAHFSAHF,
FeatureFast15ByteNOP,
FeatureFastScalarShiftMasks,
FeatureFastVectorShiftMasks];
list<SubtargetFeature> BtVer1Features = BtVer1InheritableFeatures;

@@ -825,6 +831,7 @@ def ProcessorFeatures {
FeatureSlowSHLD,
FeatureLAHFSAHF,
FeatureFast11ByteNOP,
FeatureFastScalarShiftMasks,
FeatureBranchFusion];
list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;

@@ -876,6 +883,7 @@ def ProcessorFeatures {
FeatureFastBEXTR,
FeatureFast15ByteNOP,
FeatureBranchFusion,
FeatureFastScalarShiftMasks,
FeatureMMX,
FeatureMOVBE,
FeatureMWAITX,
@@ -1092,20 +1100,22 @@ foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL,
Feature64Bit, FeatureSlowSHLD, FeatureCMOV]>;
Feature64Bit, FeatureSlowSHLD, FeatureCMOV,
FeatureFastScalarShiftMasks]>;
}

foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
FeatureSlowSHLD, FeatureCMOV, Feature64Bit]>;
FeatureSlowSHLD, FeatureCMOV, Feature64Bit,
FeatureFastScalarShiftMasks]>;
}

foreach P = ["amdfam10", "barcelona"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA,
FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV,
Feature64Bit]>;
Feature64Bit, FeatureFastScalarShiftMasks]>;
}

// Bobcat
9 changes: 5 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5021,11 +5021,12 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
(N->getOpcode() == ISD::SRL &&
N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask");

if (Subtarget.hasFastVectorShiftMasks() && N->getValueType(0).isVector()) {
EVT VT = N->getValueType(0);
if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
(Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
// Only fold if the shift values are equal - so it folds to AND.
// TODO - we should fold if either is non-uniform but we don't do the
// fold for non-splats yet.
// TODO - we should fold if either is a non-uniform vector but we don't do
// the fold for non-splats yet.
return N->getOperand(1) == N->getOperand(0).getOperand(1);
}
return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86Subtarget.h
@@ -396,6 +396,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Try harder to combine to horizontal vector ops if they are fast.
bool HasFastHorizontalOps = false;

/// Prefer a left/right scalar logical shifts pair over a shift+and pair.
bool HasFastScalarShiftMasks = false;

/// Prefer a left/right vector logical shifts pair over a shift+and pair.
bool HasFastVectorShiftMasks = false;

@@ -650,6 +653,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
bool hasFastBEXTR() const { return HasFastBEXTR; }
bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
bool hasMacroFusion() const { return HasMacroFusion; }
bool hasBranchFusion() const { return HasBranchFusion; }
161 changes: 110 additions & 51 deletions llvm/test/CodeGen/X86/shift-mask.ll
@@ -43,13 +43,21 @@ define i8 @test_i8_shl_lshr_1(i8 %a0) {
; X86-NEXT: andb $-32, %al
; X86-NEXT: retl
;
; X64-LABEL: test_i8_shl_lshr_1:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (,%rdi,4), %eax
; X64-NEXT: andb $-32, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i8_shl_lshr_1:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
; X64-MASK-NEXT: leal (,%rdi,4), %eax
; X64-MASK-NEXT: andb $-32, %al
; X64-MASK-NEXT: # kill: def $al killed $al killed $eax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i8_shl_lshr_1:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: movl %edi, %eax
; X64-SHIFT-NEXT: shrb $3, %al
; X64-SHIFT-NEXT: shlb $5, %al
; X64-SHIFT-NEXT: # kill: def $al killed $al killed $eax
; X64-SHIFT-NEXT: retq
%1 = lshr i8 %a0, 3
%2 = shl i8 %1, 5
ret i8 %2
@@ -63,13 +71,21 @@ define i8 @test_i8_shl_lshr_2(i8 %a0) {
; X86-NEXT: andb $56, %al
; X86-NEXT: retl
;
; X64-LABEL: test_i8_shl_lshr_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrb $2, %al
; X64-NEXT: andb $56, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i8_shl_lshr_2:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: movl %edi, %eax
; X64-MASK-NEXT: shrb $2, %al
; X64-MASK-NEXT: andb $56, %al
; X64-MASK-NEXT: # kill: def $al killed $al killed $eax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i8_shl_lshr_2:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: # kill: def $edi killed $edi def $rdi
; X64-SHIFT-NEXT: shrb $5, %dil
; X64-SHIFT-NEXT: leal (,%rdi,8), %eax
; X64-SHIFT-NEXT: # kill: def $al killed $al killed $eax
; X64-SHIFT-NEXT: retq
%1 = lshr i8 %a0, 5
%2 = shl i8 %1, 3
ret i8 %2
@@ -103,13 +119,21 @@ define i16 @test_i16_shl_lshr_1(i16 %a0) {
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_i16_shl_lshr_1:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (,%rdi,4), %eax
; X64-NEXT: andl $65504, %eax # imm = 0xFFE0
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i16_shl_lshr_1:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
; X64-MASK-NEXT: leal (,%rdi,4), %eax
; X64-MASK-NEXT: andl $65504, %eax # imm = 0xFFE0
; X64-MASK-NEXT: # kill: def $ax killed $ax killed $eax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i16_shl_lshr_1:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: movzwl %di, %eax
; X64-SHIFT-NEXT: shrl $3, %eax
; X64-SHIFT-NEXT: shll $5, %eax
; X64-SHIFT-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SHIFT-NEXT: retq
%1 = lshr i16 %a0, 3
%2 = shl i16 %1, 5
ret i16 %2
@@ -124,13 +148,21 @@ define i16 @test_i16_shl_lshr_2(i16 %a0) {
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_i16_shl_lshr_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl $2, %eax
; X64-NEXT: andl $16376, %eax # imm = 0x3FF8
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i16_shl_lshr_2:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: movl %edi, %eax
; X64-MASK-NEXT: shrl $2, %eax
; X64-MASK-NEXT: andl $16376, %eax # imm = 0x3FF8
; X64-MASK-NEXT: # kill: def $ax killed $ax killed $eax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i16_shl_lshr_2:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: movzwl %di, %eax
; X64-SHIFT-NEXT: shrl $5, %eax
; X64-SHIFT-NEXT: shll $3, %eax
; X64-SHIFT-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SHIFT-NEXT: retq
%1 = lshr i16 %a0, 5
%2 = shl i16 %1, 3
ret i16 %2
@@ -161,12 +193,19 @@ define i32 @test_i32_shl_lshr_1(i32 %a0) {
; X86-NEXT: andl $-32, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i32_shl_lshr_1:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (,%rdi,4), %eax
; X64-NEXT: andl $-32, %eax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i32_shl_lshr_1:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
; X64-MASK-NEXT: leal (,%rdi,4), %eax
; X64-MASK-NEXT: andl $-32, %eax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i32_shl_lshr_1:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: movl %edi, %eax
; X64-SHIFT-NEXT: shrl $3, %eax
; X64-SHIFT-NEXT: shll $5, %eax
; X64-SHIFT-NEXT: retq
%1 = lshr i32 %a0, 3
%2 = shl i32 %1, 5
ret i32 %2
@@ -180,12 +219,19 @@ define i32 @test_i32_shl_lshr_2(i32 %a0) {
; X86-NEXT: andl $-8, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i32_shl_lshr_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl $2, %eax
; X64-NEXT: andl $-8, %eax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i32_shl_lshr_2:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: movl %edi, %eax
; X64-MASK-NEXT: shrl $2, %eax
; X64-MASK-NEXT: andl $-8, %eax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i32_shl_lshr_2:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: # kill: def $edi killed $edi def $rdi
; X64-SHIFT-NEXT: shrl $5, %edi
; X64-SHIFT-NEXT: leal (,%rdi,8), %eax
; X64-SHIFT-NEXT: retq
%1 = lshr i32 %a0, 5
%2 = shl i32 %1, 3
ret i32 %2
@@ -219,11 +265,18 @@ define i64 @test_i64_shl_lshr_1(i64 %a0) {
; X86-NEXT: shldl $2, %ecx, %edx
; X86-NEXT: retl
;
; X64-LABEL: test_i64_shl_lshr_1:
; X64: # %bb.0:
; X64-NEXT: leaq (,%rdi,4), %rax
; X64-NEXT: andq $-32, %rax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i64_shl_lshr_1:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: leaq (,%rdi,4), %rax
; X64-MASK-NEXT: andq $-32, %rax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i64_shl_lshr_1:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: movq %rdi, %rax
; X64-SHIFT-NEXT: shrq $3, %rax
; X64-SHIFT-NEXT: shlq $5, %rax
; X64-SHIFT-NEXT: retq
%1 = lshr i64 %a0, 3
%2 = shl i64 %1, 5
ret i64 %2
@@ -239,12 +292,18 @@ define i64 @test_i64_shl_lshr_2(i64 %a0) {
; X86-NEXT: shrl $2, %edx
; X86-NEXT: retl
;
; X64-LABEL: test_i64_shl_lshr_2:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $2, %rax
; X64-NEXT: andq $-8, %rax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i64_shl_lshr_2:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: movq %rdi, %rax
; X64-MASK-NEXT: shrq $2, %rax
; X64-MASK-NEXT: andq $-8, %rax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i64_shl_lshr_2:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: shrq $5, %rdi
; X64-SHIFT-NEXT: leaq (,%rdi,8), %rax
; X64-SHIFT-NEXT: retq
%1 = lshr i64 %a0, 5
%2 = shl i64 %1, 3
ret i64 %2
