[X86] Improve shift combining
This folds (ashr (shl a, [56,48,32,24,16]), SarConst)
into       (shl (sext a), [56,48,32,24,16] - SarConst)
or into    (sra (sext a), SarConst - [56,48,32,24,16]),
depending on the sign of (SarConst - [56,48,32,24,16]).

sexts on x86 lower to MOVs (movsbq, movswq, and so on). These MOVs have the
same code size as the shifts above (only a shift by 1 has a smaller encoding).
However, the MOVs have two advantages over shifts on x86 (a worked example
follows):
1. A MOV can write to a register other than its source.
2. A MOV accepts memory operands.
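
As a concrete sanity check of the fold (an editorial illustration, not part of
the commit), the PR24373 case (ShlConst = 56, SarConst = 48 on i64) can be
verified in plain C++. The snippet assumes two's-complement narrowing and an
arithmetic right shift for negative values, as x86 compilers provide:

#include <cassert>
#include <cstdint>

int main() {
  const int64_t Inputs[] = {0x12345678, -1, 0x80, 0x7f};
  for (int64_t a : Inputs) {
    // Original pattern: (ashr (shl a, 56), 48). Shift left through
    // uint64_t to avoid signed-overflow UB; the right shift assumes
    // arithmetic >> on negative values (true for x86 compilers).
    int64_t shifted =
        static_cast<int64_t>(static_cast<uint64_t>(a) << 56) >> 48;
    // Folded form: SarConst - ShlConst = 48 - 56 = -8 < 0, so the combine
    // emits (shl (sext_inreg a, i8), 8), i.e. movsbq then shlq $8.
    // Multiply by 256 instead of << 8 to stay well-defined for negatives.
    int64_t folded = static_cast<int64_t>(static_cast<int8_t>(a)) * 256;
    assert(shifted == folded);
  }
  return 0;
}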

This fixes PR24373.

Patch by: evgeny.v.stupachenko@intel.com
Differential Revision: http://reviews.llvm.org/D13161

llvm-svn: 255761
Michael Kuperstein committed Dec 16, 2015
1 parent 94f181a commit e75e6e2
Showing 5 changed files with 1,104 additions and 854 deletions.
57 changes: 57 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24950,6 +24950,59 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
  return SDValue();
}

static SDValue PerformSRACombine(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned Size = VT.getSizeInBits();

  // fold (ashr (shl a, [56,48,32,24,16]), SarConst)
  // into (shl (sext a), [56,48,32,24,16] - SarConst) or
  // into (sra (sext a), SarConst - [56,48,32,24,16]),
  // depending on the sign of (SarConst - [56,48,32,24,16]).

  // sexts on x86 are MOVs. The MOVs have the same code size as the
  // shifts above (only a shift by 1 has a smaller encoding).
  // However, the MOVs have 2 advantages over a shift:
  // 1. A MOV can write to a register other than its source.
  // 2. A MOV accepts memory operands.

  if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant ||
      N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
      N0.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue();
  APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue();
  EVT CVT = N1.getValueType();

  if (SarConst.isNegative())
    return SDValue();

  for (MVT SVT : MVT::integer_valuetypes()) {
    unsigned ShiftSize = SVT.getSizeInBits();
    // Skip types without a corresponding sext/zext and ShlConst values
    // that are not one of [56,48,32,24,16].
    if (ShiftSize < 8 || ShiftSize > 64 || ShlConst != Size - ShiftSize)
      continue;
    SDLoc DL(N);
    SDValue NN =
        DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));
    SarConst = SarConst - (Size - ShiftSize);
    if (SarConst == 0)
      return NN;
    else if (SarConst.isNegative())
      return DAG.getNode(ISD::SHL, DL, VT, NN,
                         DAG.getConstant(-SarConst, DL, CVT));
    else
      return DAG.getNode(ISD::SRA, DL, VT, NN,
                         DAG.getConstant(SarConst, DL, CVT));
  }
  return SDValue();
}

/// \brief Returns a vector of 0s if the node in input is a vector logical
/// shift by a constant amount which is known to be bigger than or equal
/// to the vector element size in bits.
@@ -24989,6 +25042,10 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
  if (SDValue V = PerformSHLCombine(N, DAG))
    return V;

  if (N->getOpcode() == ISD::SRA)
    if (SDValue V = PerformSRACombine(N, DAG))
      return V;

  // Try to fold this logical shift into a zero vector.
  if (N->getOpcode() != ISD::SRA)
    if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
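
For reference, the decision logic of PerformSRACombine above can be modeled as
a small standalone sketch (an editorial illustration in plain C++; the name
classifyFold and the printed labels are hypothetical, not LLVM API):

#include <cstdint>
#include <cstdio>

// Standalone model of the combine's decision logic: given the value width
// and both shift amounts, report which node the combine would build.
void classifyFold(unsigned Size, unsigned ShlConst, unsigned SarConst) {
  const unsigned ShiftSizes[] = {8, 16, 32, 64};
  for (unsigned ShiftSize : ShiftSizes) {
    // Same guard as the loop above: the shl must leave exactly
    // ShiftSize significant low bits.
    if (ShlConst != Size - ShiftSize)
      continue;
    long long Adjusted = (long long)SarConst - (long long)(Size - ShiftSize);
    if (Adjusted == 0)
      std::printf("sext_inreg to i%u only\n", ShiftSize);
    else if (Adjusted < 0)
      std::printf("sext_inreg to i%u, then shl by %lld\n", ShiftSize,
                  -Adjusted);
    else
      std::printf("sext_inreg to i%u, then sra by %lld\n", ShiftSize,
                  Adjusted);
    return;
  }
  std::printf("no fold\n");
}

int main() {
  classifyFold(64, 56, 48); // movsbq + shlq $8  (the PR24373 case)
  classifyFold(32, 16, 17); // movswl + sarl $1  (shl16sar17 test below)
  classifyFold(32, 16, 16); // movswl only
  return 0;
}

The second call corresponds to the shl16sar17 test added below, where the
adjusted shift amount is positive and an sra remains after the sign extension.
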
14 changes: 11 additions & 3 deletions llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -4,15 +4,23 @@
; a shr (X, -8) that gets subsequently "optimized away" as undef
; PR4254

; After fixing PR24373,
;   shlq $56, %rdi
;   sarq $48, %rdi
; folds into
;   movsbq %dil, %rax
;   shlq $8, %rax
; which is better for x86.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"

define i64 @foo(i64 %b) nounwind readnone {
entry:
; CHECK-LABEL: foo:
-; CHECK: shlq $56, %rdi
-; CHECK: sarq $48, %rdi
-; CHECK: leaq 1(%rdi), %rax
+; CHECK: movsbq %dil, %rax
+; CHECK: shlq $8, %rax
+; CHECK: orq $1, %rax
  %shl = shl i64 %b, 56 ; <i64> [#uses=1]
  %shr = ashr i64 %shl, 48 ; <i64> [#uses=1]
  %add5 = or i64 %shr, 1 ; <i64> [#uses=1]
37 changes: 37 additions & 0 deletions llvm/test/CodeGen/X86/sar_fold.ll
@@ -0,0 +1,37 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s

define i32 @shl16sar15(i32 %a) #0 {
; CHECK-LABEL: shl16sar15:
; CHECK: # BB#0:
; CHECK-NEXT: movswl {{[0-9]+}}(%esp), %eax
  %1 = shl i32 %a, 16
  %2 = ashr exact i32 %1, 15
  ret i32 %2
}

define i32 @shl16sar17(i32 %a) #0 {
; CHECK-LABEL: shl16sar17:
; CHECK: # BB#0:
; CHECK-NEXT: movswl {{[0-9]+}}(%esp), %eax
  %1 = shl i32 %a, 16
  %2 = ashr exact i32 %1, 17
  ret i32 %2
}

define i32 @shl24sar23(i32 %a) #0 {
; CHECK-LABEL: shl24sar23:
; CHECK: # BB#0:
; CHECK-NEXT: movsbl {{[0-9]+}}(%esp), %eax
  %1 = shl i32 %a, 24
  %2 = ashr exact i32 %1, 23
  ret i32 %2
}

define i32 @shl24sar25(i32 %a) #0 {
; CHECK-LABEL: shl24sar25:
; CHECK: # BB#0:
; CHECK-NEXT: movsbl {{[0-9]+}}(%esp), %eax
  %1 = shl i32 %a, 24
  %2 = ashr exact i32 %1, 25
  ret i32 %2
}
43 changes: 43 additions & 0 deletions llvm/test/CodeGen/X86/sar_fold64.ll
@@ -0,0 +1,43 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

define i32 @shl48sar47(i64 %a) #0 {
; CHECK-LABEL: shl48sar47:
; CHECK: # BB#0:
; CHECK-NEXT: movswq %di, %rax
  %1 = shl i64 %a, 48
  %2 = ashr exact i64 %1, 47
  %3 = trunc i64 %2 to i32
  ret i32 %3
}

define i32 @shl48sar49(i64 %a) #0 {
; CHECK-LABEL: shl48sar49:
; CHECK: # BB#0:
; CHECK-NEXT: movswq %di, %rax
  %1 = shl i64 %a, 48
  %2 = ashr exact i64 %1, 49
  %3 = trunc i64 %2 to i32
  ret i32 %3
}

define i32 @shl56sar55(i64 %a) #0 {
; CHECK-LABEL: shl56sar55:
; CHECK: # BB#0:
; CHECK-NEXT: movsbq %dil, %rax
  %1 = shl i64 %a, 56
  %2 = ashr exact i64 %1, 55
  %3 = trunc i64 %2 to i32
  ret i32 %3
}

define i32 @shl56sar57(i64 %a) #0 {
; CHECK-LABEL: shl56sar57:
; CHECK: # BB#0:
; CHECK-NEXT: movsbq %dil, %rax
  %1 = shl i64 %a, 56
  %2 = ashr exact i64 %1, 57
  %3 = trunc i64 %2 to i32
  ret i32 %3
}

attributes #0 = { nounwind }
