Commit fd1731d

[X86] For Silvermont CPU use 16-bit division instead of 64-bit for small positive numbers

Differential Revision: http://reviews.llvm.org/D5938

llvm-svn: 222521
Alexey Volkov committed Nov 21, 2014
1 parent 30a9907 commit fd1731d
Showing 5 changed files with 51 additions and 12 deletions.
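The change generalizes the existing 32-to-8-bit divide bypass so a target can also request a 64-to-16-bit bypass: the generated code tests at run time whether both operands fit in the narrow width and, if so, uses the much cheaper narrow divide. As a rough illustration, here is a minimal C++ sketch of the guard the emitted code performs for the new 64-bit case (the standalone function and its name are ours for illustration, not part of the patch):

    #include <cstdint>

    // Illustrative sketch only: the moral equivalent of what the backend
    // emits around a 64-bit sdiv when idivq-to-divw is enabled.
    int64_t div64WithBypass(int64_t a, int64_t b) {
      // orq + testq $-65536: are any bits above the low 16 set in either
      // operand? Negative inputs have high bits set, so only small
      // positive values take the fast path.
      if (((uint64_t(a) | uint64_t(b)) & ~uint64_t(0xFFFF)) == 0)
        return uint16_t(a) / uint16_t(b);  // the cheap divw path
      return a / b;                        // the full divq path
    }

The premise, per the commit title, is that on Silvermont the or/test/branch overhead is small next to the cost of a full 64-bit divide, so the guard pays off whenever small values dominate.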
12 changes: 8 additions & 4 deletions llvm/lib/Target/X86/X86.td
@@ -167,9 +167,12 @@ def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
                                    "Support SMAP instructions">;
 def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                        "Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
-                                     "HasSlowDivide", "true",
-                                     "Use small divide for positive values less than 256">;
+def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+                                     "HasSlowDivide32", "true",
+                                     "Use 8-bit divide for positive values less than 256">;
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+                                     "HasSlowDivide64", "true",
+                                     "Use 16-bit divide for positive values less than 65536">;
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
@@ -234,7 +237,7 @@ def : ProcessorModel<"penryn", SandyBridgeModel,
 def : ProcessorModel<"atom", AtomModel,
                      [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
                       FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                      FeatureSlowDivide,
+                      FeatureSlowDivide32, FeatureSlowDivide64,
                       FeatureCallRegIndirect,
                       FeatureLEAUsesAG,
                       FeaturePadShortFunctions]>;
@@ -244,6 +247,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
                      FeatureSSE42, FeatureCMPXCHG16B,
                      FeatureMOVBE, FeaturePOPCNT,
                      FeaturePCLMUL, FeatureAES,
+                     FeatureSlowDivide64,
                      FeatureCallRegIndirect,
                      FeaturePRFCHW,
                      FeatureSlowLEA, FeatureSlowIncDec,
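The first template argument of each SubtargetFeature definition is the name surfaced through -mattr, so the two bypasses are now independently controllable from the command line; the RUN lines of the new test below enable them as +idivl-to-divb and +idivq-to-divw. Note that Silvermont (the slm model) gets only FeatureSlowDivide64, while Atom keeps both.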
7 changes: 4 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -249,9 +249,10 @@ void X86TargetLowering::resetOperationActions() {
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
   // Bypass expensive divides on Atom when compiling with O2
-  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
-    addBypassSlowDiv(32, 8);
-    if (Subtarget->is64Bit())
+  if (TM.getOptLevel() >= CodeGenOpt::Default) {
+    if (Subtarget->hasSlowDivide32())
+      addBypassSlowDiv(32, 8);
+    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
       addBypassSlowDiv(64, 16);
   }
 
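Each addBypassSlowDiv(SlowWidth, FastWidth) call records a width pair that is consumed later, at the IR level, to wrap matching divides in the run-time check; this appears to happen in the CodeGenPrepare-time slow-division bypass, though that plumbing is outside this diff. Since CodeGenOpt::Default corresponds to -O2, the >= check keeps the extra compare and branch out of lower optimization levels.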
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86Subtarget.cpp
@@ -267,7 +267,8 @@ void X86Subtarget::initializeEnvironment() {
   HasVectorUAMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
-  HasSlowDivide = false;
+  HasSlowDivide32 = false;
+  HasSlowDivide64 = false;
   PadShortFunctions = false;
   CallRegIndirect = false;
   LEAUsesAG = false;
13 changes: 9 additions & 4 deletions llvm/lib/Target/X86/X86Subtarget.h
@@ -171,9 +171,13 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;
 
-  /// HasSlowDivide - True if smaller divides are significantly faster than
-  /// full divides and should be used when possible.
-  bool HasSlowDivide;
+  /// HasSlowDivide32 - True if 8-bit divisions are significantly faster than
+  /// 32-bit divisions and should be used when possible.
+  bool HasSlowDivide32;
+
+  /// HasSlowDivide64 - True if 16-bit divides are significantly faster than
+  /// 64-bit divisions and should be used when possible.
+  bool HasSlowDivide64;
 
   /// PadShortFunctions - True if the short functions should be padded to prevent
   /// a stall when returning too early.
@@ -373,7 +377,8 @@
   bool hasVectorUAMem() const { return HasVectorUAMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
-  bool hasSlowDivide() const { return HasSlowDivide; }
+  bool hasSlowDivide32() const { return HasSlowDivide32; }
+  bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
   bool callRegIndirect() const { return CallRegIndirect; }
   bool LEAusesAG() const { return LEAUsesAG; }
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/X86/slow-div.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw < %s | FileCheck -check-prefix=DIV64 %s
+
+define i32 @div32(i32 %a, i32 %b) {
+entry:
+; DIV32-LABEL: div32:
+; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
+; DIV32: testl $-256, [[REG]]
+; DIV32: divb
+; DIV64-LABEL: div32:
+; DIV64-NOT: divb
+  %div = sdiv i32 %a, %b
+  ret i32 %div
+}
+
+define i64 @div64(i64 %a, i64 %b) {
+entry:
+; DIV32-LABEL: div64:
+; DIV32-NOT: divw
+; DIV64-LABEL: div64:
+; DIV64: orq %{{.*}}, [[REG:%[a-z]+]]
+; DIV64: testq $-65536, [[REG]]
+; DIV64: divw
+  %div = sdiv i64 %a, %b
+  ret i64 %div
+}
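To see the new pattern by hand, running an llc built from this revision with the same flags as the second RUN line, e.g.

    llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw slow-div.ll -o -

should print the guard followed by the narrow divide for @div64. The $-65536 immediate sign-extends to the 64-bit mask 0xFFFFFFFFFFFF0000, so the orq/testq pair checks that neither operand has any bits set above the low 16 before control reaches divw.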

