Skip to content

Commit

Permalink
[X86] Add DAG combines to enable removing of movddup/vbroadcast + simple_load isel patterns.
Browse files Browse the repository at this point in the history
  • Loading branch information
topperc committed Mar 7, 2020
1 parent bc65b68 commit d41ea65
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 22 deletions.
45 changes: 45 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -35074,6 +35074,31 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return R;

switch (Opcode) {
case X86ISD::MOVDDUP: {
SDValue Src = N.getOperand(0);
// Turn a 128-bit MOVDDUP of a full vector load into movddup+vzload.
if (VT == MVT::v2f64 && Src.hasOneUse() &&
ISD::isNormalLoad(Src.getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(Src);
// Unless the load is volatile or atomic.
if (LN->isSimple()) {
SDVTList Tys = DAG.getVTList(MVT::v2f64, MVT::Other);
SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
SDValue VZLoad =
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::f64,
LN->getPointerInfo(),
LN->getAlignment(),
LN->getMemOperand()->getFlags());
SDValue Movddup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, VZLoad);
DCI.CombineTo(N.getNode(), Movddup);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return N; // Return N so it doesn't get rechecked!
}
}

return SDValue();
}
case X86ISD::VBROADCAST: {
SDValue Src = N.getOperand(0);
SDValue BC = peekThroughBitcasts(Src);
Expand Down Expand Up @@ -35162,6 +35187,26 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
}

// vbroadcast(vector load X) -> vbroadcast_load
// Fold a VBROADCAST of a full v2f64 vector load into a single
// VBROADCAST_LOAD memory node with MVT::f64 memory type: only 64 bits are
// read instead of the whole vector. Mirrors the MOVDDUP combine above and
// replaces the removed "X86VBroadcast (simple_load ...)" isel patterns.
if (SrcVT == MVT::v2f64 && Src.hasOneUse() &&
    ISD::isNormalLoad(Src.getNode())) {
  // Require this broadcast to be the load's only user so the original
  // wide load can be deleted after the fold.
  LoadSDNode *LN = cast<LoadSDNode>(Src);
  // Unless the load is volatile or atomic.
  if (LN->isSimple()) {
    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
    SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
    // Memory type is f64; the original load's pointer info, alignment and
    // memory-operand flags are carried over to the new node.
    SDValue BcastLd =
        DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
                                MVT::f64, LN->getPointerInfo(),
                                LN->getAlignment(),
                                LN->getMemOperand()->getFlags());
    // Replace the broadcast with the combined load+broadcast node, rewire
    // the old load's chain users, and delete the dead original load.
    DCI.CombineTo(N.getNode(), BcastLd);
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
    DCI.recursivelyDeleteUnusedNodes(LN);
    return N; // Return N so it doesn't get rechecked!
  }
}

return SDValue();
}
case X86ISD::BLENDI: {
Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Expand Up @@ -10858,8 +10858,6 @@ defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
(VMOVDDUPZ128rm addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(v2f64 VR128X:$src0)),
Expand All @@ -10875,13 +10873,6 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
immAllZerosV),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
immAllZerosV),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
Expand Down
7 changes: 0 additions & 7 deletions llvm/lib/Target/X86/X86InstrSSE.td
Expand Up @@ -4476,16 +4476,11 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;


let Predicates = [HasAVX, NoVLX] in {
def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}

let Predicates = [UseSSE3] in {
// No need for aligned memory as this only loads 64-bits.
def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
(MOVDDUPrm addr:$src)>;
def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(MOVDDUPrm addr:$src)>;
}
Expand Down Expand Up @@ -7612,8 +7607,6 @@ let Predicates = [HasAVX, NoVLX] in {

def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
(VMOVDDUPrr VR128:$src)>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
(VMOVDDUPrm addr:$src)>;
}

let Predicates = [HasAVX1Only] in {
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX2,X86-AVX2
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,AVX512,X86-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX2,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512,X64-AVX512
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX2,X86-AVX2
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,AVX512,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX2,X64-AVX2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512,X64-AVX512
;
; Combine tests involving AVX target shuffles

Expand Down

0 comments on commit d41ea65

Please sign in to comment.