[x86] add load fold patterns for movddup with vzext_load
The missed load folding noticed in D55898 is visible independently of that
change, either with an adjusted IR pattern to start from or with AVX2/AVX512
(where the build vector becomes a broadcast first; movddup is not produced
until we get into isel via tablegen patterns).

Differential Revision: https://reviews.llvm.org/D55936

llvm-svn: 350005
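
As context for the diffs below, here is a minimal IR sketch of the fold this enables, modeled on the movddup-load-fold.ll test in this commit. The RUN lines and the trailing shufflevector are assumptions (the diff truncates both); the assumed mask duplicates lanes 0 and 1, so the <x, y, x, y> build vector can become a single movddup that loads the two stack arguments as one 64-bit pair:

; Assumed RUN lines -- the actual test's triple/attrs are not shown in the diff:
; RUN: llc < %s -mtriple=i686-- -mattr=+sse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=i686-- -mattr=+avx  | FileCheck %s --check-prefix=AVX
define <4 x float> @movddup_load_fold(float %x, float %y) {
  %i0 = insertelement <4 x float> zeroinitializer, float %x, i32 0
  %i1 = insertelement <4 x float> %i0, float %y, i32 1
  ; Assumed shuffle mask: broadcast the low 64 bits (lanes 0,1) into lanes 2,3.
  %r = shufflevector <4 x float> %i1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %r
}

With the new X86vzload patterns, isel folds the zero-extending 64-bit load directly into movddup (mem[0,0]) instead of emitting a separate movsd load followed by a register movddup, as the test diffs below show.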
rotateright committed Dec 22, 2018
1 parent c682c19 commit 52c02d7
Showing 5 changed files with 13 additions and 10 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX512.td
@@ -11217,6 +11217,8 @@ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
 def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
           (VMOVDDUPZ128rm addr:$src)>;
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+          (VMOVDDUPZ128rm addr:$src)>;
 
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                    (v2f64 VR128X:$src0)),
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86InstrSSE.td
@@ -4669,12 +4669,16 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(X86Movddup (loadv2f64 addr:$src)),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
 }
 
 let Predicates = [UseSSE3] in {
   // No need for aligned memory as this only loads 64-bits.
   def : Pat<(X86Movddup (loadv2f64 addr:$src)),
             (MOVDDUPrm addr:$src)>;
+  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+            (MOVDDUPrm addr:$src)>;
 }
 
 //===---------------------------------------------------------------------===//
@@ -8034,6 +8038,8 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVDDUPrr VR128:$src)>;
   def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
             (VMOVDDUPrm addr:$src)>;
+  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+            (VMOVDDUPrm addr:$src)>;
 }
 
 let Predicates = [HasAVX1Only] in {
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/build-vector-128.ll
@@ -526,8 +526,7 @@ define <4 x float> @PR37502(float %x, float %y) {
 ;
 ; SSE41-32-LABEL: PR37502:
 ; SSE41-32:       # %bb.0:
-; SSE41-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE41-32-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-32-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
 ; SSE41-32-NEXT:    retl
 ;
 ; SSE41-64-LABEL: PR37502:
@@ -538,8 +537,7 @@ define <4 x float> @PR37502(float %x, float %y) {
 ;
 ; AVX-32-LABEL: PR37502:
 ; AVX-32:       # %bb.0:
-; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX-32-NEXT:    retl
 ;
 ; AVX1-64-LABEL: PR37502:
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/movddup-load-fold.ll
@@ -9,14 +9,12 @@
 define <4 x float> @movddup_load_fold(float %x, float %y) {
 ; SSE-LABEL: movddup_load_fold:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
 ; SSE-NEXT:    retl
 ;
 ; AVX-LABEL: movddup_load_fold:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX-NEXT:    retl
   %i0 = insertelement <4 x float> zeroinitializer, float %x, i32 0
   %i1 = insertelement <4 x float> %i0, float %y, i32 1
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -332,8 +332,7 @@ define void @buildvector_v4f32_0404(float %a, float %b, <4 x float>* %ptr) {
 ; X86AVX2-LABEL: buildvector_v4f32_0404:
 ; X86AVX2:       # %bb.0:
 ; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; X86AVX2-NEXT:    vmovapd %xmm0, (%eax)
 ; X86AVX2-NEXT:    retl
 ;
