[X86][SSE] Fix domains for VZEXT_LOAD type instructions
Add the missing domain equivalences for the movss, movsd, movd and movq zero-extending load instructions.

Differential Revision: https://reviews.llvm.org/D27684

llvm-svn: 289825
RKSimon committed Dec 15, 2016
1 parent 879a657 commit d751889
Showing 48 changed files with 196 additions and 202 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8457,6 +8457,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
{ X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
{ X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
{ X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm },
{ X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm },
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
{ X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
{ X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
Expand All @@ -8473,6 +8475,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
{ X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
{ X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
{ X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm },
{ X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm },
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
@@ -8549,6 +8553,8 @@ static const uint16_t ReplaceableInstrsAVX512[][4] = {
{ X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA32Zrr },
{ X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU32Zmr },
{ X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU32Zrm },
{ X86::VMOVSDZrm, X86::VMOVSDZrm, X86::VMOVQI2PQIZrm, X86::VMOVQI2PQIZrm, },
{ X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm, X86::VMOVDI2PDIZrm, },
};

static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
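Each row in these ReplaceableInstrs tables lists the same operation encoded for the three SSE execution domains (packed-single, packed-double, packed-integer); repeating an opcode within a row, as the new MOVSS/MOVSD entries do, records that no separate encoding exists for that column's domain. As a rough illustration of how such a table is consulted — a minimal standalone C++ sketch with invented opcode values, not LLVM's real enums or API — the lookup amounts to:

// Minimal sketch of the domain-equivalence lookup that tables like
// ReplaceableInstrs feed. The opcode enum, table contents and function
// below are invented for illustration; they are not LLVM's definitions.
#include <cstdint>
#include <cstdio>

enum FakeOpcode : uint16_t { MOVSSrm, MOVSDrm, MOVDI2PDIrm, MOVQI2PQIrm, NO_EQUIV };

// Column 0 = packed-single, 1 = packed-double, 2 = packed-integer domain.
static const uint16_t Replaceable[][3] = {
    {MOVSDrm, MOVSDrm, MOVQI2PQIrm}, // movsd zext load: FP form reused twice
    {MOVSSrm, MOVSSrm, MOVDI2PDIrm}, // movss zext load
};

// Find the row containing Opcode and return its equivalent in Domain,
// or NO_EQUIV if the instruction has no entry (it stays in its domain).
static uint16_t lookupInDomain(uint16_t Opcode, unsigned Domain) {
  for (const auto &Row : Replaceable)
    for (unsigned Col = 0; Col != 3; ++Col)
      if (Row[Col] == Opcode)
        return Row[Domain];
  return NO_EQUIV;
}

int main() {
  // If the zero-extended load's users sit in the integer domain, the pass
  // can re-encode the FP-domain movsd load as its integer twin, movq.
  std::printf("MOVSDrm -> %u (MOVQI2PQIrm = %u)\n",
              static_cast<unsigned>(lookupInDomain(MOVSDrm, /*Domain=*/2)),
              static_cast<unsigned>(MOVQI2PQIrm));
}

In LLVM itself this lookup is driven by the ExecutionDepsFix pass via X86InstrInfo::getExecutionDomain/setExecutionDomain; giving the zero-extending scalar loads table entries is what lets the test checks below flip between movd/movss, movq/movsd, movdqa/movaps and andpd/andps, avoiding domain-crossing penalties.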
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s

; CHECK: xorpd {{.*}}{{LCPI0_0|__xmm@}}
; CHECK: xorps {{.*}}{{LCPI0_0|__xmm@}}
define void @casin({ double, double }* sret %agg.result, double %z.0, double %z.1) nounwind {
entry:
%memtmp = alloca { double, double }, align 8 ; <{ double, double }*> [#uses=3]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -18,8 +18,8 @@ define void @bad_cast() {
define void @bad_insert(i32 %t) {
; CHECK-LABEL: bad_insert:
; CHECK: # BB#0:
; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovdqa %ymm0, (%eax)
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovaps %ymm0, (%eax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
%v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -403,7 +403,7 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_storeu_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovupd %xmm0, (%eax)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -16,7 +16,7 @@ define <8 x i16> @test2(<4 x i16>* %v) nounwind {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: retl
%v9 = load <4 x i16>, <4 x i16> * %v, align 8
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -279,7 +279,7 @@ define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
; X32-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
; X32-AVX2: ## BB#0:
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,4,5,6,7,6,7],zero,zero
; X32-AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; X32-AVX2-NEXT: retl
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/avx512-mov.ll
@@ -31,7 +31,7 @@ define <2 x i64> @test3(i64 %x) {
define <4 x i32> @test4(i32* %x) {
; CHECK-LABEL: test4:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load i32, i32* %x
@@ -89,7 +89,7 @@ define i64 @test9(<2 x i64> %x) {
define <4 x i32> @test10(i32* %x) {
; CHECK-LABEL: test10:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load i32, i32* %x, align 4
@@ -140,7 +140,7 @@ define <4 x i32> @test14(i32 %x) {
define <4 x i32> @test15(i32* %x) {
; CHECK-LABEL: test15:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load i32, i32* %x, align 4
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/fp-logic.ll
@@ -231,7 +231,7 @@ define double @f7_double(double %x) {
; CHECK-LABEL: f7_double:
; CHECK: # BB#0:
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: andpd %xmm1, %xmm0
; CHECK-NEXT: andps %xmm1, %xmm0
; CHECK-NEXT: retq
;
%bc1 = bitcast double %x to i64
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/fp128-cast.ll
@@ -46,7 +46,7 @@ entry:
; X64-LABEL: TestFPExtF64_F128:
; X64: movsd vf64(%rip), %xmm0
; X64-NEXT: callq __extenddftf2
; X64-NEXT: movapd %xmm0, vf128(%rip)
; X64-NEXT: movaps %xmm0, vf128(%rip)
; X64: ret
}

1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/i64-mem-copy.ll
@@ -69,6 +69,7 @@ define void @store_i64_from_vector256(<16 x i16> %x, <16 x i16> %y, i64* %i) {
define void @PR23476(<5 x i64> %in, i64* %out, i32 %index) {
; X32-LABEL: PR23476:
; X32: movsd {{.*#+}} xmm0 = mem[0],zero
; X32: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movsd %xmm0, (%eax)
%ext = extractelement <5 x i64> %in, i32 %index
store i64 %ext, i64* %out, align 8
17 changes: 9 additions & 8 deletions llvm/test/CodeGen/X86/logical-load-fold.ll
@@ -1,27 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2,sse-unaligned-mem | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX

; Although we have the ability to fold an unaligned load with AVX
; Although we have the ability to fold an unaligned load with AVX
; and under special conditions with some SSE implementations, we
; can not fold the load under any circumstances in these test
; cases because they are not 16-byte loads. The load must be
; executed as a scalar ('movs*') with a zero extension to
; 128-bits and then used in the packed logical ('andp*') op.
; 128-bits and then used in the packed logical ('andp*') op.
; PR22371 - http://llvm.org/bugs/show_bug.cgi?id=22371

define double @load_double_no_fold(double %x, double %y) {
; SSE2-LABEL: load_double_no_fold:
; SSE2: BB#0:
; SSE2: # BB#0:
; SSE2-NEXT: cmplesd %xmm0, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: andpd %xmm1, %xmm0
; SSE2-NEXT: andps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: load_double_no_fold:
; AVX: BB#0:
; AVX: # BB#0:
; AVX-NEXT: vcmplesd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

%cmp = fcmp oge double %x, %y
Expand All @@ -32,14 +33,14 @@ define double @load_double_no_fold(double %x, double %y) {

define float @load_float_no_fold(float %x, float %y) {
; SSE2-LABEL: load_float_no_fold:
; SSE2: BB#0:
; SSE2: # BB#0:
; SSE2-NEXT: cmpless %xmm0, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: andps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: load_float_no_fold:
; AVX: BB#0:
; AVX: # BB#0:
; AVX-NEXT: vcmpless %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
54 changes: 27 additions & 27 deletions llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -416,12 +416,12 @@ define <4 x i32> @merge_4i32_i32_23u5(i32* %ptr) nounwind uwtable noinline ssp {
define <4 x i32> @merge_4i32_i32_3zuu(i32* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_3zuu:
; SSE: # BB#0:
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: merge_4i32_i32_3zuu:
; AVX: # BB#0:
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: merge_4i32_i32_3zuu:
Expand All @@ -436,7 +436,7 @@ define <4 x i32> @merge_4i32_i32_3zuu(i32* %ptr) nounwind uwtable noinline ssp {
; X32-SSE41-LABEL: merge_4i32_i32_3zuu:
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE41-NEXT: retl
%ptr0 = getelementptr inbounds i32, i32* %ptr, i64 3
%val0 = load i32, i32* %ptr0
Expand All @@ -448,12 +448,12 @@ define <4 x i32> @merge_4i32_i32_3zuu(i32* %ptr) nounwind uwtable noinline ssp {
define <4 x i32> @merge_4i32_i32_34uu(i32* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_34uu:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: merge_4i32_i32_34uu:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: merge_4i32_i32_34uu:
Expand All @@ -469,7 +469,7 @@ define <4 x i32> @merge_4i32_i32_34uu(i32* %ptr) nounwind uwtable noinline ssp {
; X32-SSE41-LABEL: merge_4i32_i32_34uu:
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-SSE41-NEXT: retl
%ptr0 = getelementptr inbounds i32, i32* %ptr, i64 3
%ptr1 = getelementptr inbounds i32, i32* %ptr, i64 4
Expand All @@ -483,12 +483,12 @@ define <4 x i32> @merge_4i32_i32_34uu(i32* %ptr) nounwind uwtable noinline ssp {
define <4 x i32> @merge_4i32_i32_45zz(i32* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_45zz:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: merge_4i32_i32_45zz:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: merge_4i32_i32_45zz:
Expand All @@ -506,7 +506,7 @@ define <4 x i32> @merge_4i32_i32_45zz(i32* %ptr) nounwind uwtable noinline ssp {
; X32-SSE41-LABEL: merge_4i32_i32_45zz:
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-SSE41-NEXT: retl
%ptr0 = getelementptr inbounds i32, i32* %ptr, i64 4
%ptr1 = getelementptr inbounds i32, i32* %ptr, i64 5
@@ -599,12 +599,12 @@ define <8 x i16> @merge_8i16_i16_23u567u9(i16* %ptr) nounwind uwtable noinline ssp {
define <8 x i16> @merge_8i16_i16_34uuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_8i16_i16_34uuuuuu:
; SSE: # BB#0:
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: merge_8i16_i16_34uuuuuu:
; AVX: # BB#0:
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: merge_8i16_i16_34uuuuuu:
Expand All @@ -620,7 +620,7 @@ define <8 x i16> @merge_8i16_i16_34uuuuuu(i16* %ptr) nounwind uwtable noinline s
; X32-SSE41-LABEL: merge_8i16_i16_34uuuuuu:
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE41-NEXT: retl
%ptr0 = getelementptr inbounds i16, i16* %ptr, i64 3
%ptr1 = getelementptr inbounds i16, i16* %ptr, i64 4
Expand All @@ -634,12 +634,12 @@ define <8 x i16> @merge_8i16_i16_34uuuuuu(i16* %ptr) nounwind uwtable noinline s
define <8 x i16> @merge_8i16_i16_45u7zzzz(i16* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_8i16_i16_45u7zzzz:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: merge_8i16_i16_45u7zzzz:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: merge_8i16_i16_45u7zzzz:
@@ -667,7 +667,7 @@ define <8 x i16> @merge_8i16_i16_45u7zzzz(i16* %ptr) nounwind uwtable noinline ssp {
; X32-SSE41-LABEL: merge_8i16_i16_45u7zzzz:
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-SSE41-NEXT: retl
%ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
%ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
@@ -811,12 +811,12 @@ define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(i8* %ptr) nounwind uwtable noinline ssp {
define <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(i8* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
; SSE: # BB#0:
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
; AVX: # BB#0:
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
Expand All @@ -839,7 +839,7 @@ define <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(i8* %ptr) nounwind uwtable noin
; X32-SSE41-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE41-NEXT: retl
%ptr0 = getelementptr inbounds i8, i8* %ptr, i64 0
%ptr1 = getelementptr inbounds i8, i8* %ptr, i64 1
Expand All @@ -861,12 +861,12 @@ define <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(i8* %ptr) nounwind uwtable noin
define <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(i8* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
@@ -905,7 +905,7 @@ define <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(i8* %ptr) nounwind uwtable noinline ssp {
; X32-SSE41-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-SSE41-NEXT: retl
%ptr0 = getelementptr inbounds i8, i8* %ptr, i64 0
%ptr1 = getelementptr inbounds i8, i8* %ptr, i64 1
@@ -934,14 +934,14 @@ define <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(i8* %ptr) nounwind uwtable noinline ssp {
define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
; SSE-LABEL: merge_4i32_i32_combine:
; SSE: # BB#0:
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movdqa %xmm0, (%rdi)
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: merge_4i32_i32_combine:
; AVX: # BB#0:
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovdqa %xmm0, (%rdi)
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovaps %xmm0, (%rdi)
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: merge_4i32_i32_combine:
Expand All @@ -959,8 +959,8 @@ define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE41-NEXT: movdqa %xmm0, (%eax)
; X32-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE41-NEXT: movaps %xmm0, (%eax)
; X32-SSE41-NEXT: retl
%1 = getelementptr i32, i32* %src, i32 0
%2 = load i32, i32* %1
