diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ecdfb867cc708..f35f663d6ba1b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22243,6 +22243,19 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   unsigned NumElts = VecVT.getVectorNumElements();
   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
 
+  // See if the extracted element is a constant, in which case fold it if it's
+  // a legal fp immediate.
+  if (IndexC && ScalarVT.isFloatingPoint()) {
+    APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
+    KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
+    if (KnownElt.isConstant()) {
+      APFloat CstFP =
+          APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
+      if (TLI.isFPImmLegal(CstFP, ScalarVT))
+        return DAG.getConstantFP(CstFP, DL, ScalarVT);
+    }
+  }
+
   // TODO: These transforms should not require the 'hasOneUse' restriction, but
   // there are regressions on multiple targets without it. We can end up with a
   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index 55750ab34e17a..3c8aca5145261 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -69,11 +69,10 @@ define void @insert_vec_v23i32_uaddlv_from_v8i16(ptr %0) {
 ; CHECK: ; %bb.0: ; %entry
 ; CHECK-NEXT: movi.2d v0, #0000000000000000
 ; CHECK-NEXT: movi.2d v2, #0000000000000000
-; CHECK-NEXT: add x8, x0, #88
+; CHECK-NEXT: str wzr, [x0, #88]
 ; CHECK-NEXT: uaddlv.8h s1, v0
 ; CHECK-NEXT: stp q0, q0, [x0, #16]
 ; CHECK-NEXT: stp q0, q0, [x0, #48]
-; CHECK-NEXT: st1.s { v0 }[2], [x8]
 ; CHECK-NEXT: str d0, [x0, #80]
 ; CHECK-NEXT: mov.s v2[0], v1[0]
 ; CHECK-NEXT: ucvtf.4s v1, v2
diff --git a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll
index 86ebf803c5783..bbd2acbab4246 100644
--- a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll
@@ -30,10 +30,10 @@ define [1 x <4 x float>] @test1() {
 define [1 x <4 x float>] @test2() {
 ; CHECK-LABEL: .p2align 4, 0x0 ; -- Begin function test2
 ; CHECK-NEXT: lCPI1_0:
-; CHECK-NEXT: .long 0x00000000 ; float 0
-; CHECK-NEXT: .long 0x00000000 ; float 0
-; CHECK-NEXT: .long 0x00000000 ; float 0
-; CHECK-NEXT: .long 0x3f800000 ; float 1
+; CHECK-NEXT: .long 0x80000000 ; float -0
+; CHECK-NEXT: .long 0x80000000 ; float -0
+; CHECK-NEXT: .long 0x80000000 ; float -0
+; CHECK-NEXT: .long 0xbf800000 ; float -1
 ; CHECK-NEXT: .section __TEXT,__text,regular,pure_instructions
 ; CHECK-NEXT: .globl _test2
 ; CHECK-NEXT: .p2align 2
@@ -43,17 +43,7 @@ define [1 x <4 x float>] @test2() {
 ; CHECK-NEXT: Lloh2:
 ; CHECK-NEXT: adrp x8, lCPI1_0@PAGE
 ; CHECK-NEXT: Lloh3:
-; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF]
-; CHECK-NEXT: mov s2, v1[1]
-; CHECK-NEXT: fneg s0, s1
-; CHECK-NEXT: mov s3, v1[2]
-; CHECK-NEXT: mov s1, v1[3]
-; CHECK-NEXT: fneg s2, s2
-; CHECK-NEXT: fneg s3, s3
-; CHECK-NEXT: fneg s1, s1
-; CHECK-NEXT: mov.s v0[1], v2[0]
-; CHECK-NEXT: mov.s v0[2], v3[0]
-; CHECK-NEXT: mov.s v0[3], v1[0]
+; CHECK-NEXT: ldr q0, [x8, lCPI1_0@PAGEOFF]
 ; CHECK-NEXT: ret
 ;
   %constexpr = fneg float extractelement (<4 x float> bitcast (<1 x i128> to <4 x float>), i32 0)
diff --git a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
index e7f62b9dfc221..171e16e35fc2f 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -50,13 +50,12 @@ define void @zero_test() {
 ; X86-LABEL: zero_test:
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: xorps %xmm0, %xmm0
-; X86-NEXT: movlps %xmm0, (%eax)
+; X86-NEXT: movsd %xmm0, (%eax)
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: zero_test:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: xorps %xmm0, %xmm0
-; X64-NEXT: movlps %xmm0, (%rax)
+; X64-NEXT: movq $0, (%rax)
 ; X64-NEXT: retq
 entry:
   %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/2012-07-10-extload64.ll b/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
index b6ec3b34eb107..6db17251cd599 100644
--- a/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
+++ b/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -30,7 +30,7 @@ define void @store_64(ptr %ptr) {
 ; X86: # %bb.0: # %BB
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: xorps %xmm0, %xmm0
-; X86-NEXT: movlps %xmm0, (%eax)
+; X86-NEXT: movsd %xmm0, (%eax)
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: store_64:
diff --git a/llvm/test/CodeGen/X86/fold-load-vec.ll b/llvm/test/CodeGen/X86/fold-load-vec.ll
index 348929cdf9f79..0bf846a0930bb 100644
--- a/llvm/test/CodeGen/X86/fold-load-vec.ll
+++ b/llvm/test/CodeGen/X86/fold-load-vec.ll
@@ -10,8 +10,8 @@ define void @sample_test(ptr %source, ptr %dest) nounwind {
 ; CHECK-NEXT: subq $24, %rsp
 ; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, (%rsp)
 ; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movlps %xmm0, (%rsp)
 ; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; CHECK-NEXT: movlps %xmm0, (%rsp)
 ; CHECK-NEXT: movlps %xmm0, (%rsi)
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index 596e465ee8cac..7225257203161 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -1082,12 +1082,11 @@ define void @main.158() #0 {
 ; BWON-F16C-LABEL: main.158:
 ; BWON-F16C: # %bb.0: # %entry
 ; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
-; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; BWON-F16C-NEXT: vucomiss %xmm0, %xmm1
-; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm1
+; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
+; BWON-F16C-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BWON-F16C-NEXT: vucomiss %xmm1, %xmm2
 ; BWON-F16C-NEXT: jae .LBB20_2
 ; BWON-F16C-NEXT: # %bb.1: # %entry
 ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -1100,8 +1099,7 @@ define void @main.158() #0 {
 ; CHECK-I686-LABEL: main.158:
 ; CHECK-I686: # %bb.0: # %entry
 ; CHECK-I686-NEXT: subl $12, %esp
-; CHECK-I686-NEXT: pxor %xmm0, %xmm0
-; CHECK-I686-NEXT: movd %xmm0, (%esp)
+; CHECK-I686-NEXT: movl $0, (%esp)
 ; CHECK-I686-NEXT: calll __truncsfhf2
 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
 ; CHECK-I686-NEXT: movw %ax, (%esp)
diff --git a/llvm/test/CodeGen/X86/nontemporal-3.ll b/llvm/test/CodeGen/X86/nontemporal-3.ll
index a2d2c5ca43011..f9872b10097a1 100644
--- a/llvm/test/CodeGen/X86/nontemporal-3.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-3.ll
@@ -93,247 +93,66 @@ define void @test_zero_v4f64_align1(ptr %dst) nounwind {
 }
 
 define void @test_zero_v8f32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v8f32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8f32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: movntiq %rax, 24(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8f32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v8f32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v8f32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v8f32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
   store <8 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
   ret void
 }
 
 define void @test_zero_v4i64_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v4i64_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v4i64_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v4i64_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v4i64_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v4i64_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v4i64_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
   store <4 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
   ret void
 }
 
 define void @test_zero_v8i32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v8i32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8i32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8i32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v8i32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v8i32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v8i32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
   store <8 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
   ret void
 }
 
 define void @test_zero_v16i16_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v16i16_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16i16_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16i16_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v16i16_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v16i16_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v16i16_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
   store <16 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
   ret void
 }
 
 define void @test_zero_v32i8_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v32i8_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v32i8_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v32i8_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v32i8_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v32i8_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v32i8_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
   store <32 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
   ret void
 }
@@ -508,347 +327,86 @@ define void @test_zero_v8f64_align1(ptr %dst) nounwind {
 }
 
 define void @test_zero_v16f32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v16f32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16f32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: movntiq %rax, 24(%rdi)
-; SSE4A-NEXT: movntiq %rax, 40(%rdi)
-; SSE4A-NEXT: movntiq %rax, 56(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16f32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v16f32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v16f32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v16f32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
   store <16 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
   ret void
 }
 
 define void @test_zero_v8i64_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v8i64_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8i64_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8i64_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v8i64_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v8i64_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v8i64_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
   store <8 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
   ret void
 }
 
 define void @test_zero_v16i32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v16i32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16i32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16i32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v16i32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v16i32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v16i32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
   store <16 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
   ret void
 }
 
 define void @test_zero_v32i16_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v32i16_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v32i16_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v32i16_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v32i16_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v32i16_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v32i16_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
   store <32 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
   ret void
 }
 
 define void @test_zero_v64i8_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v64i8_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v64i8_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v64i8_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v64i8_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v64i8_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v64i8_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
   store <64 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
   ret void
 }
@@ -1214,3 +772,7 @@ define void @test_zero_v64i8_align32(ptr %dst) nounwind {
 }
 
 !1 = !{i32 1}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE2: {{.*}}
+; SSE41: {{.*}}
+; SSE4A: {{.*}}
diff --git a/llvm/test/CodeGen/X86/pr41619.ll b/llvm/test/CodeGen/X86/pr41619.ll
index 7d1d139a38a52..88dcd7798f0c3 100644
--- a/llvm/test/CodeGen/X86/pr41619.ll
+++ b/llvm/test/CodeGen/X86/pr41619.ll
@@ -7,10 +7,9 @@ define void @foo(double %arg) {
 ; CHECK: ## %bb.0: ## %bb
 ; CHECK-NEXT: vmovq %xmm0, %rax
 ; CHECK-NEXT: vmovd %eax, %xmm0
-; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; CHECK-NEXT: vmovq %xmm0, %rax
 ; CHECK-NEXT: movl %eax, (%rax)
-; CHECK-NEXT: vmovlps %xmm1, (%rax)
+; CHECK-NEXT: movq $0, (%rax)
 ; CHECK-NEXT: retq
 bb:
   %tmp = bitcast double %arg to i64
diff --git a/llvm/test/CodeGen/X86/vec_zero_cse.ll b/llvm/test/CodeGen/X86/vec_zero_cse.ll
index 21da191d8d693..d4357aeb2e1de 100644
--- a/llvm/test/CodeGen/X86/vec_zero_cse.ll
+++ b/llvm/test/CodeGen/X86/vec_zero_cse.ll
@@ -16,7 +16,7 @@ define void @test1() {
 ; X86-NEXT: movl $0, M1+4
 ; X86-NEXT: movl $0, M1
 ; X86-NEXT: xorps %xmm0, %xmm0
-; X86-NEXT: movlps %xmm0, M2
+; X86-NEXT: movsd %xmm0, M2
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 1b6d2a2c6298e..be4253b6d5d10 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -3225,7 +3225,7 @@ define void @PR43024() {
 ; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}+4(%rip), %xmm0, %xmm0
 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}+12(%rip), %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vmovss %xmm0, (%rax)
 ; AVX-NEXT: retq
   store <4 x float> , ptr undef, align 16