diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f1a3a685df2080..bdb09b919a391e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13705,9 +13705,15 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
     V = extract128BitVector(V, ExtractIdx, DAG, DL);
   }
 
-  if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector())
-    V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
-                    DAG.getBitcast(MVT::f64, V));
+  // On AVX we can use VBROADCAST directly for scalar sources.
+  if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector()) {
+    V = DAG.getBitcast(MVT::f64, V);
+    if (Subtarget.hasAVX()) {
+      V = DAG.getNode(X86ISD::VBROADCAST, DL, MVT::v2f64, V);
+      return DAG.getBitcast(VT, V);
+    }
+    V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V);
+  }
 
   // If this is a scalar, do the broadcast on this type and bitcast.
   if (!V.getValueType().isVector()) {
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
index 35c449e813c094..7e9a727e7230be 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -1965,7 +1965,7 @@ define <4 x i64> @test_mm256_set1_epi64x(i64 %a0) nounwind {
 ; X64-LABEL: test_mm256_set1_epi64x:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vmovq %rdi, %xmm0
-; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT:    retq
   %res0 = insertelement <4 x i64> undef, i64 %a0, i32 0
diff --git a/llvm/test/CodeGen/X86/avx-splat.ll b/llvm/test/CodeGen/X86/avx-splat.ll
index 7602975c8872de..1890b44eb0750c 100644
--- a/llvm/test/CodeGen/X86/avx-splat.ll
+++ b/llvm/test/CodeGen/X86/avx-splat.ll
@@ -34,7 +34,7 @@ define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
 ; X64-LABEL: funcC:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    vmovq %rdi, %xmm0
-; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT:    retq
 entry:
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index 471298492735ec..64d12cc190d965 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -205,7 +205,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
 ; AVX1-LABEL: ext_i4_4i64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovd %edi, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
@@ -430,7 +430,8 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
 ;
 ; AVX1-LABEL: ext_i8_8i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovd %edi, %xmm0
+; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX1-NEXT:    vmovq %rdi, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index d014798c78c4ff..0d04e0a2146615 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -261,7 +261,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
 ; AVX1-LABEL: ext_i4_4i64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovd %edi, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
@@ -553,7 +553,8 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
 ;
 ; AVX1-LABEL: ext_i8_8i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovd %edi, %xmm0
+; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX1-NEXT:    vmovq %rdi, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll
index d57bd877500c7a..614d134173e7ba 100644
--- a/llvm/test/CodeGen/X86/combine-bitselect.ll
+++ b/llvm/test/CodeGen/X86/combine-bitselect.ll
@@ -505,26 +505,18 @@ define <4 x i64> @bitselect_v4i64_broadcast_rrr(<4 x i64> %a0, <4 x i64> %a1, i6
 ; XOP-LABEL: bitselect_v4i64_broadcast_rrr:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vmovq %rdi, %xmm2
-; XOP-NEXT:    vmovq %rdi, %xmm3
-; XOP-NEXT:    vmovddup {{.*#+}} xmm2 = xmm2[0,0]
+; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
-; XOP-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1]
-; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm3, %ymm3
-; XOP-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; XOP-NEXT:    vandnps %ymm1, %ymm3, %ymm1
-; XOP-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
 ; XOP-NEXT:    retq
 ;
 ; AVX1-LABEL: bitselect_v4i64_broadcast_rrr:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovq %rdi, %xmm2
-; AVX1-NEXT:    vmovq %rdi, %xmm3
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = xmm2[0,0]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1]
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm3, %ymm3
 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vandnps %ymm1, %ymm3, %ymm1
+; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -881,32 +873,22 @@ define <8 x i64> @bitselect_v8i64_broadcast_rrr(<8 x i64> %a0, <8 x i64> %a1, i6
 ; XOP-LABEL: bitselect_v8i64_broadcast_rrr:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vmovq %rdi, %xmm4
-; XOP-NEXT:    vmovq %rdi, %xmm5
-; XOP-NEXT:    vmovddup {{.*#+}} xmm4 = xmm4[0,0]
+; XOP-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
 ; XOP-NEXT:    vinsertf128 $1, %xmm4, %ymm4, %ymm4
-; XOP-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[0,1,0,1]
-; XOP-NEXT:    vinsertf128 $1, %xmm5, %ymm5, %ymm5
-; XOP-NEXT:    vandps %ymm4, %ymm1, %ymm1
-; XOP-NEXT:    vandps %ymm4, %ymm0, %ymm0
-; XOP-NEXT:    vandnps %ymm3, %ymm5, %ymm3
-; XOP-NEXT:    vorps %ymm3, %ymm1, %ymm1
-; XOP-NEXT:    vandnps %ymm2, %ymm5, %ymm2
-; XOP-NEXT:    vorps %ymm2, %ymm0, %ymm0
+; XOP-NEXT:    vpcmov %ymm4, %ymm2, %ymm0, %ymm0
+; XOP-NEXT:    vpcmov %ymm4, %ymm3, %ymm1, %ymm1
 ; XOP-NEXT:    retq
 ;
 ; AVX1-LABEL: bitselect_v8i64_broadcast_rrr:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovq %rdi, %xmm4
-; AVX1-NEXT:    vmovq %rdi, %xmm5
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = xmm4[0,0]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm4, %ymm4
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[0,1,0,1]
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm5, %ymm5
 ; AVX1-NEXT:    vandps %ymm4, %ymm1, %ymm1
 ; AVX1-NEXT:    vandps %ymm4, %ymm0, %ymm0
-; AVX1-NEXT:    vandnps %ymm3, %ymm5, %ymm3
+; AVX1-NEXT:    vandnps %ymm3, %ymm4, %ymm3
 ; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
-; AVX1-NEXT:    vandnps %ymm2, %ymm5, %ymm2
+; AVX1-NEXT:    vandnps %ymm2, %ymm4, %ymm2
 ; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll
index 7868c8b21a935a..ae619ab590ecc2 100644
--- a/llvm/test/CodeGen/X86/combine-pmuldq.ll
+++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll
@@ -372,43 +372,28 @@ define <8 x i32> @PR49658_zext(i32* %ptr, i32 %mul) {
 ; AVX1:       # %bb.0: # %start
 ; AVX1-NEXT:    movl %esi, %eax
 ; AVX1-NEXT:    vmovq %rax, %xmm0
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
-; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    movq $-2097152, %rax # imm = 0xFFE00000
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
-; AVX1-NEXT:    vpsrlq $32, %xmm9, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    .p2align 4, 0x90
 ; AVX1-NEXT:  .LBB7_1: # %loop
 ; AVX1-NEXT:    # =>This Inner Loop Header: Depth=1
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = mem[0],zero,mem[1],zero
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm4 = mem[0],zero,mem[1],zero
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm5 = mem[0],zero,mem[1],zero
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm6 = mem[0],zero,mem[1],zero
-; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm7 = mem[0],zero,mem[1],zero
-; AVX1-NEXT:    vpmuludq %xmm7, %xmm9, %xmm3
-; AVX1-NEXT:    vpmuludq %xmm7, %xmm8, %xmm7
-; AVX1-NEXT:    vpsllq $32, %xmm7, %xmm7
-; AVX1-NEXT:    vpaddq %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpmuludq %xmm6, %xmm1, %xmm7
-; AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm2
-; AVX1-NEXT:    vpmuludq %xmm6, %xmm2, %xmm2
-; AVX1-NEXT:    vpsllq $32, %xmm2, %xmm2
-; AVX1-NEXT:    vpaddq %xmm2, %xmm7, %xmm2
-; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
-; AVX1-NEXT:    vpmuludq %xmm5, %xmm9, %xmm3
-; AVX1-NEXT:    vpmuludq %xmm5, %xmm8, %xmm5
-; AVX1-NEXT:    vpsllq $32, %xmm5, %xmm5
-; AVX1-NEXT:    vpaddq %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpmuludq %xmm4, %xmm1, %xmm5
-; AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm6
-; AVX1-NEXT:    vpmuludq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vpsllq $32, %xmm4, %xmm4
-; AVX1-NEXT:    vpaddq %xmm4, %xmm5, %xmm4
-; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm4[1,3],xmm3[1,3]
+; AVX1-NEXT:    vpmuludq %xmm6, %xmm2, %xmm6
+; AVX1-NEXT:    vpmuludq %xmm5, %xmm1, %xmm5
+; AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm5[1,3],xmm6[1,3]
+; AVX1-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm3[1,3],xmm4[1,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddd %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX1-NEXT:    subq $-128, %rax
 ; AVX1-NEXT:    jne .LBB7_1
 ; AVX1-NEXT:  # %bb.2: # %end
@@ -564,55 +549,28 @@ define <8 x i32> @PR49658_sext(i32* %ptr, i32 %mul) {
 ; AVX1:       # %bb.0: # %start
 ; AVX1-NEXT:    movslq %esi, %rax
 ; AVX1-NEXT:    vmovq %rax, %xmm0
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
-; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    movq $-2097152, %rax # imm = 0xFFE00000
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
-; AVX1-NEXT:    vpsrlq $32, %xmm9, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    .p2align 4, 0x90
 ; AVX1-NEXT:  .LBB8_1: # %loop
 ; AVX1-NEXT:    # =>This Inner Loop Header: Depth=1
-; AVX1-NEXT:    vpmovsxdq 2097152(%rdi,%rax), %xmm4
-; AVX1-NEXT:    vpmovsxdq 2097160(%rdi,%rax), %xmm5
-; AVX1-NEXT:    vpmovsxdq 2097168(%rdi,%rax), %xmm6
-; AVX1-NEXT:    vpmovsxdq 2097176(%rdi,%rax), %xmm7
-; AVX1-NEXT:    vpsrlq $32, %xmm7, %xmm3
-; AVX1-NEXT:    vpmuludq %xmm3, %xmm9, %xmm3
-; AVX1-NEXT:    vpmuludq %xmm7, %xmm8, %xmm2
-; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpsllq $32, %xmm2, %xmm2
-; AVX1-NEXT:    vpmuludq %xmm7, %xmm9, %xmm3
-; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpsrlq $32, %xmm6, %xmm3
-; AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
-; AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm7
-; AVX1-NEXT:    vpmuludq %xmm6, %xmm7, %xmm7
-; AVX1-NEXT:    vpaddq %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpsllq $32, %xmm3, %xmm3
-; AVX1-NEXT:    vpmuludq %xmm6, %xmm1, %xmm6
-; AVX1-NEXT:    vpaddq %xmm3, %xmm6, %xmm3
-; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm3[1,3],xmm2[1,3]
-; AVX1-NEXT:    vpmuludq %xmm5, %xmm8, %xmm3
-; AVX1-NEXT:    vpsrlq $32, %xmm5, %xmm6
-; AVX1-NEXT:    vpmuludq %xmm6, %xmm9, %xmm6
-; AVX1-NEXT:    vpaddq %xmm3, %xmm6, %xmm3
-; AVX1-NEXT:    vpsllq $32, %xmm3, %xmm3
-; AVX1-NEXT:    vpmuludq %xmm5, %xmm9, %xmm5
-; AVX1-NEXT:    vpaddq %xmm3, %xmm5, %xmm3
-; AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm5
-; AVX1-NEXT:    vpmuludq %xmm4, %xmm5, %xmm5
-; AVX1-NEXT:    vpsrlq $32, %xmm4, %xmm6
-; AVX1-NEXT:    vpmuludq %xmm6, %xmm1, %xmm6
-; AVX1-NEXT:    vpaddq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vpsllq $32, %xmm5, %xmm5
-; AVX1-NEXT:    vpmuludq %xmm4, %xmm1, %xmm4
-; AVX1-NEXT:    vpaddq %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm4[1,3],xmm3[1,3]
+; AVX1-NEXT:    vpmovsxdq 2097152(%rdi,%rax), %xmm3
+; AVX1-NEXT:    vpmovsxdq 2097160(%rdi,%rax), %xmm4
+; AVX1-NEXT:    vpmovsxdq 2097168(%rdi,%rax), %xmm5
+; AVX1-NEXT:    vpmovsxdq 2097176(%rdi,%rax), %xmm6
+; AVX1-NEXT:    vpmuldq %xmm6, %xmm2, %xmm6
+; AVX1-NEXT:    vpmuldq %xmm5, %xmm1, %xmm5
+; AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm5[1,3],xmm6[1,3]
+; AVX1-NEXT:    vpmuldq %xmm4, %xmm2, %xmm4
+; AVX1-NEXT:    vpmuldq %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm3[1,3],xmm4[1,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddd %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX1-NEXT:    subq $-128, %rax
 ; AVX1-NEXT:    jne .LBB8_1
 ; AVX1-NEXT:  # %bb.2: # %end
diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll
index c88f294926a217..a0b7df81d5801f 100644
--- a/llvm/test/CodeGen/X86/insertelement-var-index.ll
+++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -461,7 +461,7 @@ define <4 x i64> @arg_i64_v4i64_undef(i64 %x, i32 %y) nounwind {
 ; AVX1-LABEL: arg_i64_v4i64_undef:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovq %rdi, %xmm0
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -1422,7 +1422,7 @@ define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
 ; AVX1-NEXT:    movslq %edi, %rax
 ; AVX1-NEXT:    vmovq %rax, %xmm2
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = xmm2[0,0]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT:    vpcmpeqq {{.*}}(%rip), %xmm2, %xmm3
 ; AVX1-NEXT:    vpcmpeqq {{\.LCPI.*}}+{{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
@@ -1704,7 +1704,7 @@ define <4 x double> @load_f64_v4f64(<4 x double> %v, double* %p, i32 %y) nounwin
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    movslq %esi, %rax
 ; AVX1-NEXT:    vmovq %rax, %xmm1
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; AVX1-NEXT:    vpcmpeqq {{.*}}(%rip), %xmm1, %xmm2
 ; AVX1-NEXT:    vpcmpeqq {{\.LCPI.*}}+{{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1