diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d219f82a7a97aa..4796c22f190805 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -55036,25 +55036,37 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
                        Src.getOperand(1));
 
-  // Reduce v2i64 to v4i32 if we don't need the upper bits.
+  // Reduce v2i64 to v4i32 if we don't need the upper bits or they are known zero.
   // TODO: Move to DAGCombine/SimplifyDemandedBits?
-  if (VT == MVT::v2i64 || VT == MVT::v2f64) {
-    auto IsAnyExt64 = [](SDValue Op) {
-      if (Op.getValueType() != MVT::i64 || !Op.hasOneUse())
+  if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Src.hasOneUse()) {
+    auto IsExt64 = [&DAG](SDValue Op, bool IsZeroExt) {
+      if (Op.getValueType() != MVT::i64)
         return SDValue();
-      if (Op.getOpcode() == ISD::ANY_EXTEND &&
+      unsigned Opc = IsZeroExt ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND;
+      if (Op.getOpcode() == Opc &&
           Op.getOperand(0).getScalarValueSizeInBits() <= 32)
         return Op.getOperand(0);
+      unsigned Ext = IsZeroExt ? ISD::ZEXTLOAD : ISD::EXTLOAD;
       if (auto *Ld = dyn_cast<LoadSDNode>(Op))
-        if (Ld->getExtensionType() == ISD::EXTLOAD &&
+        if (Ld->getExtensionType() == Ext &&
             Ld->getMemoryVT().getScalarSizeInBits() <= 32)
           return Op;
+      if (IsZeroExt && DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 32)))
+        return Op;
       return SDValue();
     };
-    if (SDValue ExtSrc = IsAnyExt64(peekThroughOneUseBitcasts(Src)))
+
+    if (SDValue AnyExt = IsExt64(peekThroughOneUseBitcasts(Src), false))
       return DAG.getBitcast(
           VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
-                          DAG.getAnyExtOrTrunc(ExtSrc, DL, MVT::i32)));
+                          DAG.getAnyExtOrTrunc(AnyExt, DL, MVT::i32)));
+
+    if (SDValue ZeroExt = IsExt64(peekThroughOneUseBitcasts(Src), true))
+      return DAG.getBitcast(
+          VT,
+          DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v4i32,
+                      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
+                                  DAG.getZExtOrTrunc(ZeroExt, DL, MVT::i32))));
   }
 
   // Combine (v2i64 (scalar_to_vector (i64 (bitconvert (mmx))))) to MOVQ2DQ.
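For illustration only, not part of the patch: a minimal sketch of IR that should hit the new known-zero path, mirroring test2 in vec_set-B.ll further down. The `and` mask proves the upper 32 bits of the inserted i64 are zero, so MaskedValueIsZero succeeds and isel can use a 32-bit movd (which implicitly zeroes the upper lanes) instead of going through a 64-bit GPR with movl+movq. The function name below is hypothetical.

  ; Hypothetical reproducer; per the vec_set-B.ll update below,
  ; llc -mtriple=x86_64-- should now select "movd %edi, %xmm0" here.
  define <2 x i64> @known_zero_upper_bits(i64 %arg) {
    %a = and i64 %arg, 1234567  ; 1234567 < 2^21, so bits 63:32 are known zero
    %v = insertelement <2 x i64> undef, i64 %a, i32 0
    ret <2 x i64> %v
  }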
diff --git a/llvm/test/CodeGen/X86/buildvec-extract.ll b/llvm/test/CodeGen/X86/buildvec-extract.ll
index 4826b959e39b14..ebd027b979e737 100644
--- a/llvm/test/CodeGen/X86/buildvec-extract.ll
+++ b/llvm/test/CodeGen/X86/buildvec-extract.ll
@@ -198,17 +198,25 @@ define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
 }
 
 define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
-; SSE-LABEL: extract0_i32_zext_insert1_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract0_i32_zext_insert1_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: extract0_i32_zext_insert1_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extract0_i32_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; AVX-NEXT:    retq
   %e = extractelement <4 x i32> %x, i32 0
@@ -242,24 +250,18 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
 define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
 ; SSE2-LABEL: extract1_i32_zext_insert1_i64_zero:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    movq %rax, %xmm0
+; SSE2-NEXT:    psrlq $32, %xmm0
 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: extract1_i32_zext_insert1_i64_zero:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    extractps $1, %xmm0, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
-; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extract1_i32_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vextractps $1, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
-; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero
 ; AVX-NEXT:    retq
   %e = extractelement <4 x i32> %x, i32 1
   %z = zext i32 %e to i64
@@ -330,25 +332,15 @@ define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
 }
 
 define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
-; SSE2-LABEL: extract3_i32_zext_insert1_i64_zero:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    movq %rax, %xmm0
-; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: extract3_i32_zext_insert1_i64_zero:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    extractps $3, %xmm0, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
-; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE41-NEXT:    retq
+; SSE-LABEL: extract3_i32_zext_insert1_i64_zero:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: extract3_i32_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vextractps $3, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; AVX-NEXT:    retq
   %e = extractelement <4 x i32> %x, i32 3
@@ -538,17 +530,22 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 
 define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract0_i16_zext_insert1_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pextrw $0, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract0_i16_zext_insert1_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pextrw $0, %xmm0, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: extract0_i16_zext_insert1_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extract0_i16_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpextrw $0, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 0
@@ -581,18 +578,21 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 
 define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract1_i16_zext_insert1_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pextrw $1, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract1_i16_zext_insert1_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pextrw $1, %xmm0, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: extract1_i16_zext_insert1_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extract1_i16_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpextrw $1, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
-; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 1
   %z = zext i16 %e to i64
@@ -628,18 +628,21 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 
 define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract2_i16_zext_insert1_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pextrw $2, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract2_i16_zext_insert1_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: extract2_i16_zext_insert1_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extract2_i16_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpextrw $2, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
-; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 2
   %z = zext i16 %e to i64
@@ -674,18 +677,20 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 
 define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract3_i16_zext_insert1_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pextrw $3, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract3_i16_zext_insert1_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    psrlq $48, %xmm0
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: extract3_i16_zext_insert1_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extract3_i16_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpextrw $3, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
-; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 3
   %z = zext i16 %e to i64
diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll
index 806816ec8ea31d..c3d23f49439dc1 100644
--- a/llvm/test/CodeGen/X86/combine-pmuldq.ll
+++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll
@@ -332,8 +332,7 @@ declare dso_local i32 @foo(i32, i32, i32, i32)
 define <8 x i32> @PR49658_zext(ptr %ptr, i32 %mul) {
 ; SSE-LABEL: PR49658_zext:
 ; SSE:       # %bb.0: # %start
-; SSE-NEXT:    movl %esi, %eax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %esi, %xmm0
 ; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
 ; SSE-NEXT:    pxor %xmm0, %xmm0
 ; SSE-NEXT:    movq $-2097152, %rax # imm = 0xFFE00000
diff --git a/llvm/test/CodeGen/X86/fast-isel-bitcast-crash.ll b/llvm/test/CodeGen/X86/fast-isel-bitcast-crash.ll
index c46b5cca558d32..716a93a5aca7d5 100644
--- a/llvm/test/CodeGen/X86/fast-isel-bitcast-crash.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-bitcast-crash.ll
@@ -15,8 +15,7 @@
 define <8 x i16> @bitcast_crash(i32 %arg, <8 x i16> %x, i1 %c) {
 ; CHECK-LABEL: bitcast_crash:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    movd %edi, %xmm1
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; CHECK-NEXT:    testb $1, %sil
 ; CHECK-NEXT:    je .LBB0_2
diff --git a/llvm/test/CodeGen/X86/gather-addresses.ll b/llvm/test/CodeGen/X86/gather-addresses.ll
index 4c4e4b2dae5773..a67ce8f0be5b06 100644
--- a/llvm/test/CodeGen/X86/gather-addresses.ll
+++ b/llvm/test/CodeGen/X86/gather-addresses.ll
@@ -149,11 +149,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; LIN-SSE2-NEXT:    andl %ecx, %edx
 ; LIN-SSE2-NEXT:    andl %ecx, %esi
 ; LIN-SSE2-NEXT:    andl %ecx, %edi
-; LIN-SSE2-NEXT:    movq %rax, %xmm0
-; LIN-SSE2-NEXT:    movq %rdx, %xmm1
+; LIN-SSE2-NEXT:    movd %eax, %xmm0
+; LIN-SSE2-NEXT:    movd %edx, %xmm1
 ; LIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; LIN-SSE2-NEXT:    movq %rdi, %xmm2
-; LIN-SSE2-NEXT:    movq %rsi, %xmm1
+; LIN-SSE2-NEXT:    movd %edi, %xmm2
+; LIN-SSE2-NEXT:    movd %esi, %xmm1
 ; LIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; LIN-SSE2-NEXT:    retq
 ;
@@ -169,11 +169,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; LIN-SSE4-NEXT:    andl %ecx, %edx
 ; LIN-SSE4-NEXT:    andl %ecx, %esi
 ; LIN-SSE4-NEXT:    andl %ecx, %edi
-; LIN-SSE4-NEXT:    movq %rdx, %xmm1
-; LIN-SSE4-NEXT:    movq %rax, %xmm0
+; LIN-SSE4-NEXT:    movd %edx, %xmm1
+; LIN-SSE4-NEXT:    movd %eax, %xmm0
 ; LIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; LIN-SSE4-NEXT:    movq %rdi, %xmm2
-; LIN-SSE4-NEXT:    movq %rsi, %xmm1
+; LIN-SSE4-NEXT:    movd %edi, %xmm2
+; LIN-SSE4-NEXT:    movd %esi, %xmm1
 ; LIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; LIN-SSE4-NEXT:    retq
 ;
@@ -192,11 +192,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; WIN-SSE2-NEXT:    andl %r9d, %ecx
 ; WIN-SSE2-NEXT:    andl %r9d, %edx
 ; WIN-SSE2-NEXT:    andl %r9d, %r8d
-; WIN-SSE2-NEXT:    movq %rax, %xmm0
-; WIN-SSE2-NEXT:    movq %rcx, %xmm1
+; WIN-SSE2-NEXT:    movd %eax, %xmm0
+; WIN-SSE2-NEXT:    movd %ecx, %xmm1
 ; WIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; WIN-SSE2-NEXT:    movq %r8, %xmm2
-; WIN-SSE2-NEXT:    movq %rdx, %xmm1
+; WIN-SSE2-NEXT:    movd %r8d, %xmm2
+; WIN-SSE2-NEXT:    movd %edx, %xmm1
 ; WIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; WIN-SSE2-NEXT:    retq
 ;
@@ -212,11 +212,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; WIN-SSE4-NEXT:    andl %r9d, %ecx
 ; WIN-SSE4-NEXT:    andl %r9d, %edx
 ; WIN-SSE4-NEXT:    andl %r9d, %r8d
-; WIN-SSE4-NEXT:    movq %rcx, %xmm1
-; WIN-SSE4-NEXT:    movq %rax, %xmm0
+; WIN-SSE4-NEXT:    movd %ecx, %xmm1
+; WIN-SSE4-NEXT:    movd %eax, %xmm0
 ; WIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; WIN-SSE4-NEXT:    movq %r8, %xmm2
-; WIN-SSE4-NEXT:    movq %rdx, %xmm1
+; WIN-SSE4-NEXT:    movd %r8d, %xmm2
+; WIN-SSE4-NEXT:    movd %edx, %xmm1
 ; WIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; WIN-SSE4-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll
index 7e366e3b00ebe4..399c160335922b 100644
--- a/llvm/test/CodeGen/X86/insertelement-var-index.ll
+++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -1101,8 +1100,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movapd %xmm0, %xmm2
 ; SSE41-NEXT:    movddup {{.*#+}} xmm1 = xmm1[0,0]
-; SSE41-NEXT:    movl %edi, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
+; SSE41-NEXT:    movd %edi, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; SSE41-NEXT:    pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
@@ -1112,8 +1111,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
 ; AVX1-LABEL: arg_f64_v2f64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX1-NEXT:    movl %edi, %eax
-; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vmovd %edi, %xmm2
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
@@ -1458,8 +1456,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movapd %xmm0, %xmm1
 ; SSE41-NEXT:    movddup {{.*#+}} xmm2 = mem[0,0]
-; SSE41-NEXT:    movl %esi, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
+; SSE41-NEXT:    movd %esi, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; SSE41-NEXT:    pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
@@ -1469,8 +1466,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
 ; AVX1-LABEL: load_f64_v2f64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX1-NEXT:    movl %esi, %eax
-; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vmovd %esi, %xmm2
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
index 13b07532ceea4b..5ebcde3053a7b3 100644
--- a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
+++ b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
@@ -255,7 +255,7 @@ define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
 ; SSE-NEXT:    movzbl (%rdi), %ecx
 ; SSE-NEXT:    movl $42, %eax
 ; SSE-NEXT:    shrq %cl, %rax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: lshr_op0_constant:
@@ -263,7 +263,7 @@ define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
 ; AVX-NEXT:    movzbl (%rdi), %ecx
 ; AVX-NEXT:    movl $42, %eax
 ; AVX-NEXT:    shrq %cl, %rax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
   %x = load i64, ptr %p
   %b = lshr i64 42, %x
diff --git a/llvm/test/CodeGen/X86/vec_insert-7.ll b/llvm/test/CodeGen/X86/vec_insert-7.ll
index cea047453de43e..8fd6e3dd94b67c 100644
--- a/llvm/test/CodeGen/X86/vec_insert-7.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-7.ll
@@ -14,8 +14,7 @@ define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
 ;
 ; X64-LABEL: mmx_movzl:
 ; X64:       ## %bb.0:
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    movq %rax, %xmm0
+; X64-NEXT:    movaps {{.*#+}} xmm0 = [32,0,0,0]
 ; X64-NEXT:    retq
   %tmp = bitcast x86_mmx %x to <2 x i32>
   %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0
diff --git a/llvm/test/CodeGen/X86/vec_set-B.ll b/llvm/test/CodeGen/X86/vec_set-B.ll
index f4d6b64dc3ac52..0f5c853220b382 100644
--- a/llvm/test/CodeGen/X86/vec_set-B.ll
+++ b/llvm/test/CodeGen/X86/vec_set-B.ll
@@ -38,7 +38,7 @@ define <2 x i64> @test2(i64 %arg) nounwind {
 ; X64-LABEL: test2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    andl $1234567, %edi # imm = 0x12D687
-; X64-NEXT:    movq %rdi, %xmm0
+; X64-NEXT:    movd %edi, %xmm0
 ; X64-NEXT:    retq
   %A = and i64 %arg, 1234567
   %B = insertelement <2 x i64> undef, i64 %A, i32 0
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index b0e319a402d9fb..bc6f2c7006064c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -385,8 +385,7 @@ define <4 x float> @PR31296(ptr %in) {
 ;
 ; X64-LABEL: PR31296:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movl (%rdi), %eax
-; X64-NEXT:    vmovq %rax, %xmm0
+; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,mem[0]
 ; X64-NEXT:    retq
 entry: