diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 86a1dad4473ba..206abcb8c6656 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43516,6 +43516,13 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
   // Use PACKSS if the input has sign-bits that extend all the way to the
   // packed/truncated value. e.g. Comparison result, sext_in_reg, etc.
   unsigned NumSignBits = DAG.ComputeNumSignBits(In);
+
+  // Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with
+  // a sign splat. ComputeNumSignBits struggles to see through BITCASTs later
+  // on and combines/simplifications can't then use it.
+  if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
+    return SDValue();
+
   if (NumSignBits > (InSVT.getSizeInBits() - NumPackedSignBits))
     return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);
 
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index 675faabac602b..8eb242d7c1fb0 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -114,15 +114,9 @@ define <4 x float> @signbits_ashr_sitofp_1(<4 x i64> %a0) nounwind {
 ; X86-LABEL: signbits_ashr_sitofp_1:
 ; X86:       # %bb.0:
 ; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X86-NEXT:    vpsrad $31, %xmm1, %xmm2
 ; X86-NEXT:    vpsrad $16, %xmm1, %xmm1
-; X86-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; X86-NEXT:    vpsrad $31, %xmm0, %xmm2
 ; X86-NEXT:    vpsrad $16, %xmm0, %xmm0
-; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
 ; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
@@ -130,27 +124,18 @@ define <4 x float> @signbits_ashr_sitofp_1(<4 x i64> %a0) nounwind {
 ; X64-AVX1-LABEL: signbits_ashr_sitofp_1:
 ; X64-AVX1:       # %bb.0:
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X64-AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
 ; X64-AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
-; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; X64-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
 ; X64-AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; X64-AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
 ; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: signbits_ashr_sitofp_1:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
 ; X64-AVX2-NEXT:    vpsrad $16, %ymm0, %ymm0
-; X64-AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
-; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
 ; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index 443d57a1ad54c..d548639782dd7 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -905,10 +905,11 @@ define <32 x i8> @trunc_v32i16_v32i8_zeroes(<32 x i16>* %x) nounwind "min-legal-
 define <8 x i32> @trunc_v8i64_v8i32_sign(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" {
 ; CHECK-LABEL: trunc_v8i64_v8i32_sign:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsraq $48, 32(%rdi), %ymm1
-; CHECK-NEXT:    vpsraq $48, (%rdi), %ymm2
-; CHECK-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
-; CHECK-NEXT:    vpermi2w %ymm1, %ymm2, %ymm0
+; CHECK-NEXT:    vpsraq $48, 32(%rdi), %ymm0
+; CHECK-NEXT:    vpsraq $48, (%rdi), %ymm1
+; CHECK-NEXT:    vpmovqd %ymm1, %xmm1
+; CHECK-NEXT:    vpmovqd %ymm0, %xmm0
+; CHECK-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; CHECK-NEXT:    retq
   %a = load <8 x i64>, <8 x i64>* %x
   %b = ashr <8 x i64> %a,