diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a04c3e8f70c89..34854e4d8b6c0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -45172,6 +45172,18 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode( case X86ISD::Wrapper: case X86ISD::WrapperRIP: return true; + case X86ISD::PACKSS: + case X86ISD::PACKUS: { + APInt DemandedLHS, DemandedRHS; + getPackDemandedElts(Op.getSimpleValueType(), DemandedElts, DemandedLHS, + DemandedRHS); + return (!DemandedLHS || + DAG.isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedLHS, + PoisonOnly, Depth + 1)) && + (!DemandedRHS || + DAG.isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedRHS, + PoisonOnly, Depth + 1)); + } case X86ISD::INSERTPS: case X86ISD::BLENDI: case X86ISD::PSHUFB: @@ -45242,6 +45254,10 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode( case X86ISD::BLENDI: case X86ISD::BLENDV: return false; + // SSE packs. + case X86ISD::PACKSS: + case X86ISD::PACKUS: + return false; // SSE target shuffles. case X86ISD::INSERTPS: case X86ISD::PSHUFB: diff --git a/llvm/test/CodeGen/X86/combine-pack.ll b/llvm/test/CodeGen/X86/combine-pack.ll index 1e7c700055e3f..2f5454dc2c3ec 100644 --- a/llvm/test/CodeGen/X86/combine-pack.ll +++ b/llvm/test/CodeGen/X86/combine-pack.ll @@ -26,14 +26,12 @@ define <8 x i16> @combine_packss_v4i32_signsplat(<4 x i32> %a0, <4 x i32> %a1) { ret <8 x i16> %signsplat } -; TODO: Failure to remove unnecessary signsplat through freeze define <8 x i16> @combine_packss_v4i32_freeze_signsplat(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: combine_packss_v4i32_freeze_signsplat: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: psraw $15, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_packss_v4i32_freeze_signsplat: @@ -41,7 +39,6 @@ define <8 x i16> @combine_packss_v4i32_freeze_signsplat(<4 x i32> %a0, <4 x i32> ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 ; AVX-NEXT: retq %cmp = icmp sgt <4 x i32> %a0, %a1 %ext = sext <4 x i1> %cmp to <4 x i32>