From 7d3af6b586d2bf74fc9d67b4983aff4ca66aff28 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 1 Oct 2025 15:08:27 +0100 Subject: [PATCH] [X86] SimplifyDemandedBitsForTargetNode - generalize X86ISD::VSRAI handling when only demanding 'known signbits' If we only want bits that already match the signbit then we don't need to shift. Generalizes an existing pattern that just handled signbit-only demanded bits to match what we do for ISD::SRA. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++++-- llvm/test/CodeGen/X86/combine-pack.ll | 3 --- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cd04ff5bc7ef4..a04c3e8f70c89 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44615,8 +44615,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( APInt DemandedMask = OriginalDemandedBits << ShAmt; - // If we just want the sign bit then we don't need to shift it. - if (OriginalDemandedBits.isSignMask()) + // If we only want bits that already match the signbit then we don't need + // to shift. + unsigned NumHiDemandedBits = BitWidth - OriginalDemandedBits.countr_zero(); + if (TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1) >= + NumHiDemandedBits) return TLO.CombineTo(Op, Op0); // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1 diff --git a/llvm/test/CodeGen/X86/combine-pack.ll b/llvm/test/CodeGen/X86/combine-pack.ll index 9e740b04073e0..1e7c700055e3f 100644 --- a/llvm/test/CodeGen/X86/combine-pack.ll +++ b/llvm/test/CodeGen/X86/combine-pack.ll @@ -5,14 +5,12 @@ declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) -; TODO: Failure to remove unnecessary signsplat define <8 x i16> @combine_packss_v4i32_signsplat(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: combine_packss_v4i32_signsplat: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: psraw $15, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_packss_v4i32_signsplat: @@ -20,7 +18,6 @@ define <8 x i16> @combine_packss_v4i32_signsplat(<4 x i32> %a0, <4 x i32> %a1) { ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 ; AVX-NEXT: retq %cmp = icmp sgt <4 x i32> %a0, %a1 %ext = sext <4 x i1> %cmp to <4 x i32>