From d7975c9d93fb4a69c0bd79d7d5b3f6be77a25c73 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Thu, 28 Mar 2024 10:35:15 -0400
Subject: [PATCH] [SLP]Add better minbitwidth analysis for udiv/urem
 instructions.

Adds improved bitwidth analysis for udiv/urem instructions. The
analysis is based on a similar version in InstCombiner.

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/85928
---
 .../Transforms/Vectorize/SLPVectorizer.cpp   | 22 ++++++++++++++++++++++
 .../X86/reorder-possible-strided-node.ll     |  8 ++------
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 961380ce4ad9f..c055091feeb42 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14190,6 +14190,28 @@ bool BoUpSLP::collectValuesToDemote(
       return false;
     break;
   }
+  case Instruction::UDiv:
+  case Instruction::URem: {
+    if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
+      return false;
+    // UDiv and URem can be truncated if all the truncated bits are zero.
+    if (!AttemptCheckBitwidth(
+            [&](unsigned BitWidth, unsigned OrigBitWidth) {
+              assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
+              APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
+              return MaskedValueIsZero(I->getOperand(0), Mask,
+                                       SimplifyQuery(*DL)) &&
+                     MaskedValueIsZero(I->getOperand(1), Mask,
+                                       SimplifyQuery(*DL));
+            },
+            NeedToExit))
+      return false;
+    if (NeedToExit)
+      return true;
+    if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
+      return false;
+    break;
+  }
 
   // We can demote selects if we can demote their true and false values.
   case Instruction::Select: {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll
index 4a23abf182e88..cfbbe14186b50 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll
@@ -116,9 +116,7 @@ define void @test_div() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i32> [[TMP3]] to <4 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[TMP4]],
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32>
+; CHECK-NEXT:    [[TMP6:%.*]] = udiv <4 x i32> [[TMP3]],
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -170,9 +168,7 @@ define void @test_rem() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i32> [[TMP3]] to <4 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = urem <4 x i64> [[TMP4]],
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32>
+; CHECK-NEXT:    [[TMP6:%.*]] = urem <4 x i32> [[TMP3]],
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
 ; CHECK-NEXT:    ret void
 ;
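
The legality condition the new UDiv/URem case encodes: APInt::getBitsSetFrom(OrigBitWidth, BitWidth)
builds a mask of exactly the bits that truncating to BitWidth would drop, and MaskedValueIsZero must
prove those bits are zero in both operands. A minimal scalar sketch of the same rewrite, assuming a
made-up function name and divisor (neither is taken from the patch or its tests):

  define i32 @narrow_udiv(i32 %a) {
    %wide = zext i32 %a to i64     ; bits 32..63 of the dividend are known zero
    %div = udiv i64 %wide, 3       ; bits 32..63 of the divisor are zero as well
    %res = trunc i64 %div to i32   ; quotient <= dividend, so no set bits are dropped
    ret i32 %res
  }

Since both operands pass the MaskedValueIsZero check for the dropped bits, the division can be
performed directly as udiv i32 %a, 3; the vector form of this zext/udiv/trunc -> udiv narrowing is
exactly what the updated CHECK lines above verify.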