diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 961380ce4ad9f2..c055091feeb42e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -14190,6 +14190,28 @@ bool BoUpSLP::collectValuesToDemote( return false; break; } + case Instruction::UDiv: + case Instruction::URem: { + if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth)) + return false; + // UDiv and URem can be truncated if all the truncated bits are zero. + if (!AttemptCheckBitwidth( + [&](unsigned BitWidth, unsigned OrigBitWidth) { + assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + return MaskedValueIsZero(I->getOperand(0), Mask, + SimplifyQuery(*DL)) && + MaskedValueIsZero(I->getOperand(1), Mask, + SimplifyQuery(*DL)); + }, + NeedToExit)) + return false; + if (NeedToExit) + return true; + if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit)) + return false; + break; + } // We can demote selects if we can demote their true and false values. case Instruction::Select: { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll index 4a23abf182e888..cfbbe14186b501 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll @@ -116,9 +116,7 @@ define void @test_div() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[TMP3]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP3]], ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 ; CHECK-NEXT: ret void ; @@ -170,9 +168,7 @@ define void @test_rem() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[TMP3]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = urem <4 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP3]], ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 ; CHECK-NEXT: ret void ;