diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 048a3e691fe55..3f18bd70539a0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10546,8 +10546,11 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
           PoisonValue::get(UniqueValues.front()->getType()));
       // Check that extended with poisons/copyable operations are still valid
       // for vectorization (div/rem are not allowed).
-      if (!S.areInstructionsWithCopyableElements() &&
-          !getSameOpcode(PaddedUniqueValues, TLI).valid()) {
+      if ((!S.areInstructionsWithCopyableElements() &&
+           !getSameOpcode(PaddedUniqueValues, TLI).valid()) ||
+          (S.areInstructionsWithCopyableElements() && S.isMulDivLikeOp() &&
+           (S.getMainOp()->isIntDivRem() || S.getMainOp()->isFPDivRem() ||
+            isa<CallInst>(S.getMainOp())))) {
         LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
         ReuseShuffleIndices.clear();
         return false;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
new file mode 100644
index 0000000000000..d16843c81144d
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define ptr @test(ptr %d) {
+; CHECK-LABEL: define ptr @test(
+; CHECK-SAME: ptr [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr null, align 1
+; CHECK-NEXT:    [[CMP4_2:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 0, 0
+; CHECK-NEXT:    [[TMP3:%.*]] = udiv i64 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = udiv i64 1, 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <6 x i64> poison, i64 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <6 x i64> [[TMP5]], i64 [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <6 x i64> [[TMP6]], i64 [[TMP4]], i32 4
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <6 x i64> [[TMP7]], <6 x i64> poison, <6 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT:    [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 1, i64 1, i64 1, i64 0>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <6 x i64> [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <6 x i64> [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <6 x i64> [[TMP9]], i32 2
+; CHECK-NEXT:    [[SCEVGEP42:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <6 x i64> [[TMP9]], i32 3
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <6 x i64> [[TMP9]], i32 4
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <6 x i64> [[TMP9]], i32 5
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP19]]
+; CHECK-NEXT:    ret ptr [[TMP20]]
+;
+entry:
+  %0 = load i8, ptr null, align 1
+  %cmp4.2 = icmp eq i8 %0, 0
+  %1 = select i1 %cmp4.2, i64 0, i64 0
+  %2 = shl i64 %1, 1
+  %3 = getelementptr i8, ptr %d, i64 %2
+  %4 = xor i64 0, 0
+  %5 = udiv i64 %4, 0
+  %6 = mul i64 %5, 6
+  %7 = getelementptr i8, ptr %d, i64 %6
+  %8 = shl i64 %1, 0
+  %scevgep42 = getelementptr i8, ptr %d, i64 %8
+  %9 = mul i64 %5, 1
+  %10 = getelementptr i8, ptr %d, i64 %9
+  %11 = udiv i64 1, 0
+  %12 = mul i64 %11, 1
+  %13 = getelementptr i8, ptr %d, i64 %12
+  %14 = mul i64 %11, 0
+  %15 = getelementptr i8, ptr %d, i64 %14
+  ret ptr %15
+}