diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1b55a3b235228..34b405ced8c0a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22134,6 +22134,27 @@ bool BoUpSLP::collectValuesToDemote(
         {VectorizableTree[E.CombinedEntriesWithIndices.front().first].get(),
          VectorizableTree[E.CombinedEntriesWithIndices.back().first].get()});
 
+  if (E.isAltShuffle()) { // The switch below dispatches on E.getOpcode().
+    // Combining these opcodes may lead to incorrect analysis, skip for now.
+    auto IsDangerousOpcode = [](unsigned Opcode) {
+      switch (Opcode) {
+      case Instruction::Shl:
+      case Instruction::AShr:
+      case Instruction::LShr:
+      case Instruction::UDiv:
+      case Instruction::SDiv:
+      case Instruction::URem:
+      case Instruction::SRem:
+        return true; // Shifts, divisions and remainders.
+      default:
+        break;
+      }
+      return false; // Every other opcode is let through.
+    };
+    if (IsDangerousOpcode(E.getAltOpcode())) // Alternate opcode only.
+      return FinalAnalysis();
+  }
+
   switch (E.getOpcode()) {
 
   // We can always demote truncations and extensions.
Since truncations can diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll index cc2e16e2b099b..959b2350d9d78 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll @@ -6,14 +6,12 @@ define float @test(i8 %0) { ; CHECK-SAME: i8 [[TMP0:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> , i8 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i16> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i16> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> [[TMP4]], <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP9]] to i32 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = zext i16 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]] ; CHECK-NEXT: switch i32 [[TMP8]], label %[[EXIT:.*]] [ ; CHECK-NEXT: i32 0, label %[[EXIT]]