From d405138abe8d394ebcba7f438283a971462451cc Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 13 Oct 2025 10:47:58 -0700 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.7 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 24 ++++++++++++------- .../X86/minbw-node-used-twice.ll | 11 ++------- .../X86/parent-node-non-schedulable.ll | 4 ++-- .../X86/vect_copyable_in_binops.ll | 2 +- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f95d28813fa23..be14567948c22 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10657,10 +10657,11 @@ class InstructionsCompatibilityAnalysis { /// Checks if the opcode is supported as the main opcode for copyable /// elements. static bool isSupportedOpcode(const unsigned Opcode) { - return Opcode == Instruction::Add || Opcode == Instruction::LShr || - Opcode == Instruction::Shl || Opcode == Instruction::SDiv || - Opcode == Instruction::UDiv || Opcode == Instruction::And || - Opcode == Instruction::Or || Opcode == Instruction::Xor; + return Opcode == Instruction::Add || Opcode == Instruction::Sub || + Opcode == Instruction::LShr || Opcode == Instruction::Shl || + Opcode == Instruction::SDiv || Opcode == Instruction::UDiv || + Opcode == Instruction::And || Opcode == Instruction::Or || + Opcode == Instruction::Xor; } /// Identifies the best candidate value, which represents main opcode @@ -10678,7 +10679,7 @@ class InstructionsCompatibilityAnalysis { }; // Exclude operands instructions immediately to improve compile time, it // will be unable to schedule anyway. 
- SmallDenseSet Operands; + SmallDenseMap> Operands; SmallMapVector, 4> Candidates; bool AnyUndef = false; for (Value *V : VL) { @@ -10692,12 +10693,12 @@ class InstructionsCompatibilityAnalysis { if (Candidates.empty()) { Candidates.try_emplace(I->getOpcode()).first->second.push_back(I); Parent = I->getParent(); - Operands.insert(I->op_begin(), I->op_end()); + Operands[I->getOpcode()].insert(I->op_begin(), I->op_end()); continue; } if (Parent == I->getParent()) { Candidates.try_emplace(I->getOpcode()).first->second.push_back(I); - Operands.insert(I->op_begin(), I->op_end()); + Operands[I->getOpcode()].insert(I->op_begin(), I->op_end()); continue; } auto *NodeA = DT.getNode(Parent); @@ -10712,7 +10713,7 @@ class InstructionsCompatibilityAnalysis { Candidates.try_emplace(I->getOpcode()).first->second.push_back(I); Parent = I->getParent(); Operands.clear(); - Operands.insert(I->op_begin(), I->op_end()); + Operands[I->getOpcode()].insert(I->op_begin(), I->op_end()); } } unsigned BestOpcodeNum = 0; @@ -10720,8 +10721,12 @@ class InstructionsCompatibilityAnalysis { for (const auto &P : Candidates) { if (P.second.size() < BestOpcodeNum) continue; + const auto &Ops = Operands.at(P.first); + // Skip opcodes whose candidates depend on one another. 
+ if (any_of(P.second, [&](Instruction *I) { return Ops.contains(I); })) + continue; for (Instruction *I : P.second) { - if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) { + if (IsSupportedInstruction(I, AnyUndef)) { MainOp = I; BestOpcodeNum = P.second.size(); break; @@ -10981,6 +10986,7 @@ class InstructionsCompatibilityAnalysis { getWidenedType(S.getMainOp()->getType(), VL.size()); switch (MainOpcode) { case Instruction::Add: + case Instruction::Sub: case Instruction::LShr: case Instruction::Shl: case Instruction::SDiv: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll index 55f2b238c07df..24899900ebb3a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll @@ -4,15 +4,8 @@ define i8 @test() { ; CHECK-LABEL: define i8 @test() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SUB_I_I79_PEEL_I:%.*]] = sub i16 0, 1 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> , i16 [[SUB_I_I79_PEEL_I]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i16> [[TMP3]], [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i16> [[TMP4]], [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: [[CONV13_I89_PEEL_I:%.*]] = zext i1 [[TMP5]] to i8 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i16> , +; CHECK-NEXT: [[CONV13_I89_PEEL_I:%.*]] = zext i1 false to i8 ; CHECK-NEXT: ret i8 [[CONV13_I89_PEEL_I]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll index 7c8cb02f28c63..60e13d0b4cb6a 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll @@ -6,12 +6,12 @@ define void @test(ptr %0, i64 %1, i64 %2, i1 %3, i64 %4, i64 %5) { ; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]], i1 [[TMP3:%.*]], i64 [[TMP4:%.*]], i64 [[TMP5:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 240 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128 +; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr null, align 4 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> , i64 [[TMP2]], i32 3 ; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr null, align 4 ; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP15]], <6 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <6 x i64> poison, i64 [[TMP14]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll index 3e0a3741d6bbc..2a0e7889f0f34 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll @@ -183,7 +183,7 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) { ; CHECK-LABEL: @addsub1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> [[TMP0]], ; 
CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; CHECK-NEXT: ret void ;