-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[SLP]Enable SDiv/UDiv support as main op in copyables #161892
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP]Enable SDiv/UDiv support as main op in copyables #161892
Conversation
Created using spr 1.3.7
@llvm/pr-subscribers-llvm-transforms Author: Alexey Bataev (alexey-bataev) ChangesAllow SDiv/UDiv as a main operation in copyables support Full diff: https://github.com/llvm/llvm-project/pull/161892.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fedca65d241e8..21d9f20752b66 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10620,7 +10620,8 @@ class InstructionsCompatibilityAnalysis {
/// Checks if the opcode is supported as the main opcode for copyable
/// elements.
static bool isSupportedOpcode(const unsigned Opcode) {
- return Opcode == Instruction::Add || Opcode == Instruction::LShr;
+ return Opcode == Instruction::Add || Opcode == Instruction::LShr ||
+ Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
}
/// Identifies the best candidate value, which represents main opcode
@@ -10937,6 +10938,8 @@ class InstructionsCompatibilityAnalysis {
switch (MainOpcode) {
case Instruction::Add:
case Instruction::LShr:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
break;
default:
@@ -22062,8 +22065,10 @@ bool BoUpSLP::collectValuesToDemote(
auto Checker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
return all_of(E.Scalars, [&](Value *V) {
- auto *I = cast<Instruction>(V);
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
+ if (E.hasCopyableElements() && E.isCopyableElement(V))
+ return MaskedValueIsZero(V, Mask, SimplifyQuery(*DL));
+ auto *I = cast<Instruction>(V);
return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) &&
MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL));
});
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index cf62fd5cf66f7..a888027479817 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -4,21 +4,14 @@
define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_add_sdiv(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2
-; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
-; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4
-; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4
-; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
-; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP0]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4
; CHECK-NEXT: ret void
@@ -58,21 +51,14 @@ entry:
define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_add_udiv(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
-; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
-; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
-; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
-; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
-; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4
; CHECK-NEXT: ret void
|
@llvm/pr-subscribers-vectorizers Author: Alexey Bataev (alexey-bataev) ChangesAllow SDiv/UDiv as a main operation in copyables support Full diff: https://github.com/llvm/llvm-project/pull/161892.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fedca65d241e8..21d9f20752b66 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10620,7 +10620,8 @@ class InstructionsCompatibilityAnalysis {
/// Checks if the opcode is supported as the main opcode for copyable
/// elements.
static bool isSupportedOpcode(const unsigned Opcode) {
- return Opcode == Instruction::Add || Opcode == Instruction::LShr;
+ return Opcode == Instruction::Add || Opcode == Instruction::LShr ||
+ Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
}
/// Identifies the best candidate value, which represents main opcode
@@ -10937,6 +10938,8 @@ class InstructionsCompatibilityAnalysis {
switch (MainOpcode) {
case Instruction::Add:
case Instruction::LShr:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
break;
default:
@@ -22062,8 +22065,10 @@ bool BoUpSLP::collectValuesToDemote(
auto Checker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
return all_of(E.Scalars, [&](Value *V) {
- auto *I = cast<Instruction>(V);
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
+ if (E.hasCopyableElements() && E.isCopyableElement(V))
+ return MaskedValueIsZero(V, Mask, SimplifyQuery(*DL));
+ auto *I = cast<Instruction>(V);
return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) &&
MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL));
});
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index cf62fd5cf66f7..a888027479817 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -4,21 +4,14 @@
define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_add_sdiv(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2
-; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
-; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4
-; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4
-; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
-; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP0]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4
; CHECK-NEXT: ret void
@@ -58,21 +51,14 @@ entry:
define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_add_udiv(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
-; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
-; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
-; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
-; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
-; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4
; CHECK-NEXT: ret void
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please rebase after the SHL support
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/25696 Here is the relevant piece of the build log for the reference
|
Allow SDiv/UDiv as a main operation in copyables support
Allow SDiv/UDiv as a main operation in copyables support
Allow SDiv/UDiv as a main operation in copyables support