-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[ConstantFolding] Generalize constant folding for vector_interleave2 to interleave3-8. #168473
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-transforms
Author: Craig Topper (topperc)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/168473.diff
2 Files Affected:
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index da32542cf7870..a13df6c5bf552 100755
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1659,6 +1659,12 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::vector_extract:
case Intrinsic::vector_insert:
case Intrinsic::vector_interleave2:
+ case Intrinsic::vector_interleave3:
+ case Intrinsic::vector_interleave4:
+ case Intrinsic::vector_interleave5:
+ case Intrinsic::vector_interleave6:
+ case Intrinsic::vector_interleave7:
+ case Intrinsic::vector_interleave8:
case Intrinsic::vector_deinterleave2:
// Target intrinsics
case Intrinsic::amdgcn_perm:
@@ -4207,16 +4213,23 @@ static Constant *ConstantFoldFixedVectorCall(
}
return ConstantVector::get(Result);
}
- case Intrinsic::vector_interleave2: {
+ case Intrinsic::vector_interleave2:
+ case Intrinsic::vector_interleave3:
+ case Intrinsic::vector_interleave4:
+ case Intrinsic::vector_interleave5:
+ case Intrinsic::vector_interleave6:
+ case Intrinsic::vector_interleave7:
+ case Intrinsic::vector_interleave8: {
unsigned NumElements =
cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
+ unsigned NumOperands = Operands.size();
for (unsigned I = 0; I < NumElements; ++I) {
- Constant *Elt0 = Operands[0]->getAggregateElement(I);
- Constant *Elt1 = Operands[1]->getAggregateElement(I);
- if (!Elt0 || !Elt1)
- return nullptr;
- Result[2 * I] = Elt0;
- Result[2 * I + 1] = Elt1;
+ for (unsigned J = 0; J < NumOperands; ++J) {
+ Constant *Elt = Operands[J]->getAggregateElement(I);
+ if (!Elt)
+ return nullptr;
+ Result[NumOperands * I + J] = Elt;
+ }
}
return ConstantVector::get(Result);
}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
index 14543f339db5d..848f0d17ff373 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
@@ -51,6 +51,54 @@ define <8 x i32> @fold_vector_interleave2() {
ret <8 x i32> %1
}
+define <12 x i32> @fold_vector_interleave3() {
+; CHECK-LABEL: define <12 x i32> @fold_vector_interleave3() {
+; CHECK-NEXT: ret <12 x i32> <i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11, i32 4, i32 8, i32 12>
+;
+ %1 = call <12 x i32> @llvm.vector.interleave3.v12i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
+ ret <12 x i32> %1
+}
+
+define <16 x i32> @fold_vector_interleave4() {
+; CHECK-LABEL: define <16 x i32> @fold_vector_interleave4() {
+; CHECK-NEXT: ret <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15, i32 4, i32 8, i32 12, i32 16>
+;
+ %1 = call <16 x i32> @llvm.vector.interleave4.v16i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>)
+ ret <16 x i32> %1
+}
+
+define <20 x i32> @fold_vector_interleave5() {
+; CHECK-LABEL: define <20 x i32> @fold_vector_interleave5() {
+; CHECK-NEXT: ret <20 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 2, i32 6, i32 10, i32 14, i32 18, i32 3, i32 7, i32 11, i32 15, i32 19, i32 4, i32 8, i32 12, i32 16, i32 20>
+;
+ %1 = call <20 x i32> @llvm.vector.interleave5.v20i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>)
+ ret <20 x i32> %1
+}
+
+define <24 x i32> @fold_vector_interleave6() {
+; CHECK-LABEL: define <24 x i32> @fold_vector_interleave6() {
+; CHECK-NEXT: ret <24 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24>
+;
+ %1 = call <24 x i32> @llvm.vector.interleave6.v24i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>, <4 x i32> <i32 21, i32 22, i32 23, i32 24>)
+ ret <24 x i32> %1
+}
+
+define <28 x i32> @fold_vector_interleave7() {
+; CHECK-LABEL: define <28 x i32> @fold_vector_interleave7() {
+; CHECK-NEXT: ret <28 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+;
+ %1 = call <28 x i32> @llvm.vector.interleave7.v28i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>, <4 x i32> <i32 21, i32 22, i32 23, i32 24>, <4 x i32> <i32 25, i32 26, i32 27, i32 28>)
+ ret <28 x i32> %1
+}
+
+define <32 x i32> @fold_vector_interleave8() {
+; CHECK-LABEL: define <32 x i32> @fold_vector_interleave8() {
+; CHECK-NEXT: ret <32 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32>
+;
+ %1 = call <32 x i32> @llvm.vector.interleave8.v32i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>, <4 x i32> <i32 21, i32 22, i32 23, i32 24>, <4 x i32> <i32 25, i32 26, i32 27, i32 28>, <4 x i32> <i32 29, i32 30, i32 31, i32 32>)
+ ret <32 x i32> %1
+}
+
define {<4 x i32>, <4 x i32>} @fold_vector_deinterleave2() {
; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @fold_vector_deinterleave2() {
; CHECK-NEXT: ret { <4 x i32>, <4 x i32> } { <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8> }
|
|
@llvm/pr-subscribers-llvm-analysis
Author: Craig Topper (topperc)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/168473.diff
2 Files Affected:
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index da32542cf7870..a13df6c5bf552 100755
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1659,6 +1659,12 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::vector_extract:
case Intrinsic::vector_insert:
case Intrinsic::vector_interleave2:
+ case Intrinsic::vector_interleave3:
+ case Intrinsic::vector_interleave4:
+ case Intrinsic::vector_interleave5:
+ case Intrinsic::vector_interleave6:
+ case Intrinsic::vector_interleave7:
+ case Intrinsic::vector_interleave8:
case Intrinsic::vector_deinterleave2:
// Target intrinsics
case Intrinsic::amdgcn_perm:
@@ -4207,16 +4213,23 @@ static Constant *ConstantFoldFixedVectorCall(
}
return ConstantVector::get(Result);
}
- case Intrinsic::vector_interleave2: {
+ case Intrinsic::vector_interleave2:
+ case Intrinsic::vector_interleave3:
+ case Intrinsic::vector_interleave4:
+ case Intrinsic::vector_interleave5:
+ case Intrinsic::vector_interleave6:
+ case Intrinsic::vector_interleave7:
+ case Intrinsic::vector_interleave8: {
unsigned NumElements =
cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
+ unsigned NumOperands = Operands.size();
for (unsigned I = 0; I < NumElements; ++I) {
- Constant *Elt0 = Operands[0]->getAggregateElement(I);
- Constant *Elt1 = Operands[1]->getAggregateElement(I);
- if (!Elt0 || !Elt1)
- return nullptr;
- Result[2 * I] = Elt0;
- Result[2 * I + 1] = Elt1;
+ for (unsigned J = 0; J < NumOperands; ++J) {
+ Constant *Elt = Operands[J]->getAggregateElement(I);
+ if (!Elt)
+ return nullptr;
+ Result[NumOperands * I + J] = Elt;
+ }
}
return ConstantVector::get(Result);
}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
index 14543f339db5d..848f0d17ff373 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
@@ -51,6 +51,54 @@ define <8 x i32> @fold_vector_interleave2() {
ret <8 x i32> %1
}
+define <12 x i32> @fold_vector_interleave3() {
+; CHECK-LABEL: define <12 x i32> @fold_vector_interleave3() {
+; CHECK-NEXT: ret <12 x i32> <i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11, i32 4, i32 8, i32 12>
+;
+ %1 = call <12 x i32> @llvm.vector.interleave3.v12i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
+ ret <12 x i32> %1
+}
+
+define <16 x i32> @fold_vector_interleave4() {
+; CHECK-LABEL: define <16 x i32> @fold_vector_interleave4() {
+; CHECK-NEXT: ret <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15, i32 4, i32 8, i32 12, i32 16>
+;
+ %1 = call <16 x i32> @llvm.vector.interleave4.v16i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>)
+ ret <16 x i32> %1
+}
+
+define <20 x i32> @fold_vector_interleave5() {
+; CHECK-LABEL: define <20 x i32> @fold_vector_interleave5() {
+; CHECK-NEXT: ret <20 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 2, i32 6, i32 10, i32 14, i32 18, i32 3, i32 7, i32 11, i32 15, i32 19, i32 4, i32 8, i32 12, i32 16, i32 20>
+;
+ %1 = call <20 x i32> @llvm.vector.interleave5.v20i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>)
+ ret <20 x i32> %1
+}
+
+define <24 x i32> @fold_vector_interleave6() {
+; CHECK-LABEL: define <24 x i32> @fold_vector_interleave6() {
+; CHECK-NEXT: ret <24 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24>
+;
+ %1 = call <24 x i32> @llvm.vector.interleave6.v24i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>, <4 x i32> <i32 21, i32 22, i32 23, i32 24>)
+ ret <24 x i32> %1
+}
+
+define <28 x i32> @fold_vector_interleave7() {
+; CHECK-LABEL: define <28 x i32> @fold_vector_interleave7() {
+; CHECK-NEXT: ret <28 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+;
+ %1 = call <28 x i32> @llvm.vector.interleave7.v28i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>, <4 x i32> <i32 21, i32 22, i32 23, i32 24>, <4 x i32> <i32 25, i32 26, i32 27, i32 28>)
+ ret <28 x i32> %1
+}
+
+define <32 x i32> @fold_vector_interleave8() {
+; CHECK-LABEL: define <32 x i32> @fold_vector_interleave8() {
+; CHECK-NEXT: ret <32 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32>
+;
+ %1 = call <32 x i32> @llvm.vector.interleave8.v32i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>, <4 x i32> <i32 21, i32 22, i32 23, i32 24>, <4 x i32> <i32 25, i32 26, i32 27, i32 28>, <4 x i32> <i32 29, i32 30, i32 31, i32 32>)
+ ret <32 x i32> %1
+}
+
define {<4 x i32>, <4 x i32>} @fold_vector_deinterleave2() {
; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @fold_vector_deinterleave2() {
; CHECK-NEXT: ret { <4 x i32>, <4 x i32> } { <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8> }
|
b6e000a to
5626571
Compare
🐧 Linux x64 Test Results
|
preames
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
mshockwave
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| case Intrinsic::vector_interleave6: | ||
| case Intrinsic::vector_interleave7: | ||
| case Intrinsic::vector_interleave8: { | ||
| unsigned NumElements = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I forget whether we have already had this discussion, but do we have a scalable-vector version of this folding (for splat specifically)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't have one right now. I plan to add it.
|
LLVM Buildbot has detected a new failure on a builder (the builder name was not captured on this page). Full details are available at: <https://lab.llvm.org/buildbot/#/builders/205/builds/28308>. Here is the relevant piece of the build log for reference (the log excerpt was not captured on this page). |
|
LLVM Buildbot has detected a new failure on a builder (the builder name was not captured on this page). Full details are available at: <https://lab.llvm.org/buildbot/#/builders/203/builds/29517>. Here is the relevant piece of the build log for reference (the log excerpt was not captured on this page). |
No description provided.