New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Look through freeze to find insert instruction #86948
base: main
Are you sure you want to change the base?
[InstCombine] Look through freeze to find insert instruction #86948
Conversation
Change: llvm@9d4557920f1 introduced additional freeze instructions which are preventing extractelement optimization.
@llvm/pr-subscribers-llvm-transforms Author: Mariusz Sikora (mariusz-sikora-at-amd) ChangesChange: 9d4557920f1 introduced additional freeze instructions which are preventing extractelement optimization. Full diff: https://github.com/llvm/llvm-project/pull/86948.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index bf7bc0ba84a033..03a91f9de65624 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -173,6 +173,12 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
if (Constant *C = dyn_cast<Constant>(V))
return C->getAggregateElement(EltNo);
+ if (auto *F = dyn_cast<FreezeInst>(V)) {
+ if (isGuaranteedNotToBeUndefOrPoison(F)) {
+ return findScalarElement(F->getOperand(0), EltNo);
+ }
+ }
+
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
if (!isa<ConstantInt>(III->getOperand(2)))
diff --git a/llvm/test/Transforms/InstCombine/extractelement.ll b/llvm/test/Transforms/InstCombine/extractelement.ll
index bc5dd060a540ae..738e7459704128 100644
--- a/llvm/test/Transforms/InstCombine/extractelement.ll
+++ b/llvm/test/Transforms/InstCombine/extractelement.ll
@@ -926,3 +926,17 @@ define float @crash_4b8320(<2 x float> %i1, float %i12) {
%i29 = extractelement <4 x float> %i26, i64 0
ret float %i29
}
+
+define i32 @freeze_between_extractelement_and_shufflevector(<2 x float> %inp) {
+; ANY-LABEL: @freeze_between_extractelement_and_shufflevector(
+; ANY-NEXT: entry:
+; ANY-NEXT: ret i32 0
+;
+entry:
+ %sh1 = shufflevector <2 x float> %inp, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+ %sh2 = shufflevector <4 x float> %sh1, <4 x float> <float poison, float poison, float 0.000000e+00, float undef>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ %fr = freeze <4 x float> %sh2
+ %bc13 = bitcast <4 x float> %fr to <4 x i32>
+ %ext = extractelement <4 x i32> %bc13, i64 2
+ ret i32 %ext
+}
|
@llvm/pr-subscribers-llvm-analysis Author: Mariusz Sikora (mariusz-sikora-at-amd) ChangesChange: 9d4557920f1 introduced additional freeze instructions which are preventing extractelement optimization. Full diff: https://github.com/llvm/llvm-project/pull/86948.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index bf7bc0ba84a033..03a91f9de65624 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -173,6 +173,12 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
if (Constant *C = dyn_cast<Constant>(V))
return C->getAggregateElement(EltNo);
+ if (auto *F = dyn_cast<FreezeInst>(V)) {
+ if (isGuaranteedNotToBeUndefOrPoison(F)) {
+ return findScalarElement(F->getOperand(0), EltNo);
+ }
+ }
+
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
if (!isa<ConstantInt>(III->getOperand(2)))
diff --git a/llvm/test/Transforms/InstCombine/extractelement.ll b/llvm/test/Transforms/InstCombine/extractelement.ll
index bc5dd060a540ae..738e7459704128 100644
--- a/llvm/test/Transforms/InstCombine/extractelement.ll
+++ b/llvm/test/Transforms/InstCombine/extractelement.ll
@@ -926,3 +926,17 @@ define float @crash_4b8320(<2 x float> %i1, float %i12) {
%i29 = extractelement <4 x float> %i26, i64 0
ret float %i29
}
+
+define i32 @freeze_between_extractelement_and_shufflevector(<2 x float> %inp) {
+; ANY-LABEL: @freeze_between_extractelement_and_shufflevector(
+; ANY-NEXT: entry:
+; ANY-NEXT: ret i32 0
+;
+entry:
+ %sh1 = shufflevector <2 x float> %inp, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+ %sh2 = shufflevector <4 x float> %sh1, <4 x float> <float poison, float poison, float 0.000000e+00, float undef>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ %fr = freeze <4 x float> %sh2
+ %bc13 = bitcast <4 x float> %fr to <4 x i32>
+ %ext = extractelement <4 x i32> %bc13, i64 2
+ ret i32 %ext
+}
|
@@ -173,6 +173,12 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) { | |||
if (Constant *C = dyn_cast<Constant>(V)) | |||
return C->getAggregateElement(EltNo); | |||
|
|||
if (auto *F = dyn_cast<FreezeInst>(V)) { | |||
if (isGuaranteedNotToBeUndefOrPoison(F)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Won't this always return true? You're passing in the freeze itself. This should be checking F->getOperand(0)?
@@ -173,6 +173,12 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) { | |||
if (Constant *C = dyn_cast<Constant>(V)) | |||
return C->getAggregateElement(EltNo); | |||
|
|||
if (auto *F = dyn_cast<FreezeInst>(V)) { | |||
if (isGuaranteedNotToBeUndefOrPoison(F)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
isGuaranteedNotToBeUndefOrPoison
always evaluates to true :(
llvm-project/llvm/lib/Analysis/ValueTracking.cpp
Lines 7038 to 7041 in a3efc53
// If the value is a freeze instruction, then it can never | |
// be undef or poison. | |
if (isa<FreezeInst>(V)) | |
return true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh no :/
Change: 9d4557920f1 introduced additional freeze instructions which are preventing extractelement optimization.