[SDAG] Add freeze when simplifying select with undef arms#175199
[SDAG] Add freeze when simplifying select with undef arms#175199
Conversation
|
@llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-llvm-selectiondag Author: Yingwei Zheng (dtcxzyw) ChangesConsider the following pattern: We cannot simplify This patch checks whether the replacement may be poison. If so, it will insert a freeze. Closes #175018. Full diff: https://github.com/llvm/llvm-project/pull/175199.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 35e443b40c41f..8eeb456a95dd1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -10820,9 +10820,9 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
if (Cond.isUndef())
return isConstantValueOfAnyType(T) ? T : F;
if (T.isUndef())
- return F;
+ return isGuaranteedNotToBePoison(F) ? F : getFreeze(F);
if (F.isUndef())
- return T;
+ return isGuaranteedNotToBePoison(T) ? T : getFreeze(T);
// select true, T, F --> T
// select false, T, F --> F
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index f998128af95f8..9da2640ea8392 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -571,16 +571,16 @@ define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
; X86-SLOW-LABEL: const_shift_i64:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl %ecx, %esi
; X86-SLOW-NEXT: shrl $25, %esi
-; X86-SLOW-NEXT: movl %ecx, %eax
-; X86-SLOW-NEXT: shll $7, %eax
-; X86-SLOW-NEXT: orl %esi, %eax
-; X86-SLOW-NEXT: shrl $25, %ecx
; X86-SLOW-NEXT: shll $7, %edx
-; X86-SLOW-NEXT: orl %ecx, %edx
+; X86-SLOW-NEXT: orl %esi, %edx
+; X86-SLOW-NEXT: shll $7, %ecx
+; X86-SLOW-NEXT: shrl $25, %eax
+; X86-SLOW-NEXT: orl %ecx, %eax
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 252cb3333f1d1..78d7e7eb3c136 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -262,9 +262,9 @@ define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) nounwind {
define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X86-SSE2-LABEL: fshl_i64_const_overshift:
; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: shldl $9, %ecx, %edx
; X86-SSE2-NEXT: shrdl $23, %ecx, %eax
; X86-SSE2-NEXT: retl
@@ -1004,9 +1004,9 @@ define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X86-SSE2-LABEL: fshr_i64_const_overshift:
; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: shrdl $9, %ecx, %eax
; X86-SSE2-NEXT: shldl $23, %ecx, %edx
; X86-SSE2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 4e31b48ec5cec..fe90028aa6aff 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -2198,3 +2198,71 @@ define i32 @select_uaddo_common_op1(i32 %a, i32 %b, i32 %c, i1 %cond) {
%sel = select i1 %cond, i32 %ab0, i32 %cb0
ret i32 %sel
}
+
+define i56 @select_undef_rhs(i64 %x, i1 %cmp) {
+; GENERIC-LABEL: select_undef_rhs:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: movabsq $281474976710655, %rax ## imm = 0xFFFFFFFFFFFF
+; GENERIC-NEXT: andq %rdi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: select_undef_rhs:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: movabsq $281474976710655, %rax ## imm = 0xFFFFFFFFFFFF
+; ATOM-NEXT: andq %rdi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; ATHLON-LABEL: select_undef_rhs:
+; ATHLON: ## %bb.0:
+; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax
+; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; ATHLON-NEXT: movzwl %cx, %edx
+; ATHLON-NEXT: retl
+;
+; MCU-LABEL: select_undef_rhs:
+; MCU: # %bb.0:
+; MCU-NEXT: movzwl %dx, %edx
+; MCU-NEXT: retl
+ %trunc = trunc nuw i64 %x to i48
+ %sel = select i1 %cmp, i48 %trunc, i48 undef
+ %zext = zext i48 %sel to i56
+ ret i56 %zext
+}
+
+define i56 @select_undef_lhs(i64 %x, i1 %cmp) {
+; GENERIC-LABEL: select_undef_lhs:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: movabsq $281474976710655, %rax ## imm = 0xFFFFFFFFFFFF
+; GENERIC-NEXT: andq %rdi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: select_undef_lhs:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: movabsq $281474976710655, %rax ## imm = 0xFFFFFFFFFFFF
+; ATOM-NEXT: andq %rdi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; ATHLON-LABEL: select_undef_lhs:
+; ATHLON: ## %bb.0:
+; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax
+; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; ATHLON-NEXT: movzwl %cx, %edx
+; ATHLON-NEXT: retl
+;
+; MCU-LABEL: select_undef_lhs:
+; MCU: # %bb.0:
+; MCU-NEXT: movzwl %dx, %edx
+; MCU-NEXT: retl
+ %trunc = trunc nuw i64 %x to i48
+ %sel = select i1 %cmp, i48 undef, i48 %trunc
+ %zext = zext i48 %sel to i56
+ ret i56 %zext
+}
|
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef([^a-zA-Z0-9_-]|$)|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/test/CodeGen/PowerPC/widen-vec-correctly-be.ll llvm/test/CodeGen/X86/fshl.ll llvm/test/CodeGen/X86/funnel-shift.ll llvm/test/CodeGen/X86/select.llThe following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
}Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
}Please refer to the Undefined Behavior Manual for more information. |
| ; | ||
| ; MCU-LABEL: select_undef_rhs: | ||
| ; MCU: # %bb.0: | ||
| ; MCU-NEXT: movzwl %dx, %edx |
There was a problem hiding this comment.
I don't understand why this only produces movzwl.
There was a problem hiding this comment.
https://godbolt.org/z/j7hrTr645 An i64 parameter is stored in two registers.
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
| // select, ?, undef, F --> F | ||
| // select, ?, T, undef --> T | ||
| // select, ?, undef, F --> freeze(F) | ||
| // select, ?, T, undef --> freeze(T) |
There was a problem hiding this comment.
Should we implement select, ?, poison, F --> F?
There was a problem hiding this comment.
Yes. I'd leave it as a follow-up.
This reverts commit 6c43e29.
Consider the following pattern: ``` %trunc = trunc nuw i64 %x to i48 %sel = select i1 %cmp, i48 %trunc, i48 undef ``` We cannot simplify `%sel` to `%trunc` as `%trunc` may be poison, which cannot be refined into undef. This patch checks whether the replacement may be poison. If so, it will insert a freeze. We may need SDAG's version of `impliesPoison` if it causes significant regressions. Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=ded109c0cff41714ebf9bd60b073aaab07fa4ca8&to=103e605ce6b33bc9145526faf805ee38b972c215&stat=instructions%3Au Closes llvm#175018.
Consider the following pattern:
We cannot simplify
%selto%truncas%truncmay be poison, which cannot be refined into undef.This patch checks whether the replacement may be poison. If so, it will insert a freeze.
We may need SDAG's version of
impliesPoisonif it causes significant regressions.Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=ded109c0cff41714ebf9bd60b073aaab07fa4ca8&to=103e605ce6b33bc9145526faf805ee38b972c215&stat=instructions%3Au
Closes #175018.