-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Fold (sub (xor X, (sext C)), (sext C)) => (select C (neg X), X) #79417
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-x86 Author: Kai Luo (bzEq) Changes: This is useful when computing absdiff. Correctness proof: https://alive2.llvm.org/ce/z/eMbxps. Full diff: https://github.com/llvm/llvm-project/pull/79417.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 8a00b75a1f74042..1a13fa4e2099946 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2448,6 +2448,16 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
}
}
+ {
+ // (sub (xor X, (sext C)), (sext C)) => (select C (neg X), X)
+ Value *C0, *C1, *X;
+ if (match(Op0, m_Xor(m_Value(X), m_SExt(m_Value(C0)))) &&
+ (C0->getType()->getScalarSizeInBits() == 1) &&
+ match(Op1, m_SExt(m_Value(C1))) && (C0 == C1)) {
+ return SelectInst::Create(C0, Builder.CreateNeg(X), X);
+ }
+ }
+
if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
return R;
diff --git a/llvm/test/CodeGen/AArch64/absdiff.ll b/llvm/test/CodeGen/AArch64/absdiff.ll
new file mode 100644
index 000000000000000..88c6fdad0a2030d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/absdiff.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mtriple=aarch64-linux-gnu -passes=instcombine < %s -o - | llc -mtriple=aarch64-linux-gnu -o - | FileCheck %s
+
+define i64 @absdiff(i64 %0, i64 %1) {
+; CHECK-LABEL: absdiff:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, lo
+; CHECK-NEXT: ret
+ %3 = icmp ult i64 %0, %1
+ %4 = sext i1 %3 to i64
+ %5 = sub i64 %0, %1
+ %6 = xor i64 %5, %4
+ %7 = sub i64 %6, %4
+ ret i64 %7
+}
diff --git a/llvm/test/CodeGen/PowerPC/absdiff.ll b/llvm/test/CodeGen/PowerPC/absdiff.ll
new file mode 100644
index 000000000000000..c806da988f0a0bf
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/absdiff.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mtriple=powerpc64-linux-gnu -passes=instcombine < %s -o - | llc -mtriple=powerpc64-linux-gnu -o - | FileCheck %s
+
+define i64 @absdiff(i64 %0, i64 %1) {
+; CHECK-LABEL: absdiff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: neg 6, 5
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: bc 12, 0, .LBB0_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ori 3, 5, 0
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: addi 3, 6, 0
+; CHECK-NEXT: blr
+ %3 = icmp ult i64 %0, %1
+ %4 = sext i1 %3 to i64
+ %5 = sub i64 %0, %1
+ %6 = xor i64 %5, %4
+ %7 = sub i64 %6, %4
+ ret i64 %7
+}
diff --git a/llvm/test/CodeGen/X86/absdiff.ll b/llvm/test/CodeGen/X86/absdiff.ll
new file mode 100644
index 000000000000000..ce9a00935448131
--- /dev/null
+++ b/llvm/test/CodeGen/X86/absdiff.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mtriple=x86_64-linux-gnu -passes=instcombine < %s -o - | llc -mtriple=x86_64-linux-gnu -o - | FileCheck %s
+
+define i64 @absdiff(i64 %0, i64 %1) {
+; CHECK-LABEL: absdiff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: subq %rsi, %rax
+; CHECK-NEXT: negq %rax
+; CHECK-NEXT: subq %rsi, %rdi
+; CHECK-NEXT: cmovaeq %rdi, %rax
+; CHECK-NEXT: retq
+ %3 = icmp ult i64 %0, %1
+ %4 = sext i1 %3 to i64
+ %5 = sub i64 %0, %1
+ %6 = xor i64 %5, %4
+ %7 = sub i64 %6, %4
+ ret i64 %7
+}
diff --git a/llvm/test/Transforms/InstCombine/sub-xor-cmp.ll b/llvm/test/Transforms/InstCombine/sub-xor-cmp.ll
new file mode 100644
index 000000000000000..393f2bd45bb8d77
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sub-xor-cmp.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i64 @absdiff(i64 %0, i64 %1) {
+; CHECK-LABEL: define i64 @absdiff(
+; CHECK-SAME: i64 [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i64 [[TMP5]], i64 [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP6]]
+;
+ %3 = icmp ult i64 %0, %1
+ %4 = sext i1 %3 to i64
+ %5 = sub i64 %0, %1
+ %6 = xor i64 %5, %4
+ %7 = sub i64 %6, %4
+ ret i64 %7
+}
|
@llvm/pr-subscribers-llvm-transforms Author: Kai Luo (bzEq) Changes: This is useful when computing absdiff. Correctness proof: https://alive2.llvm.org/ce/z/eMbxps. Full diff: https://github.com/llvm/llvm-project/pull/79417.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 8a00b75a1f74042..1a13fa4e2099946 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2448,6 +2448,16 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
}
}
+ {
+ // (sub (xor X, (sext C)), (sext C)) => (select C (neg X), X)
+ Value *C0, *C1, *X;
+ if (match(Op0, m_Xor(m_Value(X), m_SExt(m_Value(C0)))) &&
+ (C0->getType()->getScalarSizeInBits() == 1) &&
+ match(Op1, m_SExt(m_Value(C1))) && (C0 == C1)) {
+ return SelectInst::Create(C0, Builder.CreateNeg(X), X);
+ }
+ }
+
if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
return R;
diff --git a/llvm/test/CodeGen/AArch64/absdiff.ll b/llvm/test/CodeGen/AArch64/absdiff.ll
new file mode 100644
index 000000000000000..88c6fdad0a2030d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/absdiff.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mtriple=aarch64-linux-gnu -passes=instcombine < %s -o - | llc -mtriple=aarch64-linux-gnu -o - | FileCheck %s
+
+define i64 @absdiff(i64 %0, i64 %1) {
+; CHECK-LABEL: absdiff:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, lo
+; CHECK-NEXT: ret
+ %3 = icmp ult i64 %0, %1
+ %4 = sext i1 %3 to i64
+ %5 = sub i64 %0, %1
+ %6 = xor i64 %5, %4
+ %7 = sub i64 %6, %4
+ ret i64 %7
+}
diff --git a/llvm/test/CodeGen/PowerPC/absdiff.ll b/llvm/test/CodeGen/PowerPC/absdiff.ll
new file mode 100644
index 000000000000000..c806da988f0a0bf
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/absdiff.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mtriple=powerpc64-linux-gnu -passes=instcombine < %s -o - | llc -mtriple=powerpc64-linux-gnu -o - | FileCheck %s
+
+define i64 @absdiff(i64 %0, i64 %1) {
+; CHECK-LABEL: absdiff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: neg 6, 5
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: bc 12, 0, .LBB0_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ori 3, 5, 0
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: addi 3, 6, 0
+; CHECK-NEXT: blr
+ %3 = icmp ult i64 %0, %1
+ %4 = sext i1 %3 to i64
+ %5 = sub i64 %0, %1
+ %6 = xor i64 %5, %4
+ %7 = sub i64 %6, %4
+ ret i64 %7
+}
diff --git a/llvm/test/CodeGen/X86/absdiff.ll b/llvm/test/CodeGen/X86/absdiff.ll
new file mode 100644
index 000000000000000..ce9a00935448131
--- /dev/null
+++ b/llvm/test/CodeGen/X86/absdiff.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mtriple=x86_64-linux-gnu -passes=instcombine < %s -o - | llc -mtriple=x86_64-linux-gnu -o - | FileCheck %s
+
+define i64 @absdiff(i64 %0, i64 %1) {
+; CHECK-LABEL: absdiff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: subq %rsi, %rax
+; CHECK-NEXT: negq %rax
+; CHECK-NEXT: subq %rsi, %rdi
+; CHECK-NEXT: cmovaeq %rdi, %rax
+; CHECK-NEXT: retq
+ %3 = icmp ult i64 %0, %1
+ %4 = sext i1 %3 to i64
+ %5 = sub i64 %0, %1
+ %6 = xor i64 %5, %4
+ %7 = sub i64 %6, %4
+ ret i64 %7
+}
diff --git a/llvm/test/Transforms/InstCombine/sub-xor-cmp.ll b/llvm/test/Transforms/InstCombine/sub-xor-cmp.ll
new file mode 100644
index 000000000000000..393f2bd45bb8d77
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sub-xor-cmp.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i64 @absdiff(i64 %0, i64 %1) {
+; CHECK-LABEL: define i64 @absdiff(
+; CHECK-SAME: i64 [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i64 [[TMP5]], i64 [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP6]]
+;
+ %3 = icmp ult i64 %0, %1
+ %4 = sext i1 %3 to i64
+ %5 = sub i64 %0, %1
+ %6 = xor i64 %5, %4
+ %7 = sub i64 %6, %4
+ ret i64 %7
+}
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 | ||
; RUN: opt < %s -passes=instcombine -S | FileCheck %s | ||
|
||
define i64 @absdiff(i64 %0, i64 %1) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the motivation of this patch? Does this pattern exist in some real-world applications?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this pattern exist in some real-world applications?
Yes. We have observed this pattern in our internal workload.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
// (sub (sext C), (xor X, (sext C))) => (select C, X, (neg X)) | ||
Value *C, *X; | ||
auto m_SubXorCmp = [&C, &X](Value *LHS, Value *RHS) { | ||
return match(LHS, m_c_Xor(m_Value(X), m_SExt(m_OneUse(m_Value(C))))) && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the purpose of the m_OneUse?
Could the patterns be better matched as?
return match(LHS, m_c_Xor(m_Value(X), m_Specific(RHS))) &&
match(RHS, m_SExt(m_Specific(C))) &&
(C->getType()->getScalarSizeInBits() == 1);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the purpose of the m_OneUse?
It looks like there is no need to check m_OneUse of the i1 value, since we are still using it in the result.
What concerns me is the usage of (sext i1): if it is still used by instructions other than this (sub (sext C), (xor X, (sext C))), I am not sure the transformation is still profitable. I'll add test cases to demonstrate this, as suggested by @dtcxzyw.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Please wait for additional approval from other reviewers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
This is useful when computing absdiff.
Correctness proof: https://alive2.llvm.org/ce/z/eMbxps, https://alive2.llvm.org/ce/z/SNCWJe.