-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[AArch64] Support USDOT in performAddDotCombine #171864
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This function does // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y) Which can equally apply to USDOT too now that we have a node for it.
|
@llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) ChangesThis function does Which can equally apply to USDOT too now that we have a node for it. Full diff: https://github.com/llvm/llvm-project/pull/171864.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 41caa817c11a4..35d40eb4e6e3f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21813,7 +21813,8 @@ static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
// Handle commutivity
auto isZeroDot = [](SDValue Dot) {
return (Dot.getOpcode() == AArch64ISD::UDOT ||
- Dot.getOpcode() == AArch64ISD::SDOT) &&
+ Dot.getOpcode() == AArch64ISD::SDOT ||
+ Dot.getOpcode() == AArch64ISD::USDOT) &&
isZerosVector(Dot.getOperand(0).getNode());
};
if (!isZeroDot(Dot))
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matmul.ll b/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
index e7e9ee7330613..c6776f3dd2513 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm < %s | FileCheck %s
-; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm -global-isel < %s | FileCheck %s
+; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <4 x i32> @smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: smmla.v4i32.v16i8:
@@ -160,6 +160,42 @@ entry:
ret <4 x i32> %vusdot1.i
}
+define <2 x i32> @usdot_add_zero.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+; CHECK-SD-LABEL: usdot_add_zero.v2i32.v8i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usdot v0.2s, v1.8b, v2.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: usdot_add_zero.v2i32.v8i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
+; CHECK-GI-NEXT: usdot v3.2s, v1.8b, v2.8b
+; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
+; CHECK-GI-NEXT: ret
+entry:
+ %x = tail call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> zeroinitializer, <8 x i8> %a, <8 x i8> %b)
+ %y = add <2 x i32> %x, %r
+ ret <2 x i32> %y
+}
+
+define <4 x i32> @usdot_add_zero.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-SD-LABEL: usdot_add_zero.v4i32.v16i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usdot v0.4s, v1.16b, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: usdot_add_zero.v4i32.v16i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
+; CHECK-GI-NEXT: usdot v3.4s, v1.16b, v2.16b
+; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
+; CHECK-GI-NEXT: ret
+entry:
+ %x = tail call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> %a, <16 x i8> %b)
+ %y = add <4 x i32> %x, %r
+ ret <4 x i32> %y
+}
+
declare <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
declare <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
declare <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
|
hazzlim
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: s/instruction/instructions in the commit message? Or perhaps [AArch64] Support USDOT in performAddDotCombine or similar?
Otherwise LGTM :)
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
This function does
// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
Which can equally apply to USDOT too now that we have a node for it.