-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[X86] Remove rest of AMX-TRANSPOSE #171906
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] Remove rest of AMX-TRANSPOSE #171906
Conversation
|
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-x86 Author: Mikołaj Piróg (mikolaj-pirog) Changes: This is a followup to #165556. I missed some parts of amx-transpose during the initial removal. Full diff: https://github.com/llvm/llvm-project/pull/171906.diff 6 Files Affected:
diff --git a/clang/test/CodeGen/X86/amx_tf32.c b/clang/test/CodeGen/X86/amx_tf32.c
index 54ad6bb714933..24893243b66e6 100644
--- a/clang/test/CodeGen/X86/amx_tf32.c
+++ b/clang/test/CodeGen/X86/amx_tf32.c
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-tf32 \
-// RUN: -target-feature +amx-transpose -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
#include <immintrin.h>
#include <stddef.h>
diff --git a/clang/test/CodeGen/X86/amx_tf32_api.c b/clang/test/CodeGen/X86/amx_tf32_api.c
index 8f574b7bc71dc..531378dbd0d72 100644
--- a/clang/test/CodeGen/X86/amx_tf32_api.c
+++ b/clang/test/CodeGen/X86/amx_tf32_api.c
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-tf32 -target-feature +amx-transpose \
+// RUN: -target-feature +amx-tf32 \
// RUN: -target-feature +amx-bf16 -target-feature +avx512f \
// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s
diff --git a/clang/test/CodeGen/X86/amx_tf32_errors.c b/clang/test/CodeGen/X86/amx_tf32_errors.c
index f0fdd060363cf..a1c525547c786 100644
--- a/clang/test/CodeGen/X86/amx_tf32_errors.c
+++ b/clang/test/CodeGen/X86/amx_tf32_errors.c
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-tf32 -target-feature +amx-transpose -verify
+// RUN: -target-feature +amx-tf32 -verify
#include <immintrin.h>
#include <stddef.h>
diff --git a/clang/test/CodeGen/X86/amx_tf32_inline_asm.c b/clang/test/CodeGen/X86/amx_tf32_inline_asm.c
index 76d164737d88b..ed67dda04e9f7 100644
--- a/clang/test/CodeGen/X86/amx_tf32_inline_asm.c
+++ b/clang/test/CodeGen/X86/amx_tf32_inline_asm.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tf32 -target-feature +amx-transpose -emit-llvm -o - -Wall -Werror -pedantic | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tf32 -emit-llvm -o - -Wall -Werror -pedantic | FileCheck %s
void f_tilemul(short a)
{
@@ -8,11 +8,4 @@ void f_tilemul(short a)
"tmmultf32ps %%tmm6, %%tmm0, %%tmm7 \n\t"
"tilestored %%tmm7, 0(%%r12,%%r15,4) \n\t"
::: "memory", "tmm0", "tmm6", "tmm7");
-
- //CHECK: call void asm sideeffect "tileloadd 0(%rsi,%r13,4), %tmm0 \0A\09tileloadd 0(%rdx,%r14,4), %tmm6 \0A\09ttmmultf32ps %tmm6, %tmm0, %tmm7 \0A\09tilestored %tmm7, 0(%r12,%r15,4) \0A\09", "~{memory},~{tmm0},~{tmm6},~{tmm7},~{dirflag},~{fpsr},~{flags}"()
- __asm__ volatile ("tileloadd 0(%%rsi,%%r13,4), %%tmm0 \n\t"
- "tileloadd 0(%%rdx,%%r14,4), %%tmm6 \n\t"
- "ttmmultf32ps %%tmm6, %%tmm0, %%tmm7 \n\t"
- "tilestored %%tmm7, 0(%%r12,%%r15,4) \n\t"
- ::: "memory", "tmm0", "tmm6", "tmm7");
}
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index ec80ba3e1ee81..0245611bc422b 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5505,20 +5505,6 @@ let TargetPrefix = "x86" in {
[ImmArg<ArgIndex<0>>,
ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
- // AMX-MORVS, AMX-TRANSPOSE
- def int_x86_t2rpntlvwz0rs : ClangBuiltin<"__builtin_ia32_t2rpntlvwz0rs">,
- Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
- [ImmArg<ArgIndex<0>>]>;
- def int_x86_t2rpntlvwz0rst1 : ClangBuiltin<"__builtin_ia32_t2rpntlvwz0rst1">,
- Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
- [ImmArg<ArgIndex<0>>]>;
- def int_x86_t2rpntlvwz1rs : ClangBuiltin<"__builtin_ia32_t2rpntlvwz1rs">,
- Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
- [ImmArg<ArgIndex<0>>]>;
- def int_x86_t2rpntlvwz1rst1 : ClangBuiltin<"__builtin_ia32_t2rpntlvwz1rst1">,
- Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
- [ImmArg<ArgIndex<0>>]>;
-
// AMX-AVX512
def int_x86_tcvtrowd2ps : ClangBuiltin<"__builtin_ia32_tcvtrowd2ps">,
Intrinsic<[llvm_v16f32_ty], [llvm_i8_ty, llvm_i32_ty],
@@ -5627,24 +5613,6 @@ let TargetPrefix = "x86" in {
llvm_x86amx_ty, llvm_x86amx_ty,
llvm_x86amx_ty], []>;
- // AMX-MORVS, AMX-TRANSPOSE - internal intrinsics
- def int_x86_t2rpntlvwz0rs_internal :
- Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
- [IntrArgMemOnly, IntrReadMem]>;
- def int_x86_t2rpntlvwz0rst1_internal :
- Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
- [IntrArgMemOnly]>;
- def int_x86_t2rpntlvwz1rs_internal :
- Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
- [IntrArgMemOnly]>;
- def int_x86_t2rpntlvwz1rst1_internal :
- Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
- [IntrArgMemOnly]>;
-
def int_x86_tcvtrowd2ps_internal :
ClangBuiltin<"__builtin_ia32_tcvtrowd2ps_internal">,
Intrinsic<[llvm_v16f32_ty],
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 2fc5d38ef5055..d93bcd31c5721 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -163,10 +163,6 @@ std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo) {
case Intrinsic::x86_tileloadd64_internal:
case Intrinsic::x86_tileloaddt164_internal:
case Intrinsic::x86_tilestored64_internal:
- case Intrinsic::x86_t2rpntlvwz0rs_internal:
- case Intrinsic::x86_t2rpntlvwz0rst1_internal:
- case Intrinsic::x86_t2rpntlvwz1rs_internal:
- case Intrinsic::x86_t2rpntlvwz1rst1_internal:
case Intrinsic::x86_tileloaddrs64_internal:
case Intrinsic::x86_tileloaddrst164_internal: {
Row = II->getArgOperand(0);
|
e-kud
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Grepped once more. LGTM.
phoebewang
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
This is a followup to llvm#165556. I missed some parts of amx-transpose during the initial removal.
This is a followup to #165556.
I missed some parts of amx-transpose during the initial removal.