Skip to content

Conversation

@mikolaj-pirog
Copy link
Member

This is a follow-up to #165556.

I missed some parts of amx-transpose during the initial removal.

@llvmbot
Copy link
Member

llvmbot commented Dec 11, 2025

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-x86

Author: Mikołaj Piróg (mikolaj-pirog)

Changes

This is a follow-up to #165556.

I missed some parts of amx-transpose during the initial removal.


Full diff: https://github.com/llvm/llvm-project/pull/171906.diff

6 Files Affected:

  • (modified) clang/test/CodeGen/X86/amx_tf32.c (+1-1)
  • (modified) clang/test/CodeGen/X86/amx_tf32_api.c (+1-1)
  • (modified) clang/test/CodeGen/X86/amx_tf32_errors.c (+1-1)
  • (modified) clang/test/CodeGen/X86/amx_tf32_inline_asm.c (+1-8)
  • (modified) llvm/include/llvm/IR/IntrinsicsX86.td (-32)
  • (modified) llvm/lib/Target/X86/X86LowerAMXType.cpp (-4)
diff --git a/clang/test/CodeGen/X86/amx_tf32.c b/clang/test/CodeGen/X86/amx_tf32.c
index 54ad6bb714933..24893243b66e6 100644
--- a/clang/test/CodeGen/X86/amx_tf32.c
+++ b/clang/test/CodeGen/X86/amx_tf32.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-tf32 \
-// RUN: -target-feature +amx-transpose -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
 
 #include <immintrin.h>
 #include <stddef.h>
diff --git a/clang/test/CodeGen/X86/amx_tf32_api.c b/clang/test/CodeGen/X86/amx_tf32_api.c
index 8f574b7bc71dc..531378dbd0d72 100644
--- a/clang/test/CodeGen/X86/amx_tf32_api.c
+++ b/clang/test/CodeGen/X86/amx_tf32_api.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-tf32 -target-feature +amx-transpose  \
+// RUN: -target-feature +amx-tf32 \
 // RUN: -target-feature +amx-bf16 -target-feature +avx512f \
 // RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s
 
diff --git a/clang/test/CodeGen/X86/amx_tf32_errors.c b/clang/test/CodeGen/X86/amx_tf32_errors.c
index f0fdd060363cf..a1c525547c786 100644
--- a/clang/test/CodeGen/X86/amx_tf32_errors.c
+++ b/clang/test/CodeGen/X86/amx_tf32_errors.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-tf32 -target-feature +amx-transpose -verify
+// RUN: -target-feature +amx-tf32 -verify
 
 #include <immintrin.h>
 #include <stddef.h>
diff --git a/clang/test/CodeGen/X86/amx_tf32_inline_asm.c b/clang/test/CodeGen/X86/amx_tf32_inline_asm.c
index 76d164737d88b..ed67dda04e9f7 100644
--- a/clang/test/CodeGen/X86/amx_tf32_inline_asm.c
+++ b/clang/test/CodeGen/X86/amx_tf32_inline_asm.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tf32 -target-feature +amx-transpose -emit-llvm -o - -Wall -Werror -pedantic | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tf32 -emit-llvm -o - -Wall -Werror -pedantic | FileCheck %s
 
 void f_tilemul(short a)
 {
@@ -8,11 +8,4 @@ void f_tilemul(short a)
                     "tmmultf32ps %%tmm6, %%tmm0, %%tmm7   \n\t"
                     "tilestored %%tmm7, 0(%%r12,%%r15,4) \n\t"
           ::: "memory", "tmm0", "tmm6", "tmm7");
-
-  //CHECK:  call void asm sideeffect "tileloadd 0(%rsi,%r13,4), %tmm0   \0A\09tileloadd 0(%rdx,%r14,4), %tmm6   \0A\09ttmmultf32ps %tmm6, %tmm0, %tmm7    \0A\09tilestored %tmm7, 0(%r12,%r15,4) \0A\09", "~{memory},~{tmm0},~{tmm6},~{tmm7},~{dirflag},~{fpsr},~{flags}"()
-  __asm__ volatile ("tileloadd 0(%%rsi,%%r13,4), %%tmm0   \n\t"
-                    "tileloadd 0(%%rdx,%%r14,4), %%tmm6   \n\t"
-                    "ttmmultf32ps %%tmm6, %%tmm0, %%tmm7  \n\t"
-                    "tilestored %%tmm7, 0(%%r12,%%r15,4) \n\t"
-          ::: "memory", "tmm0", "tmm6", "tmm7");
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index ec80ba3e1ee81..0245611bc422b 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5505,20 +5505,6 @@ let TargetPrefix = "x86" in {
                         [ImmArg<ArgIndex<0>>,
                         ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
 
-  // AMX-MORVS, AMX-TRANSPOSE
-  def int_x86_t2rpntlvwz0rs : ClangBuiltin<"__builtin_ia32_t2rpntlvwz0rs">,
-              Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [ImmArg<ArgIndex<0>>]>;
-  def int_x86_t2rpntlvwz0rst1 : ClangBuiltin<"__builtin_ia32_t2rpntlvwz0rst1">,
-              Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [ImmArg<ArgIndex<0>>]>;
-  def int_x86_t2rpntlvwz1rs : ClangBuiltin<"__builtin_ia32_t2rpntlvwz1rs">,
-              Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [ImmArg<ArgIndex<0>>]>;
-  def int_x86_t2rpntlvwz1rst1 : ClangBuiltin<"__builtin_ia32_t2rpntlvwz1rst1">,
-              Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [ImmArg<ArgIndex<0>>]>;
-
   // AMX-AVX512
   def int_x86_tcvtrowd2ps : ClangBuiltin<"__builtin_ia32_tcvtrowd2ps">,
               Intrinsic<[llvm_v16f32_ty], [llvm_i8_ty, llvm_i32_ty],
@@ -5627,24 +5613,6 @@ let TargetPrefix = "x86" in {
                          llvm_x86amx_ty, llvm_x86amx_ty,
                          llvm_x86amx_ty], []>;
 
-  // AMX-MORVS, AMX-TRANSPOSE - internal intrinsics
-  def int_x86_t2rpntlvwz0rs_internal :
-              Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
-                        [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [IntrArgMemOnly, IntrReadMem]>;
-  def int_x86_t2rpntlvwz0rst1_internal :
-              Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
-                        [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [IntrArgMemOnly]>;
-  def int_x86_t2rpntlvwz1rs_internal :
-              Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
-                        [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [IntrArgMemOnly]>;
-  def int_x86_t2rpntlvwz1rst1_internal :
-              Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
-                        [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [IntrArgMemOnly]>;
-
   def int_x86_tcvtrowd2ps_internal :
               ClangBuiltin<"__builtin_ia32_tcvtrowd2ps_internal">,
               Intrinsic<[llvm_v16f32_ty],
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 2fc5d38ef5055..d93bcd31c5721 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -163,10 +163,6 @@ std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo) {
   case Intrinsic::x86_tileloadd64_internal:
   case Intrinsic::x86_tileloaddt164_internal:
   case Intrinsic::x86_tilestored64_internal:
-  case Intrinsic::x86_t2rpntlvwz0rs_internal:
-  case Intrinsic::x86_t2rpntlvwz0rst1_internal:
-  case Intrinsic::x86_t2rpntlvwz1rs_internal:
-  case Intrinsic::x86_t2rpntlvwz1rst1_internal:
   case Intrinsic::x86_tileloaddrs64_internal:
   case Intrinsic::x86_tileloaddrst164_internal: {
     Row = II->getArgOperand(0);

Copy link
Contributor

@e-kud e-kud left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Grepped once more. LGTM.

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks!

@mikolaj-pirog mikolaj-pirog merged commit 81a75b1 into llvm:main Dec 12, 2025
13 checks passed
anonymouspc pushed a commit to anonymouspc/llvm that referenced this pull request Dec 15, 2025
This is a follow-up to llvm#165556.

I missed some parts of amx-transpose during the initial removal.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants