[X86] Remove AMX-TRANSPOSE #165556

mikolaj-pirog · 2025-10-29T13:31:37Z

Per the Intel Architecture Instruction Set Extensions Programming Reference, rev. 59 (https://cdrdv2.intel.com/v1/dl/getContent/671368), Revision History entry for revision -59, AMX-TRANSPOSE was removed.

llvmbot · 2025-10-29T13:32:10Z

@llvm/pr-subscribers-mlgo
@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-clang-driver

@llvm/pr-subscribers-clang

Author: Mikołaj Piróg (mikolaj-pirog)

Changes

Per the Intel Architecture Instruction Set Extensions Programming Reference, rev. 59 (https://cdrdv2.intel.com/v1/dl/getContent/671368), Revision History entry for revision -59, AMX-TRANSPOSE was removed.

Patch is 526.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165556.diff

81 Files Affected:

(modified) clang/include/clang/Basic/BuiltinsX86_64.td (-89)
(modified) clang/include/clang/Driver/Options.td (-2)
(modified) clang/lib/Basic/Targets/X86.cpp (-6)
(modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (-68)
(modified) clang/lib/Headers/CMakeLists.txt (-6)
(removed) clang/lib/Headers/amxbf16transposeintrin.h (-94)
(removed) clang/lib/Headers/amxcomplextransposeintrin.h (-303)
(removed) clang/lib/Headers/amxfp16transposeintrin.h (-94)
(modified) clang/lib/Headers/amxintrin.h (-2)
(removed) clang/lib/Headers/amxmovrstransposeintrin.h (-200)
(removed) clang/lib/Headers/amxtf32transposeintrin.h (-105)
(removed) clang/lib/Headers/amxtransposeintrin.h (-248)
(modified) clang/lib/Headers/immintrin.h (-12)
(modified) clang/lib/Sema/SemaX86.cpp (-17)
(removed) clang/test/CodeGen/X86/amx_movrs_tranpose.c (-53)
(removed) clang/test/CodeGen/X86/amx_movrs_tranpose_api.c (-81)
(removed) clang/test/CodeGen/X86/amx_movrs_transpose_errors.c (-22)
(modified) clang/test/CodeGen/X86/amx_tf32.c (-5)
(modified) clang/test/CodeGen/X86/amx_tf32_api.c (-7)
(modified) clang/test/CodeGen/X86/amx_tf32_errors.c (-8)
(removed) clang/test/CodeGen/X86/amx_transpose.c (-75)
(removed) clang/test/CodeGen/X86/amx_transpose_api.c (-114)
(removed) clang/test/CodeGen/X86/amx_transpose_errors.c (-75)
(modified) clang/test/Driver/x86-target-features.c (-7)
(modified) clang/test/Preprocessor/predefined-arch-macros.c (-2)
(modified) clang/test/Preprocessor/x86_target_features.c (-12)
(modified) llvm/include/llvm/CodeGen/TileShapeInfo.h (+7-81)
(modified) llvm/include/llvm/IR/IntrinsicsX86.td (-104)
(modified) llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h (-1)
(modified) llvm/include/llvm/TargetParser/X86TargetParser.def (-1)
(modified) llvm/lib/Target/X86/AsmParser/X86Operand.h (-31)
(modified) llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp (-5)
(modified) llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h (-7)
(modified) llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp (-19)
(modified) llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h (-1)
(modified) llvm/lib/Target/X86/X86.td (+1-5)
(modified) llvm/lib/Target/X86/X86ExpandPseudo.cpp (-167)
(modified) llvm/lib/Target/X86/X86FastPreTileConfig.cpp (+8-24)
(modified) llvm/lib/Target/X86/X86FastTileConfig.cpp (-6)
(modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+2-76)
(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+1-154)
(modified) llvm/lib/Target/X86/X86InstrAMX.td (-208)
(modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+3-12)
(modified) llvm/lib/Target/X86/X86InstrOperands.td (-7)
(modified) llvm/lib/Target/X86/X86InstrPredicates.td (-1)
(modified) llvm/lib/Target/X86/X86LowerAMXType.cpp (+26-177)
(modified) llvm/lib/Target/X86/X86PreTileConfig.cpp (-9)
(modified) llvm/lib/Target/X86/X86RegisterInfo.cpp (+4-66)
(modified) llvm/lib/Target/X86/X86RegisterInfo.td (+2-11)
(modified) llvm/lib/Target/X86/X86TileConfig.cpp (+12-71)
(modified) llvm/lib/TargetParser/Host.cpp (-1)
(modified) llvm/lib/TargetParser/X86TargetParser.cpp (+1-1)
(modified) llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt (-22)
(modified) llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt (-22)
(modified) llvm/test/CodeGen/X86/amx-tf32-internal.ll (+2-5)
(modified) llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll (+1-11)
(removed) llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll (-122)
(removed) llvm/test/CodeGen/X86/amx_tile_pair_O2_to_O0.ll (-136)
(removed) llvm/test/CodeGen/X86/amx_tile_pair_configure_O0.mir (-165)
(removed) llvm/test/CodeGen/X86/amx_tile_pair_configure_O2.mir (-153)
(removed) llvm/test/CodeGen/X86/amx_tile_pair_copy.mir (-97)
(removed) llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll (-87)
(removed) llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll (-61)
(removed) llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O0.mir (-134)
(removed) llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O2.mir (-113)
(removed) llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll (-371)
(modified) llvm/test/CodeGen/X86/ipra-reg-usage.ll (+2-2)
(modified) llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt (-128)
(modified) llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt (-8)
(removed) llvm/test/MC/Disassembler/X86/amx-transpose-att.txt (-154)
(modified) llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s (-128)
(modified) llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s (-128)
(modified) llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s (-7)
(modified) llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s (-7)
(removed) llvm/test/MC/X86/amx-transpose-att.s (-153)
(removed) llvm/test/MC/X86/amx-transpose-intel.s (-153)
(modified) llvm/test/TableGen/x86-instr-mapping.inc (-8)
(modified) llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt (+26-26)
(modified) llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt (+5711-5733)
(modified) llvm/unittests/CodeGen/InstrRefLDVTest.cpp (+1-1)
(modified) llvm/utils/TableGen/X86RecognizableInstr.cpp (-1)

diff --git a/clang/include/clang/Basic/BuiltinsX86_64.td b/clang/include/clang/Basic/BuiltinsX86_64.td
index 275278c5ac089..062060e6afbbe 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.td
+++ b/clang/include/clang/Basic/BuiltinsX86_64.td
@@ -239,57 +239,6 @@ let Features = "amx-complex", Attributes = [NoThrow] in {
   def tcmmrlfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
 }
 
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0rs_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0t1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0rst1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz1rs_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz1t1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz1rst1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def ttransposed_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">;
-}
-
-let Features = "amx-bf16,amx-transpose", Attributes = [NoThrow] in {
-  def ttdpbf16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-}
-
-let Features = "amx-fp16,amx-transpose", Attributes = [NoThrow] in {
-  def ttdpfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-}
-
-let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {
-  def ttcmmimfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-  def ttcmmrlfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-  def tconjtcmmimfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-  def tconjtfp16_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">;
-}
-
 let Features = "amx-avx512,avx10.2", Attributes = [NoThrow] in {
   def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
   def tcvtrowps2bf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
@@ -303,10 +252,6 @@ let Features = "amx-tf32", Attributes = [NoThrow] in {
   def tmmultf32ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
 }
 
-let Features = "amx-tf32,amx-transpose", Attributes = [NoThrow] in {
-  def ttmmultf32ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-}
-
 let Features = "amx-fp8", Attributes = [NoThrow] in {
   def tdpbf8ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
   def tdpbhf8ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
@@ -321,13 +266,6 @@ let Features = "amx-tile", Attributes = [NoThrow] in {
   def tilezero : X86Builtin<"void(unsigned char)">;
 }
 
-let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0rs : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz0rst1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz1rs : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz1rst1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-}
-
 let Features = "amx-movrs", Attributes = [NoThrow] in {
   def tileloaddrs64 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
   def tileloaddrst164 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
@@ -359,29 +297,6 @@ let Features = "amx-complex", Attributes = [NoThrow] in {
   def tcmmrlfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
 }
 
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz0t1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz1t1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def ttransposed : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">;
-}
-
-let Features = "amx-bf16,amx-transpose", Attributes = [NoThrow] in {
-  def ttdpbf16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-}
-
-let Features = "amx-fp16,amx-transpose", Attributes = [NoThrow] in {
-  def ttdpfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-}
-
-let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {
-  def ttcmmimfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-  def ttcmmrlfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-  def tconjtcmmimfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-  def tconjtfp16 : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">;
-}
-
 let Features = "amx-avx512,avx10.2", Attributes = [NoThrow] in {
   def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, unsigned int)">;
   def tcvtrowps2bf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
@@ -406,10 +321,6 @@ let Features = "amx-tf32", Attributes = [NoThrow] in {
   def tmmultf32ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
 }
 
-let Features = "amx-tf32,amx-transpose", Attributes = [NoThrow] in {
-  def ttmmultf32ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-}
-
 let Features = "prefetchi", Attributes = [NoThrow, Const] in {
   def prefetchi : X86Builtin<"void(void const *, unsigned int)">;
 }
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 8784c9d7d206d..1d11db1209e47 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6695,8 +6695,6 @@ def mamx_tf32 : Flag<["-"], "mamx-tf32">, Group<m_x86_Features_Group>;
 def mno_amx_tf32 : Flag<["-"], "mno-amx-tf32">, Group<m_x86_Features_Group>;
 def mamx_tile : Flag<["-"], "mamx-tile">, Group<m_x86_Features_Group>;
 def mno_amx_tile : Flag<["-"], "mno-amx-tile">, Group<m_x86_Features_Group>;
-def mamx_transpose : Flag<["-"], "mamx-transpose">, Group<m_x86_Features_Group>;
-def mno_amx_transpose : Flag<["-"], "mno-amx-transpose">, Group<m_x86_Features_Group>;
 def mamx_movrs: Flag<["-"], "mamx-movrs">, Group<m_x86_Features_Group>;
 def mno_amx_movrs: Flag<["-"], "mno-amx-movrs">, Group<m_x86_Features_Group>;
 def mcmpccxadd : Flag<["-"], "mcmpccxadd">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index e71f10c4c16fc..7a90c89dd7dc0 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -396,8 +396,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAMXFP8 = true;
     } else if (Feature == "+amx-movrs") {
       HasAMXMOVRS = true;
-    } else if (Feature == "+amx-transpose") {
-      HasAMXTRANSPOSE = true;
     } else if (Feature == "+amx-avx512") {
       HasAMXAVX512 = true;
     } else if (Feature == "+amx-tf32") {
@@ -925,8 +923,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__AMX_FP8__");
   if (HasAMXMOVRS)
     Builder.defineMacro("__AMX_MOVRS__");
-  if (HasAMXTRANSPOSE)
-    Builder.defineMacro("__AMX_TRANSPOSE__");
   if (HasAMXAVX512)
     Builder.defineMacro("__AMX_AVX512__");
   if (HasAMXTF32)
@@ -1068,7 +1064,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("amx-movrs", true)
       .Case("amx-tf32", true)
       .Case("amx-tile", true)
-      .Case("amx-transpose", true)
       .Case("avx", true)
       .Case("avx10.1", true)
       .Case("avx10.2", true)
@@ -1189,7 +1184,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("amx-movrs", HasAMXMOVRS)
       .Case("amx-tf32", HasAMXTF32)
       .Case("amx-tile", HasAMXTILE)
-      .Case("amx-transpose", HasAMXTRANSPOSE)
       .Case("avx", SSELevel >= AVX)
       .Case("avx10.1", HasAVX10_1)
       .Case("avx10.2", HasAVX10_2)
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index b924407b6ddd7..2381b2e7cf2cf 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -2931,74 +2931,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     // instruction, but it will create a memset that won't be optimized away.
     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
   }
-  // Corresponding to intrisics which will return 2 tiles (tile0_tile1).
-  case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
-    Intrinsic::ID IID;
-    switch (BuiltinID) {
-    default:
-      llvm_unreachable("Unsupported intrinsic!");
-    case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
-      IID = Intrinsic::x86_t2rpntlvwz0_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
-      IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
-      IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
-      IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
-      IID = Intrinsic::x86_t2rpntlvwz1_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
-      IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
-      IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
-      IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
-      break;
-    }
-
-    // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
-    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
-                                     {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
-
-    auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
-    assert(PtrTy && "arg3 must be of pointer type");
-    QualType PtreeTy = PtrTy->getPointeeType();
-    llvm::Type *TyPtee = ConvertType(PtreeTy);
-
-    // Bitcast amx type (x86_amx) to vector type (256 x i32)
-    // Then store tile0 into DstPtr0
-    Value *T0 = Builder.CreateExtractValue(Call, 0);
-    Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
-                                           {TyPtee}, {T0});
-    Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
-
-    // Then store tile1 into DstPtr1
-    Value *T1 = Builder.CreateExtractValue(Call, 1);
-    Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
-                                           {TyPtee}, {T1});
-    Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
-
-    // Note: Here we escape directly use x86_tilestored64_internal to store
-    // the results due to it can't make sure the Mem written scope. This may
-    // cause shapes reloads after first amx intrinsic, which current amx reg-
-    // ister allocation has no ability to handle it.
-
-    return Store;
-  }
   case X86::BI__ud2:
     // llvm.trap makes a ud2a instruction on x86.
     return EmitTrapCall(Intrinsic::trap);
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 18589125697b0..33fff7645df65 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -162,18 +162,12 @@ set(x86_files
   adxintrin.h
   ammintrin.h
   amxavx512intrin.h
-  amxbf16transposeintrin.h
   amxcomplexintrin.h
-  amxcomplextransposeintrin.h
   amxfp16intrin.h
-  amxfp16transposeintrin.h
   amxfp8intrin.h
   amxintrin.h
   amxmovrsintrin.h
-  amxmovrstransposeintrin.h
   amxtf32intrin.h
-  amxtf32transposeintrin.h
-  amxtransposeintrin.h
   avx10_2_512bf16intrin.h
   avx10_2_512convertintrin.h
   avx10_2_512minmaxintrin.h
diff --git a/clang/lib/Headers/amxbf16transposeintrin.h b/clang/lib/Headers/amxbf16transposeintrin.h
deleted file mode 100644
index 86f09f2ad8db2..0000000000000
--- a/clang/lib/Headers/amxbf16transposeintrin.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxbf16transposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_BF16TRANSPOSEINTRIN_H
-#define __AMX_BF16TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-bf16,amx-transpose")))
-
-/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
-///    tiles \a a and \a b, accumulating the intermediate single-precision
-///    (32-bit) floating-point elements with elements in \a dst, and store the
-///    32-bit result back to tile \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b)
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO (a.colsb / 4) - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) *
-///					FP32(b.row[k].bf16[2*n+0])
-///			tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) *
-///					FP32(b.row[k].bf16[2*n+1])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTDPBF16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b))
-
-/// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k,
-                         _tile1024i dst, _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
-///    tiles src0 and src1, accumulating the intermediate single-precision
-///    (32-bit) floating-point elements with elements in "dst", and store the
-///    32-bit result back to tile "dst".
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTDPBF16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0,
-                                        __tile1024i src1) {
-  dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
-                                       src0.tile, src1.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif /* __x86_64__ */
-#endif /* __AMX_BF16TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/amxcomplextransposeintrin.h b/clang/lib/Headers/amxcomplextransposeintrin.h
deleted file mode 100644
index 11abaf98e9371..0000000000000
--- a/clang/lib/Headers/amxcomplextransposeintrin.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxcomplextransposeintrin.h> directly; include <immintrin.h> instead."
-#endif // __IMMINTRIN_H
-
-#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H
-#define __AMX_COMPLEXTRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-complex,amx-transpose")))
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-///    accumulate the results into a packed single precision tile. Each dword
-///    element in input tiles \a a and \a b is interpreted as a complex number
-///    with FP16 real part and FP16 imaginary part.
-/// Calculates the imaginary part of the result. For each possible combination
-///    of (transposed column of \a a, column of \a b), it performs a set of
-///    multiplication and accumulations on all corre...
[truncated]

github-actions · 2025-10-29T13:33:52Z

✅ With the latest revision this PR passed the C/C++ code formatter.

phoebewang · 2025-10-30T03:55:49Z

llvm/lib/Target/X86/X86ExpandPseudo.cpp

-      break;
-    case X86::PTCONJTCMMIMFP16PSV:
-      Opc = X86::TCONJTCMMIMFP16PS;
+    case X86::PTDPFP16PSV:


Move it to the same line to keep it as is. We may unify the code below to the same format too.

I unified the code below and turned off clang-format for this block.

llvm/lib/Target/X86/X86FastPreTileConfig.cpp

llvm/lib/Target/X86/X86InstrInfo.cpp

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/lib/Target/X86/X86RegisterInfo.td

phoebewang

Just a few minor comments; otherwise LGTM.

llvm/lib/Target/X86/X86InstrAMX.td

llvm/lib/Target/X86/X86PreTileConfig.cpp

llvm/lib/Target/X86/X86FastPreTileConfig.cpp

e-kud

LGTM. Thanks!

llvm-ci · 2025-10-31T12:00:02Z

LLVM Buildbot has detected a new failure on builder flang-x86_64-windows running on minipc-ryzen-win while building clang,llvm at step 6 "build-unified-tree".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/166/builds/3551

Here is the relevant piece of the build log for reference:

Step 6 (build-unified-tree) failure: build (failure)
...
3.327 [7248/6/1] Building CXX object lib\Demangle\CMakeFiles\LLVMDemangle.dir\Demangle.cpp.obj
3.371 [7247/6/2] Building CXX object lib\Demangle\CMakeFiles\LLVMDemangle.dir\DLangDemangle.cpp.obj
3.385 [7246/6/3] Building CXX object lib\Demangle\CMakeFiles\LLVMDemangle.dir\MicrosoftDemangleNodes.cpp.obj
3.528 [7245/6/4] Building CXX object lib\Support\CMakeFiles\LLVMSupport.dir\ABIBreak.cpp.obj
4.237 [7244/6/5] Building CXX object lib\Demangle\CMakeFiles\LLVMDemangle.dir\RustDemangle.cpp.obj
7.039 [7243/6/6] Building CXX object lib\Support\CMakeFiles\LLVMSupport.dir\APFixedPoint.cpp.obj
7.549 [7242/6/7] Building CXX object lib\Support\CMakeFiles\LLVMSupport.dir\AMDGPUMetadata.cpp.obj
7.553 [7241/6/8] Building CXX object lib\Demangle\CMakeFiles\LLVMDemangle.dir\ItaniumDemangle.cpp.obj
9.752 [7240/6/9] Building CXX object lib\Support\CMakeFiles\LLVMSupport.dir\APFloat.cpp.obj
9.950 [7239/6/10] Building CXX object lib\Demangle\CMakeFiles\LLVMDemangle.dir\MicrosoftDemangle.cpp.obj
FAILED: [code=3221225477] lib/Demangle/CMakeFiles/LLVMDemangle.dir/MicrosoftDemangle.cpp.obj 
ccache C:\PROGRA~1\MICROS~2\2022\COMMUN~1\VC\Tools\MSVC\1444~1.352\bin\Hostx64\x64\cl.exe  /nologo /TP -DGTEST_HAS_RTTI=0 -DUNICODE -D_CRT_NONSTDC_NO_DEPRECATE -D_CRT_NONSTDC_NO_WARNINGS -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS -D_GLIBCXX_ASSERTIONS -D_HAS_EXCEPTIONS=0 -D_SCL_SECURE_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS -D_UNICODE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -IC:\buildbot\flang-x86_64-windows\build\lib\Demangle -IC:\buildbot\flang-x86_64-windows\llvm-project\llvm\lib\Demangle -IC:\buildbot\flang-x86_64-windows\build\include -IC:\buildbot\flang-x86_64-windows\llvm-project\llvm\include /DWIN32 /D_WINDOWS   /Zc:inline /Zc:preprocessor /Zc:__cplusplus /Oi /bigobj /permissive- /W4 -wd4141 -wd4146 -wd4244 -wd4267 -wd4291 -wd4351 -wd4456 -wd4457 -wd4458 -wd4459 -wd4503 -wd4624 -wd4722 -wd4100 -wd4127 -wd4512 -wd4505 -wd4610 -wd4510 -wd4702 -wd4245 -wd4706 -wd4310 -wd4701 -wd4703 -wd4389 -wd4611 -wd4805 -wd4204 -wd4577 -wd4091 -wd4592 -wd4319 -wd4709 -wd5105 -wd4324 -wd4251 -wd4275 -w14062 -we4238 /Gw /O2 /Ob2  -std:c++17 -MD  /EHs-c- /GR- -UNDEBUG /showIncludes /Folib\Demangle\CMakeFiles\LLVMDemangle.dir\MicrosoftDemangle.cpp.obj /Fdlib\Demangle\CMakeFiles\LLVMDemangle.dir\LLVMDemangle.pdb /FS -c C:\buildbot\flang-x86_64-windows\llvm-project\llvm\lib\Demangle\MicrosoftDemangle.cpp
C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.44.35207\include\xstring(547): fatal error C1001: Internal compiler error.
(compiler file 'msc1.cpp', line 1589)
 To work around this problem, try simplifying or changing the program near the locations listed above.
If possible please provide a repro here: https://developercommunity.visualstudio.com 
Please choose the Technical Support command on the Visual C++ 
 Help menu, or open the Technical Support help file for more information
C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.44.35207\include\xstring(547): note: the template instantiation context (the oldest one first) is
C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.44.35207\include\xstring(541): note: while compiling class template 'std::basic_string'
INTERNAL COMPILER ERROR in 'C:\PROGRA~1\MICROS~2\2022\COMMUN~1\VC\Tools\MSVC\1444~1.352\bin\Hostx64\x64\cl.exe'
    Please choose the Technical Support command on the Visual C++
    Help menu, or open the Technical Support help file for more information
9.966 [7239/5/11] Building CXX object lib\Support\CMakeFiles\LLVMSupport.dir\APSInt.cpp.obj
10.257 [7239/4/12] Building CXX object lib\Support\CMakeFiles\LLVMSupport.dir\APInt.cpp.obj
10.895 [7239/3/13] Building CXX object lib\Support\CMakeFiles\LLVMSupport.dir\ARMBuildAttributes.cpp.obj
11.064 [7239/2/14] Building CXX object lib\Support\CMakeFiles\LLVMSupport.dir\AArch64BuildAttributes.cpp.obj
11.220 [7239/1/15] Building CXX object lib\Support\CMakeFiles\LLVMSupport.dir\AArch64AttributeParser.cpp.obj
ninja: build stopped: subcommand failed.
Cache directory:      C:\Users\buildbot-worker\AppData\Local\ccache
Config file:          C:\Users\buildbot-worker\AppData\Local\ccache\ccache.conf
System config file:   C:\ProgramData\ccache\ccache.conf
Stats updated:        10/31/25 04:59:52
Cacheable calls:         14 /  15 (93.33%)
  Hits:                   0 /  14 ( 0.00%)
    Direct:               0
    Preprocessed:         0
  Misses:                14 /  14 (100.0%)
Uncacheable calls:        1 /  15 ( 6.67%)
  Compilation failed:     1 /   1 (100.0%)
Successful lookups:
  Direct:                 0 /  15 ( 0.00%)
  Preprocessed:           0 /  15 ( 0.00%)
Local storage:
  Cache size (GB):      5.0 / 5.0 (99.78%)
  Files:              16155
  Hits:                   0 /  14 ( 0.00%)
  Misses:                14 /  14 (100.0%)
  Reads:                 30

llvm-ci · 2025-10-31T12:13:10Z

LLVM Buildbot has detected a new failure on builder mlir-nvidia running on mlir-nvidia while building clang,llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/21159

Here is the relevant piece of the build log for reference:

Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x58d51075e790 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 42]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x58d51075e790 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 42] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...

llvm-ci · 2025-10-31T12:23:35Z

LLVM Buildbot has detected a new failure on builder ml-opt-devrel-x86-64 running on ml-opt-devrel-x86-64-b2 while building clang,llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/27971

Here is the relevant piece of the build log for reference:

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 7
/b/ml-opt-devrel-x86-64-b1/build/bin/llc -o /dev/null -mtriple=x86_64-linux-unknown -regalloc=greedy    -regalloc-enable-advisor=development    -regalloc-training-log=/b/ml-opt-devrel-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1    -regalloc-enable-development-features < /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/Inputs/input.ll
# executed command: /b/ml-opt-devrel-x86-64-b1/build/bin/llc -o /dev/null -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development -regalloc-training-log=/b/ml-opt-devrel-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1 -regalloc-enable-development-features
# RUN: at line 11
"/usr/bin/python3" /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/../../../lib/Analysis/models/log_reader.py /b/ml-opt-devrel-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1 > /b/ml-opt-devrel-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable
# executed command: /usr/bin/python3 /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/../../../lib/Analysis/models/log_reader.py /b/ml-opt-devrel-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1
# RUN: at line 12
/b/ml-opt-devrel-x86-64-b1/build/bin/FileCheck --input-file /b/ml-opt-devrel-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll
# executed command: /b/ml-opt-devrel-x86-64-b1/build/bin/FileCheck --input-file /b/ml-opt-devrel-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll
# .---command stderr------------
# | /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll:29:10: error: CHECK: expected string not found in input
# | ; CHECK: instructions: 20,{{([0-9]{4})}},1{{([0-9]{3})}},2{{([0-9]{3})}},{{.*}},0,
# |          ^
# | /b/ml-opt-devrel-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable:1:1: note: scanning from here
# | context: SyFgets
# | ^
# | /b/ml-opt-devrel-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable:24:1: note: possible intended match here
# | instructions: 20,4988,1976,1996,365,4994,1976,1996,365,4994,1976,1996,2130,2130,1312,1976,1996,744,20,368,2560,1141,20,746,1846,2579,2130,2579,2560,2596,2553,4994,1976,1996,365,4994,1976,365,1996,365,2579,2577,2596,365,365,2128,1300,1976,1996,2128,1300,1976,1996,1989,1989,1996,4994,1976,1996,2560,1996,1996,2560,1996,1503,4984,20,1976,1996,2560,1996,1503,4984,20,1976,1996,4994,10,1976,1996,4994,10,1976,1996,10,20,4994,1976,1996,4994,1976,1996,744,368,1141,746,4994,1976,1996,2130,4994,1976,1996,1903,2128,1300,1976,1996,1300,1976,1996,1082,1976,1996,1300,1976,1996,2590,4994,1976,1996,4994,1976,1996,2590,1996,2560,1996,1996,1996,1312,1976,365,2560,1996,1996,1892,1300,1976,2560,1078,1976,365,1996,365,4994,1976,1996,20,744,368,1141,746,4825,2579,2130,365,4994,1976,1996,1892,365,1996,2579,744,20,1141,746,1892,4994,1976,1996,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
# | ^
# | 
# | Input file: /b/ml-opt-devrel-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable
# | Check file: /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: context: SyFgets 
# | check:29'0     X~~~~~~~~~~~~~~~~ error: no match found
# |             2: observation: 0 
# | check:29'0     ~~~~~~~~~~~~~~~
# |             3: mask: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 
# | check:29'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |             4: is_free: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 
# | check:29'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |             5: nr_urgent: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
# | check:29'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |             6: nr_broken_hints: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 
# | check:29'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# |            19: liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7939082384109497,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251 
# | check:29'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            20: use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.014218696393072605,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016 
...

llvm-ci · 2025-10-31T12:23:58Z

LLVM Buildbot has detected a new failure on builder ml-opt-dev-x86-64 running on ml-opt-dev-x86-64-b1 while building clang,llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/28156

Here is the relevant piece of the build log for reference:

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 7
/b/ml-opt-dev-x86-64-b1/build/bin/llc -o /dev/null -mtriple=x86_64-linux-unknown -regalloc=greedy    -regalloc-enable-advisor=development    -regalloc-training-log=/b/ml-opt-dev-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1    -regalloc-enable-development-features < /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/Inputs/input.ll
# executed command: /b/ml-opt-dev-x86-64-b1/build/bin/llc -o /dev/null -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development -regalloc-training-log=/b/ml-opt-dev-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1 -regalloc-enable-development-features
# RUN: at line 11
"/usr/bin/python3" /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/../../../lib/Analysis/models/log_reader.py /b/ml-opt-dev-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1 > /b/ml-opt-dev-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable
# executed command: /usr/bin/python3 /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/../../../lib/Analysis/models/log_reader.py /b/ml-opt-dev-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1
# RUN: at line 12
/b/ml-opt-dev-x86-64-b1/build/bin/FileCheck --input-file /b/ml-opt-dev-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll
# executed command: /b/ml-opt-dev-x86-64-b1/build/bin/FileCheck --input-file /b/ml-opt-dev-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll
# .---command stderr------------
# | /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll:29:10: error: CHECK: expected string not found in input
# | ; CHECK: instructions: 20,{{([0-9]{4})}},1{{([0-9]{3})}},2{{([0-9]{3})}},{{.*}},0,
# |          ^
# | /b/ml-opt-dev-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable:1:1: note: scanning from here
# | context: SyFgets
# | ^
# | /b/ml-opt-dev-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable:24:1: note: possible intended match here
# | instructions: 20,4988,1976,1996,365,4994,1976,1996,365,4994,1976,1996,2130,2130,1312,1976,1996,744,20,368,2560,1141,20,746,1846,2579,2130,2579,2560,2596,2553,4994,1976,1996,365,4994,1976,365,1996,365,2579,2577,2596,365,365,2128,1300,1976,1996,2128,1300,1976,1996,1989,1989,1996,4994,1976,1996,2560,1996,1996,2560,1996,1503,4984,20,1976,1996,2560,1996,1503,4984,20,1976,1996,4994,10,1976,1996,4994,10,1976,1996,10,20,4994,1976,1996,4994,1976,1996,744,368,1141,746,4994,1976,1996,2130,4994,1976,1996,1903,2128,1300,1976,1996,1300,1976,1996,1082,1976,1996,1300,1976,1996,2590,4994,1976,1996,4994,1976,1996,2590,1996,2560,1996,1996,1996,1312,1976,365,2560,1996,1996,1892,1300,1976,2560,1078,1976,365,1996,365,4994,1976,1996,20,744,368,1141,746,4825,2579,2130,365,4994,1976,1996,1892,365,1996,2579,744,20,1141,746,1892,4994,1976,1996,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
# | ^
# | 
# | Input file: /b/ml-opt-dev-x86-64-b1/build/test/CodeGen/MLRegAlloc/Output/dev-mode-extra-features-logging.ll.tmp1.readable
# | Check file: /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: context: SyFgets 
# | check:29'0     X~~~~~~~~~~~~~~~~ error: no match found
# |             2: observation: 0 
# | check:29'0     ~~~~~~~~~~~~~~~
# |             3: mask: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 
# | check:29'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |             4: is_free: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 
# | check:29'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |             5: nr_urgent: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
# | check:29'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |             6: nr_broken_hints: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 
# | check:29'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# |            19: liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7939082384109497,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251 
# | check:29'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            20: use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.014218696393072605,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016 
...

Remove amx_transpose

12269e7

mikolaj-pirog requested review from RKSimon, e-kud and phoebewang October 29, 2025 13:31

mikolaj-pirog added 2 commits October 29, 2025 20:29

Delete unused variable

4e86b45

Formatting

ef75547