From 54ac616a28d1aa5544192a8a2cdbce30641fa22f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 2 Nov 2025 09:07:51 +0000 Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.7 [skip ci] --- llvm/lib/Target/X86/X86.h | 13 ++++- llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp | 48 +++++++++++++++---- llvm/lib/Target/X86/X86PassRegistry.def | 2 +- llvm/lib/Target/X86/X86TargetMachine.cpp | 2 +- .../AMX/amx-low-intrinsics-no-amx-bitcast.ll | 3 +- .../CodeGen/X86/AMX/amx-low-intrinsics.ll | 3 +- 6 files changed, 56 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 51b540a7a51d0..bdb43cfb4adb4 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -179,7 +179,18 @@ FunctionPass *createX86LowerAMXTypeLegacyPass(); /// The pass transforms amx intrinsics to scalar operation if the function has /// optnone attribute or it is O0. -FunctionPass *createX86LowerAMXIntrinsicsPass(); +class X86LowerAMXIntrinsicsPass + : public PassInfoMixin { +private: + const TargetMachine *TM; + +public: + X86LowerAMXIntrinsicsPass(const TargetMachine *TM) : TM(TM) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + static bool isRequired() { return true; } +}; + +FunctionPass *createX86LowerAMXIntrinsicsLegacyPass(); InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &, diff --git a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp index 7f3393910da2c..662aec2c15241 100644 --- a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp @@ -23,12 +23,15 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Analysis.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsX86.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -40,7 +43,7 @@ using namespace llvm; using namespace PatternMatch; -#define DEBUG_TYPE "lower-amx-intrinsics" +#define DEBUG_TYPE "x86-lower-amx-intrinsics" #ifndef NDEBUG static bool isV256I32Ty(Type *Ty) { @@ -626,6 +629,37 @@ bool X86LowerAMXIntrinsics::visit() { return C; } +namespace { +bool shouldRunLowerAMXIntrinsics(const Function &F, const TargetMachine *TM) { + return X86ScalarizeAMX && (F.hasFnAttribute(Attribute::OptimizeNone) || + TM->getOptLevel() == CodeGenOptLevel::None); +} + +bool runLowerAMXIntrinsics(Function &F, DominatorTree *DT, LoopInfo *LI) { + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + + X86LowerAMXIntrinsics LAT(F, DTU, LI); + return LAT.visit(); +} +} // namespace + +PreservedAnalyses X86LowerAMXIntrinsicsPass::run(Function &F, + FunctionAnalysisManager &FAM) { + if (!shouldRunLowerAMXIntrinsics(F, TM)) + return PreservedAnalyses::all(); + + DominatorTree &DT = FAM.getResult(F); + LoopInfo &LI = FAM.getResult(F); + bool Changed = runLowerAMXIntrinsics(F, &DT, &LI); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA = PreservedAnalyses::none(); + PA.preserve(); + PA.preserve(); + return PA; +} + namespace { class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass { public: @@ -634,21 +668,15 @@ class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass { X86LowerAMXIntrinsicsLegacyPass() : FunctionPass(ID) {} bool runOnFunction(Function &F) override { - if (!X86ScalarizeAMX) - return false; TargetMachine *TM = &getAnalysis().getTM(); - if (!F.hasFnAttribute(Attribute::OptimizeNone) && - TM->getOptLevel() != CodeGenOptLevel::None) + if (!shouldRunLowerAMXIntrinsics(F, TM)) return false; auto *DTWP = getAnalysisIfAvailable(); auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *LIWP = getAnalysisIfAvailable(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - - X86LowerAMXIntrinsics LAT(F, DTU, LI); - return LAT.visit(); + return runLowerAMXIntrinsics(F, DT, LI); } StringRef getPassName() const override { return "Lower AMX intrinsics"; } @@ -668,6 +696,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(X86LowerAMXIntrinsicsLegacyPass, DEBUG_TYPE, PassName, false, false) -FunctionPass *llvm::createX86LowerAMXIntrinsicsPass() { +FunctionPass *llvm::createX86LowerAMXIntrinsicsLegacyPass() { return new X86LowerAMXIntrinsicsLegacyPass(); } diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def index fc25d55d3059a..81c98febc4ba8 100644 --- a/llvm/lib/Target/X86/X86PassRegistry.def +++ b/llvm/lib/Target/X86/X86PassRegistry.def @@ -15,13 +15,13 @@ #ifndef FUNCTION_PASS #define FUNCTION_PASS(NAME, CREATE_PASS) #endif +FUNCTION_PASS("x86-lower-amx-intrinsics", X86LowerAMXIntrinsicsPass(this)) FUNCTION_PASS("x86-lower-amx-type", X86LowerAMXTypePass(this)) #undef FUNCTION_PASS #ifndef DUMMY_FUNCTION_PASS #define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS) #endif -DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this)) DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction()) DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass()) #undef DUMMY_FUNCTION_PASS diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 9a76abcd351bf..bf4dab0371b88 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -422,7 +422,7 @@ void X86PassConfig::addIRPasses() { // We add both pass anyway and when these two passes run, we skip the pass // based on the option level and option attribute. - addPass(createX86LowerAMXIntrinsicsPass()); + addPass(createX86LowerAMXIntrinsicsLegacyPass()); addPass(createX86LowerAMXTypeLegacyPass()); TargetPassConfig::addIRPasses(); diff --git a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll index 87059c5d474e6..6ae7b2260c15c 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=x86_64 -lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s +; RUN: opt -mtriple=x86_64 -x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s +; RUN: opt -mtriple=x86_64 -passes=x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s define dso_local void @test_no_bitcast(ptr %A_mem, ptr %B_mem, ptr %C_mem) local_unnamed_addr #0 { ; CHECK-LABEL: @test_no_bitcast( diff --git a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll index 5fb2dcdc1d621..ca7c3573a3294 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=x86_64 -lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s +; RUN: opt -mtriple=x86_64 -x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s +; RUN: opt -mtriple=x86_64 -passes=x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s define dso_local void @test_amx_load_non_O0(i16 signext %row, i16 signext %col, ptr%ptr, i64 %stride, ptr %vptr) { ; CHECK-LABEL: @test_amx_load_non_O0( From b47b6dc6ec37a3e3a01f35b2da60c3f1fdd17d85 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 2 Nov 2025 21:36:37 +0000 Subject: [PATCH 2/2] feedback Created using spr 1.3.7 --- llvm/lib/Target/X86/X86.h | 4 ++-- llvm/lib/Target/X86/X86PartialReduction.cpp | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 2c4834e503607..fa23656e23fc3 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -160,10 +160,10 @@ FunctionPass *createX86InsertX87waitPass(); /// ways. class X86PartialReductionPass : public PassInfoMixin { private: - const TargetMachine *TM; + const X86TargetMachine *TM; public: - X86PartialReductionPass(const TargetMachine *TM) : TM(TM) {} + X86PartialReductionPass(const X86TargetMachine *TM) : TM(TM) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); }; diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp index 99a987a38134a..898c83cf9b468 100644 --- a/llvm/lib/Target/X86/X86PartialReduction.cpp +++ b/llvm/lib/Target/X86/X86PartialReduction.cpp @@ -552,8 +552,7 @@ bool X86PartialReductionLegacy::runOnFunction(Function &F) { PreservedAnalyses X86PartialReductionPass::run(Function &F, FunctionAnalysisManager &FAM) { - bool Changed = - X86PartialReduction(static_cast(TM)).run(F); + bool Changed = X86PartialReduction(TM).run(F); if (!Changed) return PreservedAnalyses::all();