Summary (descriptive preamble placed ahead of the first "diff --git" header):

Adds new-pass-manager wrappers around AMDGPUPropagateAttributes.

  * AMDGPUPropagateAttributesEarlyPass is a function pass. run() returns
    PreservedAnalyses::all() without doing any work when the function's
    calling convention is not an entry-function CC; otherwise it calls
    AMDGPUPropagateAttributes(&TM, false).process(F) and returns none() when
    process() reports true, all() otherwise.
  * AMDGPUPropagateAttributesLatePass is a module pass. run() calls
    AMDGPUPropagateAttributes(&TM, true).process(M) and maps the result to
    none()/all() the same way.
  * AMDGPUTargetMachine::registerPassBuilderCallbacks registers the pipeline
    names "amdgpu-propagate-attributes-early" (function pipeline) and
    "amdgpu-propagate-attributes-late" (module pipeline), adds the early pass
    as the first pass of the pipeline-start function pass manager, and adds
    the late pass in the early-simplification callback. That callback still
    returns early at O0; when InternalizeSymbols is set the late pass sits
    between InternalizePass and GlobalDCEPass.
  * opt.cpp adds both names to PassNameExactToIgnore, the list for which
    shouldPinPassToLegacyPM() returns false.
  * Three lit tests gain a -passes=... RUN line next to the legacy one.

NOTE(review): this text was rebuilt from a flattened copy in which every
angle-bracket token was missing. The template arguments, the <string> include
target and the <O1> pipeline suffix below were restored from context; the
std::vector element type and the include target are the least certain.
Leading whitespace inside hunks follows LLVM clang-format style and was not
checked against the tree. Confirm both before applying.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 623bbb2db32555..c06f9ad66009df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -123,9 +123,27 @@ struct AMDGPULowerKernelAttributesPass
 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
 extern char &AMDGPUPropagateAttributesEarlyID;
 
+struct AMDGPUPropagateAttributesEarlyPass
+    : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> {
+  AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {}
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+  TargetMachine &TM;
+};
+
 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
 extern char &AMDGPUPropagateAttributesLateID;
 
+struct AMDGPUPropagateAttributesLatePass
+    : PassInfoMixin<AMDGPUPropagateAttributesLatePass> {
+  AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {}
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+  TargetMachine &TM;
+};
+
 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
 extern char &AMDGPURewriteOutArgumentsID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
index dcbe4270e8a92c..56512529e7fe3e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -35,6 +35,7 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include <string>
@@ -409,3 +410,21 @@ ModulePass
 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
   return new AMDGPUPropagateAttributesLate(TM);
 }
+
+PreservedAnalyses
+AMDGPUPropagateAttributesEarlyPass::run(Function &F,
+                                        FunctionAnalysisManager &AM) {
+  if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+    return PreservedAnalyses::all();
+
+  return AMDGPUPropagateAttributes(&TM, false).process(F)
+             ? PreservedAnalyses::none()
+             : PreservedAnalyses::all();
+}
+
+PreservedAnalyses
+AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
+  return AMDGPUPropagateAttributes(&TM, true).process(M)
+             ? PreservedAnalyses::none()
+             : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 6c730be97b9dda..765ec045d5f1df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -491,6 +491,15 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
 
 void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
                                                        bool DebugPassManager) {
+  PB.registerPipelineParsingCallback(
+      [this](StringRef PassName, ModulePassManager &PM,
+             ArrayRef<PassBuilder::PipelineElement>) {
+        if (PassName == "amdgpu-propagate-attributes-late") {
+          PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
+          return true;
+        }
+        return false;
+      });
   PB.registerPipelineParsingCallback(
       [this](StringRef PassName, FunctionPassManager &PM,
              ArrayRef<PassBuilder::PipelineElement>) {
@@ -514,13 +523,19 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
           PM.addPass(AMDGPULowerKernelAttributesPass());
           return true;
         }
+        if (PassName == "amdgpu-propagate-attributes-early") {
+          PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
+          return true;
+        }
+
         return false;
       });
 
-  PB.registerPipelineStartEPCallback([DebugPassManager](
+  PB.registerPipelineStartEPCallback([this, DebugPassManager](
                                          ModulePassManager &PM,
                                          PassBuilder::OptimizationLevel Level) {
     FunctionPassManager FPM(DebugPassManager);
+    FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
     FPM.addPass(AMDGPUUseNativeCallsPass());
     if (EnableLibCallSimplify && Level != PassBuilder::OptimizationLevel::O0)
       FPM.addPass(AMDGPUSimplifyLibCallsPass());
@@ -528,12 +543,15 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
   });
 
   PB.registerPipelineEarlySimplificationEPCallback(
-      [](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
+      [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
         if (Level == PassBuilder::OptimizationLevel::O0)
           return;
 
         if (InternalizeSymbols) {
           PM.addPass(InternalizePass(mustPreserveGV));
+        }
+        PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
+        if (InternalizeSymbols) {
           PM.addPass(GlobalDCEPass());
         }
       });
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
index 3a50acc0296b73..72effaa056de02 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 < %s | FileCheck -check-prefixes=OPT,OPT-EXT %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='default<O1>' < %s | FileCheck -check-prefixes=OPT,OPT-EXT %s
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 --amdgpu-internalize-symbols < %s | FileCheck -check-prefixes=OPT,OPT-INT %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='default<O1>' --amdgpu-internalize-symbols < %s | FileCheck -check-prefixes=OPT,OPT-INT %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s
 
 ; OPT: declare void @foo4() local_unnamed_addr #0
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll
index 30c6eded2397a6..210d04265ccaeb 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-propagate-attributes-late %s | FileCheck %s
 
 ; CHECK: define internal void @max_flat_1_1024() #0 {
 define internal void @max_flat_1_1024() #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-single-set.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-single-set.ll
index 8832f5db8e5d0b..4f83f16f2924b4 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-single-set.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-single-set.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='default<O1>' < %s | FileCheck -check-prefix=OPT %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s
 
 ; OPT: declare void @foo4() local_unnamed_addr #0
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index 94f4eb03e636be..a02b884996e9bc 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -463,8 +463,13 @@ struct TimeTracerRAII {
 // it exists.
 static bool shouldPinPassToLegacyPM(StringRef Pass) {
   std::vector<StringRef> PassNameExactToIgnore = {
-      "amdgpu-simplifylib", "amdgpu-usenative", "amdgpu-promote-alloca",
-      "amdgpu-promote-alloca-to-vector", "amdgpu-lower-kernel-attributes"};
+      "amdgpu-simplifylib",
+      "amdgpu-usenative",
+      "amdgpu-promote-alloca",
+      "amdgpu-promote-alloca-to-vector",
+      "amdgpu-lower-kernel-attributes",
+      "amdgpu-propagate-attributes-early",
+      "amdgpu-propagate-attributes-late"};
   for (const auto &P : PassNameExactToIgnore)
     if (Pass == P)
       return false;