Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NVPTX] Add NVPTXCtorDtorLoweringPass to handle global ctors / dtors
This patch mostly adapts the existing AMDGPUCtorDtorLoweringPass for use by the Nvidia backend. This pass transforms the ctor / dtor list into a kernel call that can be used to invoke those functions. Furthermore, we emit globals such that the names and addresses of these constructor functions can be found by the driver. Unfortunately, since NVPTX has no way to emit variables at a named section, nor a functioning linker to provide the begin / end symbols, we need to mangle these names and have an external application find them. This work is related to the work in D149398 and D149340. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D149451
- Loading branch information
Showing
10 changed files
with
232 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
//===-- NVPTXCtorDtorLowering.cpp - Handle global ctors and dtors --------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
/// | ||
/// \file | ||
/// This pass lowers llvm.global_ctors and llvm.global_dtors into exported | ||
/// globals whose mangled names let an external tool locate each ctor / dtor. | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "NVPTXCtorDtorLowering.h"
#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MD5.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
||
using namespace llvm; | ||
|
||
#define DEBUG_TYPE "nvptx-lower-ctor-dtor" | ||
|
||
// Hidden option that overrides the ID embedded in the name of every emitted | ||
// ctor / dtor global. When left empty, a hash of the module's source file | ||
// name is used instead. | ||
static cl::opt<std::string> | ||
GlobalStr("nvptx-lower-global-ctor-dtor-id", | ||
cl::desc("Override unique ID of ctor/dtor globals."), | ||
cl::init(""), cl::Hidden); | ||
|
||
namespace { | ||
|
||
/// Hashes \p Str with MD5 and returns the digest's low() word formatted as
/// lowercase hexadecimal.
static std::string getHash(StringRef Str) {
  llvm::MD5 Md5;
  Md5.update(Str);

  llvm::MD5::MD5Result Digest;
  Md5.final(Digest);

  const auto LowWord = Digest.low();
  return llvm::utohexstr(LowWord, /*LowerCase=*/true);
}
|
||
/// Lowers the ctor / dtor list named \p GlobalName into one exported global
/// per entry.
///
/// Each entry's function is stored in a constant named
/// "__init_array_object_<fn>_<id>_<priority>" (or "__fini_array_object_..."
/// when \p IsCtor is false), with every '.' replaced by '_'. <id> is the
/// -nvptx-lower-global-ctor-dtor-id value when set, otherwise a hash of the
/// module's source file name. The new globals are appended to llvm.used and
/// the original list is erased.
///
/// \param M          Module to transform.
/// \param GlobalName Name of the list global to lower.
/// \param IsCtor     Selects the init (true) or fini (false) naming.
/// \returns true if the list existed with a non-empty ConstantArray
/// initializer and was lowered; false if the module was left untouched.
static bool createInitOrFiniGlobls(Module &M, StringRef GlobalName,
                                   bool IsCtor) {
  GlobalVariable *ListGV = M.getGlobalVariable(GlobalName);
  if (!ListGV || !ListGV->hasInitializer())
    return false;
  auto *Entries = dyn_cast<ConstantArray>(ListGV->getInitializer());
  if (!Entries || Entries->getNumOperands() == 0)
    return false;

  // The semi-unique ID is the same for every entry, so compute it once
  // instead of rehashing the source file name on each iteration.
  const std::string GlobalID =
      !GlobalStr.empty() ? GlobalStr : getHash(M.getSourceFileName());
  const char *NamePrefix =
      IsCtor ? "__init_array_object_" : "__fini_array_object_";
  const char *SectionPrefix = IsCtor ? ".init_array." : ".fini_array.";

  // Collect the new globals so llvm.used is rebuilt once rather than once per
  // entry.
  SmallVector<GlobalValue *, 8> NewGlobals;
  NewGlobals.reserve(Entries->getNumOperands());

  // NVPTX has no way to emit variables at specific sections or support for
  // the traditional constructor sections. Instead, we emit mangled global
  // names so the runtime can build the list manually.
  for (Value *Entry : Entries->operands()) {
    auto *CS = cast<ConstantStruct>(Entry);
    auto *F = cast<Constant>(CS->getOperand(1));
    uint64_t Priority = cast<ConstantInt>(CS->getOperand(0))->getSExtValue();
    const std::string PriorityStr = std::to_string(Priority);

    // We append a semi-unique hash and the priority to the global name.
    std::string NameStr =
        (NamePrefix + F->getName() + "_" + GlobalID + "_" + PriorityStr).str();
    // PTX does not support exported names with '.' in them.
    llvm::transform(NameStr, NameStr.begin(),
                    [](char c) { return c == '.' ? '_' : c; });

    auto *ObjectGV = new GlobalVariable(M, F->getType(), /*IsConstant=*/true,
                                        GlobalValue::ExternalLinkage, F,
                                        NameStr, nullptr,
                                        GlobalValue::NotThreadLocal,
                                        /*AddressSpace=*/4);
    // This isn't respected by Nvidia, simply put here for clarity.
    ObjectGV->setSection(SectionPrefix + PriorityStr);
    ObjectGV->setVisibility(GlobalVariable::ProtectedVisibility);
    NewGlobals.push_back(ObjectGV);
  }

  appendToUsed(M, NewGlobals);
  ListGV->eraseFromParent();
  return true;
}
|
||
/// Lowers both llvm.global_ctors and llvm.global_dtors in \p M.
/// Both lists are always processed; returns true if either one was lowered.
static bool lowerCtorsAndDtors(Module &M) {
  const bool LoweredCtors =
      createInitOrFiniGlobls(M, "llvm.global_ctors", /*IsCtor=*/true);
  const bool LoweredDtors =
      createInitOrFiniGlobls(M, "llvm.global_dtors", /*IsCtor=*/false);
  return LoweredCtors || LoweredDtors;
}
|
||
class NVPTXCtorDtorLoweringLegacy final : public ModulePass { | ||
public: | ||
static char ID; | ||
NVPTXCtorDtorLoweringLegacy() : ModulePass(ID) {} | ||
bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); } | ||
}; | ||
|
||
} // End anonymous namespace | ||
|
||
/// Runs the ctor / dtor lowering on \p M. Reports that no analyses are
/// preserved when the module changed, and that all are preserved otherwise.
PreservedAnalyses NVPTXCtorDtorLoweringPass::run(Module &M,
                                                 ModuleAnalysisManager &AM) {
  if (!lowerCtorsAndDtors(M))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
|
||
// Definition of the pass ID that the class constructor hands to ModulePass. | ||
char NVPTXCtorDtorLoweringLegacy::ID = 0; | ||
// Exposes that ID through the reference declared in the llvm namespace. | ||
char &llvm::NVPTXCtorDtorLoweringLegacyPassID = NVPTXCtorDtorLoweringLegacy::ID; | ||
// Registers the pass using DEBUG_TYPE ("nvptx-lower-ctor-dtor") as its | ||
// argument string. | ||
INITIALIZE_PASS(NVPTXCtorDtorLoweringLegacy, DEBUG_TYPE, | ||
"Lower ctors and dtors for NVPTX", false, false) | ||
|
||
/// Allocates a new NVPTXCtorDtorLoweringLegacy pass and returns it to the
/// caller.
ModulePass *llvm::createNVPTXCtorDtorLoweringLegacyPass() {
  ModulePass *LoweringPass = new NVPTXCtorDtorLoweringLegacy();
  return LoweringPass;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
//===-- NVPTXCtorDtorLowering.h --------------------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H | ||
#define LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H | ||
|
||
#include "llvm/IR/PassManager.h" | ||
|
||
namespace llvm { | ||
class Module; | ||
class PassRegistry; | ||
|
||
/// Reference to the ID of the legacy ctor / dtor lowering pass. | ||
extern char &NVPTXCtorDtorLoweringLegacyPassID; | ||
/// Initialization hook for the legacy ctor / dtor lowering pass. | ||
extern void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &); | ||
|
||
/// Lower llvm.global_ctors and llvm.global_dtors into exported globals that | ||
/// record each constructor / destructor function. | ||
class NVPTXCtorDtorLoweringPass | ||
: public PassInfoMixin<NVPTXCtorDtorLoweringPass> { | ||
public: | ||
/// Runs the lowering on \p M and reports which analyses remain valid. | ||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); | ||
}; | ||
|
||
} // namespace llvm | ||
|
||
#endif // LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
; RUN: opt -S -mtriple=nvptx64-- -nvptx-lower-ctor-dtor < %s | FileCheck %s | ||
; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor < %s | FileCheck %s | ||
; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor \ | ||
; RUN: -nvptx-lower-global-ctor-dtor-id=unique_id < %s | FileCheck %s --check-prefix=GLOBAL | ||
|
||
; Make sure we get the same result if we run multiple times | ||
; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s | ||
; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY | ||
|
||
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }] | ||
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }] | ||
|
||
; CHECK-NOT: @llvm.global_ctors | ||
; CHECK-NOT: @llvm.global_dtors | ||
|
||
; CHECK: @__init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1" | ||
; CHECK: @__fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1" | ||
; CHECK: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_[[HASH]]_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_[[HASH]]_1 to ptr)], section "llvm.metadata" | ||
; GLOBAL: @__init_array_object_foo_unique_id_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1" | ||
; GLOBAL: @__fini_array_object_bar_unique_id_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1" | ||
; GLOBAL: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_unique_id_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_unique_id_1 to ptr)], section "llvm.metadata" | ||
|
||
; VISIBILITY: .visible .const .align 8 .u64 __init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = foo; | ||
; VISIBILITY: .visible .const .align 8 .u64 __fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = bar; | ||
|
||
; Empty function registered as the single entry of @llvm.global_ctors. | ||
define internal void @foo() { | ||
ret void | ||
} | ||
|
||
; Empty function registered as the single entry of @llvm.global_dtors. | ||
define internal void @bar() { | ||
ret void | ||
} |