-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[LLVM] Maintain element type of @llvm.compiler.used/@llvm.used if it already exists #162660
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[LLVM] Maintain element type of @llvm.compiler.used/@llvm.used if it already exists #162660
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Juan Manuel Martinez Caamaño (jmmartinez) ChangesAt the moment, the address space for the pointer stored in the
This PR doesn't solve this issue. It only adds one test that documents one problem with it, and makes the BitcodeWriter use the llvm helpers to reduce the places where these variables change. Without this patch, the test added in the PR fails with an assertion:
Full diff: https://github.com/llvm/llvm-project/pull/162660.diff 4 Files Affected:
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7ed140d392fca..f7afa393f3e00 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -75,6 +75,7 @@
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -5850,25 +5851,25 @@ static const char *getSectionNameForCommandline(const Triple &T) {
void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
bool EmbedBitcode, bool EmbedCmdline,
const std::vector<uint8_t> &CmdArgs) {
- // Save llvm.compiler.used and remove it.
- SmallVector<Constant *, 2> UsedArray;
- SmallVector<GlobalValue *, 4> UsedGlobals;
- GlobalVariable *Used = collectUsedGlobalVariables(M, UsedGlobals, true);
- Type *UsedElementType = Used ? Used->getValueType()->getArrayElementType()
- : PointerType::getUnqual(M.getContext());
- for (auto *GV : UsedGlobals) {
- if (GV->getName() != "llvm.embedded.module" &&
- GV->getName() != "llvm.cmdline")
- UsedArray.push_back(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
- }
- if (Used)
- Used->eraseFromParent();
// Embed the bitcode for the llvm module.
std::string Data;
ArrayRef<uint8_t> ModuleData;
Triple T(M.getTargetTriple());
+ SmallVector<GlobalValue *, 2> NewGlobals;
+
+ auto IsCmdOrBitcode = [&](Constant *C) {
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(C);
+ StringRef Name = GV ? GV->getName() : "";
+ if (EmbedBitcode && Name == "llvm.embedded.module")
+ return true;
+ if (EmbedCmdline && Name == "llvm.cmdline")
+ return true;
+ return false;
+ };
+
+ if (EmbedBitcode || EmbedCmdline)
+ removeFromUsedLists(M, IsCmdOrBitcode);
if (EmbedBitcode) {
if (Buf.getBufferSize() == 0 ||
@@ -5887,23 +5888,22 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
}
llvm::Constant *ModuleConstant =
llvm::ConstantDataArray::get(M.getContext(), ModuleData);
- llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+ llvm::GlobalVariable *EmbeddedModule = new llvm::GlobalVariable(
M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage,
ModuleConstant);
- GV->setSection(getSectionNameForBitcode(T));
+ EmbeddedModule->setSection(getSectionNameForBitcode(T));
// Set alignment to 1 to prevent padding between two contributions from input
// sections after linking.
- GV->setAlignment(Align(1));
- UsedArray.push_back(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
+ EmbeddedModule->setAlignment(Align(1));
+ NewGlobals.push_back(EmbeddedModule);
if (llvm::GlobalVariable *Old =
M.getGlobalVariable("llvm.embedded.module", true)) {
assert(Old->hasZeroLiveUses() &&
"llvm.embedded.module can only be used once in llvm.compiler.used");
- GV->takeName(Old);
+ EmbeddedModule->takeName(Old);
Old->eraseFromParent();
} else {
- GV->setName("llvm.embedded.module");
+ EmbeddedModule->setName("llvm.embedded.module");
}
// Skip if only bitcode needs to be embedded.
@@ -5913,30 +5913,20 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
CmdArgs.size());
llvm::Constant *CmdConstant =
llvm::ConstantDataArray::get(M.getContext(), CmdData);
- GV = new llvm::GlobalVariable(M, CmdConstant->getType(), true,
- llvm::GlobalValue::PrivateLinkage,
- CmdConstant);
- GV->setSection(getSectionNameForCommandline(T));
- GV->setAlignment(Align(1));
- UsedArray.push_back(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
+ GlobalVariable *CmdLine = new llvm::GlobalVariable(
+ M, CmdConstant->getType(), true, llvm::GlobalValue::PrivateLinkage,
+ CmdConstant);
+ CmdLine->setSection(getSectionNameForCommandline(T));
+ CmdLine->setAlignment(Align(1));
if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) {
assert(Old->hasZeroLiveUses() &&
"llvm.cmdline can only be used once in llvm.compiler.used");
- GV->takeName(Old);
+ CmdLine->takeName(Old);
Old->eraseFromParent();
} else {
- GV->setName("llvm.cmdline");
+ CmdLine->setName("llvm.cmdline");
}
+ NewGlobals.push_back(CmdLine);
+ appendToCompilerUsed(M, NewGlobals);
}
-
- if (UsedArray.empty())
- return;
-
- // Recreate llvm.compiler.used.
- ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size());
- auto *NewUsed = new GlobalVariable(
- M, ATy, false, llvm::GlobalValue::AppendingLinkage,
- llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used");
- NewUsed->setSection("llvm.metadata");
}
diff --git a/llvm/lib/Bitcode/Writer/CMakeLists.txt b/llvm/lib/Bitcode/Writer/CMakeLists.txt
index 2c508ca9fae95..5bbb872a90341 100644
--- a/llvm/lib/Bitcode/Writer/CMakeLists.txt
+++ b/llvm/lib/Bitcode/Writer/CMakeLists.txt
@@ -15,4 +15,5 @@ add_llvm_component_library(LLVMBitWriter
ProfileData
Support
TargetParser
+ TransformUtils
)
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 596849ecab742..d1acb0ff1ad6b 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -138,10 +138,11 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *>
SmallSetVector<Constant *, 16> Init;
collectUsedGlobals(GV, Init);
+ Type *ArrayEltTy = GV ? GV->getValueType()->getArrayElementType()
+ : PointerType::getUnqual(M.getContext());
if (GV)
GV->eraseFromParent();
- Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext());
for (auto *V : Values)
Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
diff --git a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp
index d4094c5307060..0cc408af43bc5 100644
--- a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp
@@ -69,6 +69,23 @@ TEST(ModuleUtils, AppendToUsedList2) {
EXPECT_EQ(1, getListSize(*M, "llvm.used"));
}
+TEST(ModuleUtils, AppendToUsedList3) {
+ LLVMContext C;
+
+ std::unique_ptr<Module> M = parseIR(C, R"(
+ @x = addrspace(1) global [2 x i32] zeroinitializer, align 4
+ @y = addrspace(2) global [2 x i32] zeroinitializer, align 4
+ @llvm.compiler.used = appending global [1 x ptr addrspace (3)] [ptr addrspace(3) addrspacecast (ptr addrspace (1) @x to ptr addrspace(3))]
+ )");
+ GlobalVariable *X = M->getNamedGlobal("x");
+ GlobalVariable *Y = M->getNamedGlobal("y");
+ EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used"));
+ appendToCompilerUsed(*M, X);
+ EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used"));
+ appendToCompilerUsed(*M, Y);
+ EXPECT_EQ(2, getListSize(*M, "llvm.compiler.used"));
+}
+
using AppendFnType = decltype(&appendToGlobalCtors);
using TransformFnType = decltype(&transformGlobalCtors);
using ParamType = std::tuple<StringRef, AppendFnType, TransformFnType>;
|
It should be defined to 0 |
…@llvm.used if it already exists This new test fails with: /home/juamarti/llvm/_llvm/llvm/lib/IR/Constants.cpp:1327: static Constant *llvm::ConstantArray::getImpl(ArrayType *, ArrayRef<Constant *>): Assertion `C->getType() == Ty->getElementType() && "Wrong type in array element initializer"' failed.
…already exists At the moment, the pointer type stored in the llvm.compiler.used/llvm.used is not well fixed. The frontend uses a pointer to the default address space (which may not be 0; for example, it is 4 for SPIRV). This patch makes `appendToUsed/appendToCompilerUsed` match the behaviour in BitcodeWriter.cpp: if the variable already exists, preserve its element type, otherwise use `ptr addrspace (0)`. This fixes the following error in the newly added test: UtilsTests: /home/juamarti/llvm/_llvm/llvm/lib/IR/Constants.cpp:1327: static Constant *llvm::ConstantArray::getImpl(ArrayType *, ArrayRef<Constant *>): Assertion `C->getType() == Ty->getElementType() && "Wrong type in array element initializer"' failed.
In that case, should the LLVM-IR verifier (or at least the linter) reject bitcode where these variables have the wrong type? I still have to check what breaks exactly when clang emits an |
3d08e3a
to
7b445e0
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
Yes. verifier failure.
SPIR-V probably shouldn't just drop these on the floor. It probably will need special case code to stripPointerCasts and re-package the references into whatever SPIRV type it prefers |
#include "llvm/Support/SHA1.h" | ||
#include "llvm/Support/raw_ostream.h" | ||
#include "llvm/TargetParser/Triple.h" | ||
#include "llvm/Transforms/Utils/ModuleUtils.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bitcode cannot depend on Transforms
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would moving these 3 functions to llvm/IR/UsedGlobals.h
(or something like that) work for you ?
- llvm::appendToUsed
- llvm::appendToCompilerUsed
- llvm::removeFromUsedLists
I'd really like to eventually use this elsewhere (in Clang's CodeGenModule
and in the BitcodeWriter
to avoid these kind of invariant violations in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably even to llvm/include/llvm/IR/GlobalValue.h
, since at the end of the day these are manipulating GlobalValue
.
At the moment, the address space for the pointer stored in the
llvm.compiler.used/llvm.used
is not well defined:ModuleUtils
use the 0 address space,This PR doesn't solve this issue. It only adds one test that documents one problem with it, and makes the BitcodeWriter use the llvm helpers to reduce the places where these variables change.
Without this patch, the test added in the PR fails with an assertion: