Skip to content

Commit

Permalink
[LLD] Search archives for symbol defs to override COMMON symbols.
Browse files Browse the repository at this point in the history
This patch changes the archive handling to enable the semantics needed
for legacy FORTRAN common blocks and block data. When we have a COMMON
definition of a symbol and are including an archive, LLD will now
search the members for global/weak defintions to override the COMMON
symbol. The previous LLD behavior (where a member would only be included
if it satisifed some other needed symbol definition) can be re-enabled with the
option '-no-fortran-common'.

Differential Revision: https://reviews.llvm.org/D86142
  • Loading branch information
mandlebug committed Dec 7, 2020
1 parent 733e2ae commit 8f91f38
Show file tree
Hide file tree
Showing 9 changed files with 326 additions and 0 deletions.
1 change: 1 addition & 0 deletions lld/ELF/Config.h
Expand Up @@ -163,6 +163,7 @@ struct Configuration {
bool fixCortexA53Errata843419;
bool fixCortexA8;
bool formatBinary = false;
bool fortranCommon;
bool gcSections;
bool gdbIndex;
bool gnuHash = false;
Expand Down
2 changes: 2 additions & 0 deletions lld/ELF/Driver.cpp
Expand Up @@ -974,6 +974,8 @@ static void readConfigs(opt::InputArgList &args) {
!args.hasArg(OPT_relocatable);
config->fixCortexA8 =
args.hasArg(OPT_fix_cortex_a8) && !args.hasArg(OPT_relocatable);
config->fortranCommon =
args.hasFlag(OPT_fortran_common, OPT_no_fortran_common, true);
config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
config->gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);
Expand Down
89 changes: 89 additions & 0 deletions lld/ELF/InputFiles.cpp
Expand Up @@ -1237,6 +1237,88 @@ void ArchiveFile::fetch(const Archive::Symbol &sym) {
parseFile(file);
}

// The handling of tentative definitions (COMMON symbols) in archives is murky.
// A tentative defintion will be promoted to a global definition if there are no
// non-tentative definitions to dominate it. When we hold a tentative definition
// to a symbol and are inspecting archive memebers for inclusion there are 2
// ways we can proceed:
//
// 1) Consider the tentative definition a 'real' definition (ie promotion from
// tentative to real definition has already happened) and not inspect
// archive members for Global/Weak definitions to replace the tentative
// definition. An archive member would only be included if it satisfies some
// other undefined symbol. This is the behavior Gold uses.
//
// 2) Consider the tentative definition as still undefined (ie the promotion to
// a real definiton happens only after all symbol resolution is done).
// The linker searches archive memebers for global or weak definitions to
// replace the tentative definition with. This is the behavior used by
// GNU ld.
//
// The second behavior is inherited from SysVR4, which based it on the FORTRAN
// COMMON BLOCK model. This behavior is needed for proper initalizations in old
// (pre F90) FORTRAN code that is packaged into an archive.
//
// The following functions search archive members for defintions to replace
// tentative defintions (implementing behavior 2).
static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName,
StringRef archiveName) {
IRSymtabFile symtabFile = check(readIRSymtab(mb));
for (const irsymtab::Reader::SymbolRef &sym :
symtabFile.TheReader.symbols()) {
if (sym.isGlobal() && sym.getName() == symName)
return !sym.isUndefined() && !sym.isCommon();
}
return false;
}

template <class ELFT>
static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
StringRef archiveName) {
ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(mb, archiveName);
StringRef stringtable = obj->getStringTable();

for (auto sym : obj->template getGlobalELFSyms<ELFT>()) {
Expected<StringRef> name = sym.getName(stringtable);
if (name && name.get() == symName)
return sym.isDefined() && !sym.isCommon();
}
return false;
}

static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
StringRef archiveName) {
switch (getELFKind(mb, archiveName)) {
case ELF32LEKind:
return isNonCommonDef<ELF32LE>(mb, symName, archiveName);
case ELF32BEKind:
return isNonCommonDef<ELF32BE>(mb, symName, archiveName);
case ELF64LEKind:
return isNonCommonDef<ELF64LE>(mb, symName, archiveName);
case ELF64BEKind:
return isNonCommonDef<ELF64BE>(mb, symName, archiveName);
default:
llvm_unreachable("getELFKind");
}
}

bool ArchiveFile::shouldFetchForCommon(const Archive::Symbol &sym) {
Archive::Child c =
CHECK(sym.getMember(), toString(this) +
": could not get the member for symbol " +
toELFString(sym));
MemoryBufferRef mb =
CHECK(c.getMemoryBufferRef(),
toString(this) +
": could not get the buffer for the member defining symbol " +
toELFString(sym));

if (isBitcode(mb))
return isBitcodeNonCommonDef(mb, sym.getName(), getName());

return isNonCommonDef(mb, sym.getName(), getName());
}

size_t ArchiveFile::getMemberCount() const {
size_t count = 0;
Error err = Error::success();
Expand Down Expand Up @@ -1771,6 +1853,13 @@ template <class ELFT> void LazyObjFile::parse() {
}
}

bool LazyObjFile::shouldFetchForCommon(const StringRef &name) {
if (isBitcode(mb))
return isBitcodeNonCommonDef(mb, name, archiveName);

return isNonCommonDef(mb, name, archiveName);
}

std::string elf::replaceThinLTOSuffix(StringRef path) {
StringRef suffix = config->thinLTOObjectSuffixReplace.first;
StringRef repl = config->thinLTOObjectSuffixReplace.second;
Expand Down
8 changes: 8 additions & 0 deletions lld/ELF/InputFiles.h
Expand Up @@ -312,6 +312,10 @@ class LazyObjFile : public InputFile {
template <class ELFT> void parse();
void fetch();

// Check if a non-common symbol should be fetched to override a common
// definition.
bool shouldFetchForCommon(const StringRef &name);

bool fetched = false;

private:
Expand All @@ -331,6 +335,10 @@ class ArchiveFile : public InputFile {
// more than once.)
void fetch(const Archive::Symbol &sym);

// Check if a non-common symbol should be fetched to override a common
// definition.
bool shouldFetchForCommon(const Archive::Symbol &sym);

size_t getMemberCount() const;
size_t getFetchedMemberCount() const { return seen.size(); }

Expand Down
4 changes: 4 additions & 0 deletions lld/ELF/Options.td
Expand Up @@ -64,6 +64,10 @@ defm optimize_bb_jumps: BB<"optimize-bb-jumps",
"Remove direct jumps at the end to the next basic block",
"Do not remove any direct jumps at the end to the next basic block (default)">;

defm fortran_common : BB<"fortran-common",
"Search archive members for definitions to override COMMON symbols (default)",
"Do not search archive members for definitions to override COMMON symbols">;

defm split_stack_adjust_size
: Eq<"split-stack-adjust-size",
"Specify adjustment to stack size when a split-stack function calls a "
Expand Down
26 changes: 26 additions & 0 deletions lld/ELF/Symbols.cpp
Expand Up @@ -689,7 +689,33 @@ void Symbol::resolveDefined(const Defined &other) {
other.value);
}

template <class LazyT>
static void replaceCommon(Symbol &oldSym, const LazyT &newSym) {
backwardReferences.erase(&oldSym);
oldSym.replace(newSym);
newSym.fetch();
}

template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
// For common objects, we want to look for global or weak definitions that
// should be fetched as the cannonical definition instead.
if (isCommon() && elf::config->fortranCommon) {
if (auto *laSym = dyn_cast<LazyArchive>(&other)) {
ArchiveFile *archive = cast<ArchiveFile>(laSym->file);
const Archive::Symbol &archiveSym = laSym->sym;
if (archive->shouldFetchForCommon(archiveSym)) {
replaceCommon(*this, other);
return;
}
} else if (auto *loSym = dyn_cast<LazyObject>(&other)) {
LazyObjFile *obj = cast<LazyObjFile>(loSym->file);
if (obj->shouldFetchForCommon(loSym->getName())) {
replaceCommon(*this, other);
return;
}
}
}

if (!isUndefined()) {
// See the comment in resolveUndefined().
if (isDefined())
Expand Down
2 changes: 2 additions & 0 deletions lld/docs/ld.lld.1
Expand Up @@ -311,6 +311,8 @@ Do not demangle symbol names.
Inhibit output of an
.Li .interp
section.
.It Fl -no-fortran-common
Do not search archive members for definitions to override COMMON symbols.
.It Fl -no-gc-sections
Disable garbage collection of unused sections.
.It Fl -no-gnu-unique
Expand Down
182 changes: 182 additions & 0 deletions lld/test/ELF/common-archive-lookup.s
@@ -0,0 +1,182 @@
# REQUIRES: ppc

# RUN: rm -rf %t.dir
# RUN: split-file %s %t.dir
# RUN: cd %t.dir

## Object files.
# RUN: llvm-mc -triple=powerpc64le -filetype=obj ref.s -o %t1.o
# RUN: llvm-mc -triple=powerpc64le -filetype=obj refanddef.s -o %t2.o
# RUN: llvm-mc -triple=powerpc64le -filetype=obj def.s -o %tstrong_data_only.o
# RUN: llvm-mc -triple=powerpc64le -filetype=obj weak.s -o %tweak_data_only.o

# RUN: llvm-mc -triple=powerpc64le -filetype=obj main.s -o %tmain.o

## Object file archives.
# RUN: llvm-ar crs %t1.a %t1.o %tstrong_data_only.o
# RUN: llvm-ar crs %t2.a %t1.o %tweak_data_only.o
# RUN: llvm-ar crs %t3.a %t2.o %tstrong_data_only.o

## Bitcode files.
# RUN: llvm-as -o %t1.bc commonblock.ll
# RUN: llvm-as -o %t2.bc blockdata.ll

## Bitcode archive.
# RUN: llvm-ar crs %t4.a %t1.bc %t2.bc

# RUN: ld.lld -o %t1 %tmain.o %t1.a
# RUN: llvm-objdump -D -j .data %t1 | FileCheck --check-prefix=TEST1 %s

# RUN: ld.lld -o %t2 %tmain.o --start-lib %t1.o %tstrong_data_only.o --end-lib
# RUN: llvm-objdump -D -j .data %t2 | FileCheck --check-prefix=TEST1 %s

# RUN: ld.lld -o %t3 %tmain.o %t2.a
# RUN: llvm-objdump -D -j .data %t3 | FileCheck --check-prefix=TEST1 %s

# RUN: ld.lld -o %t4 %tmain.o --start-lib %t1.o %tweak_data_only.o --end-lib
# RUN: llvm-objdump -D -j .data %t4 | FileCheck --check-prefix=TEST1 %s

# RUN: ld.lld -o %t5 %tmain.o %t3.a --print-map | FileCheck --check-prefix=MAP %s

# RUN: ld.lld -o %t6 %tmain.o %t2.o %t1.a
# RUN: llvm-objdump -D -j .data %t6 | FileCheck --check-prefix=TEST2 %s

# RUN: ld.lld -o %t7 %tmain.o %t2.o --start-lib %t1.o %tstrong_data_only.o --end-lib
# RUN: llvm-objdump -D -j .data %t7 | FileCheck --check-prefix=TEST2 %s

# RUN: not ld.lld -o %t8 %tmain.o %t1.a %tstrong_data_only.o 2>&1 | \
# RUN: FileCheck --check-prefix=ERR %s

# RUN: not ld.lld -o %t9 %tmain.o --start-lib %t1.o %t2.o --end-lib %tstrong_data_only.o 2>&1 | \
# RUN: FileCheck --check-prefix=ERR %s

# ERR: ld.lld: error: duplicate symbol: block

# RUN: ld.lld --no-fortran-common -o %t10 %tmain.o %t1.a
# RUN: llvm-readobj --syms %t10 | FileCheck --check-prefix=NFC %s

# RUN: ld.lld --no-fortran-common -o %t11 %tmain.o --start-lib %t1.o %tstrong_data_only.o --end-lib
# RUN: llvm-readobj --syms %t11 | FileCheck --check-prefix=NFC %s

# RUN: ld.lld -o - %tmain.o %t4.a --lto-emit-asm | FileCheck --check-prefix=ASM %s

# RUN: ld.lld -o - %tmain.o --start-lib %t1.bc %t2.bc --end-lib --lto-emit-asm | \
# RUN: FileCheck --check-prefix=ASM %s

## Old FORTRAN that mixes use of COMMON blocks and BLOCK DATA requires that we
## search through archives for non-tentative definitions (from the BLOCK DATA)
## to replace the tentative definitions (from the COMMON block(s)).

## Ensure we have used the initialized definition of 'block' instead of a
## common definition.
# TEST1-LABEL: Disassembly of section .data:
# TEST1: <block>:
# TEST1-NEXT: ea 2e 44 54
# TEST1-NEXT: fb 21 09 40
# TEST1-NEXT: ...


# NFC: Name: block
# NFC-NEXT: Value:
# NFC-NEXT: Size: 40
# NFC-NEXT: Binding: Global (0x1)
# NFC-NEXT: Type: Object (0x1)
# NFC-NEXT: Other: 0
# NFC-NEXT: Section: .bss

## Expecting the strong definition from the object file, and the defintions from
## the archive do not interfere.
# TEST2-LABEL: Disassembly of section .data:
# TEST2: <block>:
# TEST2-NEXT: 03 57 14 8b
# TEST2-NEXT: 0a bf 05 40
# TEST2-NEXT: ...

# MAP: 28 8 {{.*}}tmp3.a(common-archive-lookup.s.tmp2.o):(.data)
# MAP-NEXT: 28 1 block

# ASM: .type block,@object
# ASM: block:
# ASM-NEXT: .long 5
# ASM: .size block, 20

#--- ref.s
.text
.abiversion 2
.global bar
.type bar,@function
bar:
addis 4, 2, block@toc@ha
addi 4, 4, block@toc@l

## Tentative definition of 'block'.
.comm block,40,8

#--- refanddef.s
## An alternate strong definition of block, in the same file as
## a different referenced symbol.
.text
.abiversion 2
.global bar
.type bar,@function
bar:
addis 4, 2, block@toc@ha
addi 4, 4, block@toc@l

.data
.type block,@object
.global block
.p2align 3
block:
.quad 0x4005bf0a8b145703 # double 2.7182818284589998
.space 32
.size block, 40

#--- def.s
## Strong definition of 'block'.
.data
.type block,@object
.global block
.p2align 3
block:
.quad 0x400921fb54442eea # double 3.1415926535900001
.space 32
.size block, 40

#--- weak.s
## Weak definition of `block`.
.data
.type block,@object
.weak block
.p2align 3
block:
.quad 0x400921fb54442eea # double 3.1415926535900001
.space 32
.size block, 40

#--- main.s
.global _start
_start:
bl bar
blr


#--- blockdata.ll
target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
target triple = "powerpc64le-unknown-linux-gnu"

@block = dso_local local_unnamed_addr global [5 x i32] [i32 5, i32 0, i32 0, i32 0, i32 0], align 4

#--- commonblock.ll
target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
target triple = "powerpc64le-unknown-linux-gnu"

@block = common dso_local local_unnamed_addr global [5 x i32] zeroinitializer, align 4

define dso_local i32 @bar(i32 signext %i) local_unnamed_addr {
entry:
%idxprom = sext i32 %i to i64
%arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @block, i64 0, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 8
ret i32 %0
}
12 changes: 12 additions & 0 deletions lld/test/ELF/warn-backrefs.s
Expand Up @@ -66,6 +66,18 @@
# RUN: echo '.weak foo; foo:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tweak.o
# RUN: ld.lld --fatal-warnings --warn-backrefs --start-lib %tweak.o --end-lib %t1.o %t2.o -o /dev/null

## Check common symbols. A common sym might later be replaced by a non-common definition.
# RUN: echo '.comm obj, 4' | llvm-mc -filetype=obj -triple=x86_64 -o %tcomm.o
# RUN: echo '.type obj,@object; .data; .globl obj; .p2align 2; obj: .long 55; .size obj, 4' | llvm-mc -filetype=obj -triple=x86_64 -o %tstrong.o
# RUN: echo '.globl foo; foo: movl obj(%rip), %eax' | llvm-mc -triple=x86_64 -filetype=obj -o %t5.o
# RUN: llvm-ar rcs %tcomm.a %tcomm.o
# RUN: llvm-ar rcs %tstrong.a %tstrong.o
# RUN: ld.lld --warn-backrefs %tcomm.a %t1.o %t5.o 2>&1 -o /dev/null | FileCheck --check-prefix=COMM %s
# RUN: ld.lld --fatal-warnings --warn-backrefs %tcomm.a %t1.o %t5.o %tstrong.a 2>&1 -o /dev/null
# RUN: ld.lld --warn-backrefs --no-fortran-common %tcomm.a %t1.o %t5.o %tstrong.a 2>&1 -o /dev/null | FileCheck --check-prefix=COMM %s

# COMM: ld.lld: warning: backward reference detected: obj in {{.*}}5.o refers to {{.*}}comm.a

## If a lazy definition appears after the backward reference, don't warn.
## A traditional Unix linker will resolve the reference to the later definition.
# RUN: ld.lld --fatal-warnings --warn-backrefs %t2.a %t1.o %t2.a -o /dev/null
Expand Down

0 comments on commit 8f91f38

Please sign in to comment.