Skip to content

Commit

Permalink
[LTO][MC] Discard non-prevailing defined symbols in module-level asse…
Browse files Browse the repository at this point in the history
…mbly

This is the alternative approach to D96931.

In LTO, for each module with inlineasm block, prepend directive ".lto_discard <sym>, <sym>*" to the beginning of the inline
asm.  ".lto_discard" is both a module inlineasm block marker and (optionally) provides a list of symbols to be discarded.

In MC while emitting for inlineasm, discard symbol binding & symbol
definitions according to ".lto_disard".

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D98762
  • Loading branch information
Yuanfang Chen committed Mar 18, 2021
1 parent 2df65f8 commit b4a8c0e
Show file tree
Hide file tree
Showing 8 changed files with 231 additions and 3 deletions.
1 change: 0 additions & 1 deletion llvm/include/llvm/MC/MCContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,6 @@ namespace llvm {

void initInlineSourceManager();
SourceMgr *getInlineSourceManager() {
assert(InlineSrcMgr);
return InlineSrcMgr.get();
}
std::vector<const MDNode *> &getLocInfos() { return LocInfos; }
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/MC/MCParser/MCAsmParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ class MCAsmParser {
virtual void setParsingMSInlineAsm(bool V) = 0;
virtual bool isParsingMSInlineAsm() = 0;

virtual bool discardLTOSymbol(StringRef) const { return false; }

virtual bool isParsingMasm() const { return false; }

virtual bool defineMacro(StringRef Name, StringRef Value) { return true; }
Expand Down
30 changes: 29 additions & 1 deletion llvm/lib/LTO/LTO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
//===----------------------------------------------------------------------===//

#include "llvm/LTO/LTO.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
Expand Down Expand Up @@ -752,6 +754,7 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
Skip();

std::set<const Comdat *> NonPrevailingComdats;
SmallSet<StringRef, 2> NonPrevailingAsmSymbols;
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
Expand Down Expand Up @@ -798,7 +801,14 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
DefaultStorageClass);
}
} else if (auto *AS = Msym.dyn_cast<ModuleSymbolTable::AsmSymbol *>()) {
// Collect non-prevailing symbols.
if (!Res.Prevailing)
NonPrevailingAsmSymbols.insert(AS->first);
} else {
llvm_unreachable("unknown symbol type");
}

// Common resolution: collect the maximum size/alignment over all commons.
// We also record if we see an instance of a common as prevailing, so that
// if none is prevailing we can ignore it later.
Expand All @@ -812,11 +822,29 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
CommonRes.Align = max(*SymAlign, CommonRes.Align);
CommonRes.Prevailing |= Res.Prevailing;
}

}

if (!M.getComdatSymbolTable().empty())
for (GlobalValue &GV : M.global_values())
handleNonPrevailingComdat(GV, NonPrevailingComdats);

// Prepend ".lto_discard <sym>, <sym>*" directive to each module inline asm
// block.
if (!M.getModuleInlineAsm().empty()) {
std::string NewIA = ".lto_discard";
if (!NonPrevailingAsmSymbols.empty()) {
// Don't dicard a symbol if there is a live .symver for it.
ModuleSymbolTable::CollectAsmSymvers(
M, [&](StringRef Name, StringRef Alias) {
if (!NonPrevailingAsmSymbols.count(Alias))
NonPrevailingAsmSymbols.erase(Name);
});
NewIA += " " + llvm::join(NonPrevailingAsmSymbols, ", ");
}
NewIA += "\n";
M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm());
}

assert(MsymI == MsymE);
return std::move(Mod);
}
Expand Down
48 changes: 47 additions & 1 deletion llvm/lib/MC/MCParser/AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
Expand Down Expand Up @@ -168,6 +169,8 @@ class AsmParser : public MCAsmParser {
/// List of forward directional labels for diagnosis at the end.
SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;

SmallSet<StringRef, 2> LTODiscardSymbols;

/// AssemblerDialect. ~OU means unset value and use value provided by MAI.
unsigned AssemblerDialect = ~0U;

Expand Down Expand Up @@ -235,6 +238,10 @@ class AsmParser : public MCAsmParser {
}
bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }

bool discardLTOSymbol(StringRef Name) const override {
return LTODiscardSymbols.contains(Name);
}

bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
SmallVectorImpl<std::pair<void *,bool>> &OpDecls,
Expand Down Expand Up @@ -516,6 +523,7 @@ class AsmParser : public MCAsmParser {
DK_ADDRSIG,
DK_ADDRSIG_SYM,
DK_PSEUDO_PROBE,
DK_LTO_DISCARD,
DK_END
};

Expand Down Expand Up @@ -682,6 +690,9 @@ class AsmParser : public MCAsmParser {
// .pseudoprobe
bool parseDirectivePseudoProbe();

// ".lto_discard"
bool parseDirectiveLTODiscard();

// Directives to support address-significance tables.
bool parseDirectiveAddrsig();
bool parseDirectiveAddrsigSym();
Expand Down Expand Up @@ -892,6 +903,8 @@ bool AsmParser::enabledGenDwarfForAssembly() {
}

bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
LTODiscardSymbols.clear();

// Create the initial section, if requested.
if (!NoInitialTextSection)
Out.InitSections(false);
Expand Down Expand Up @@ -1770,7 +1783,6 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
StringMap<DirectiveKind>::const_iterator DirKindIt =
DirectiveKindMap.find(IDVal.lower());
DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())

? DK_NO_DIRECTIVE
: DirKindIt->getValue();
switch (DirKind) {
Expand Down Expand Up @@ -1868,6 +1880,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
Lex();
}

if (discardLTOSymbol(IDVal))
return false;

getTargetParser().doBeforeLabelEmit(Sym);

// Emit the label.
Expand Down Expand Up @@ -2208,6 +2223,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveAddrsigSym();
case DK_PSEUDO_PROBE:
return parseDirectivePseudoProbe();
case DK_LTO_DISCARD:
return parseDirectiveLTODiscard();
}

return Error(IDLoc, "unknown directive");
Expand Down Expand Up @@ -2852,6 +2869,9 @@ bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
return false;
}

if (discardLTOSymbol(Name))
return false;

// Do the assignment.
Out.emitAssignment(Sym, Value);
if (NoDeadStrip)
Expand Down Expand Up @@ -4870,6 +4890,10 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
SMLoc Loc = getTok().getLoc();
if (parseIdentifier(Name))
return Error(Loc, "expected identifier");

if (discardLTOSymbol(Name))
return false;

MCSymbol *Sym = getContext().getOrCreateSymbol(Name);

// Assembler local symbols don't make any sense here. Complain loudly.
Expand Down Expand Up @@ -5493,6 +5517,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".addrsig"] = DK_ADDRSIG;
DirectiveKindMap[".addrsig_sym"] = DK_ADDRSIG_SYM;
DirectiveKindMap[".pseudoprobe"] = DK_PSEUDO_PROBE;
DirectiveKindMap[".lto_discard"] = DK_LTO_DISCARD;
}

MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
Expand Down Expand Up @@ -5806,6 +5831,27 @@ bool AsmParser::parseDirectivePseudoProbe() {
return false;
}

/// parseDirectiveLTODiscard
/// ::= ".lto_discard" [ identifier ( , identifier )* ]
/// The LTO library emits this directive to discard non-prevailing symbols.
/// We ignore symbol assignments and attribute changes for the specified
/// symbols.
bool AsmParser::parseDirectiveLTODiscard() {
auto ParseOp = [&]() -> bool {
StringRef Name;
SMLoc Loc = getTok().getLoc();
if (parseIdentifier(Name))
return Error(Loc, "expected identifier");
LTODiscardSymbols.insert(Name);
return false;
};

LTODiscardSymbols.clear();
if (parseMany(ParseOp))
return addErrorSuffix(" in directive");
return false;
}

// We are comparing pointers, but the pointers are relative to a single string.
// Thus, this should always be deterministic.
static int rewritesSort(const AsmRewrite *AsmRewriteA,
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/MC/MCParser/ELFAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,12 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");

if (getParser().discardLTOSymbol(Name)) {
if (getLexer().is(AsmToken::EndOfStatement))
break;
continue;
}

MCSymbol *Sym = getContext().getOrCreateSymbol(Name);

getStreamer().emitSymbolAttribute(Sym, Attr);
Expand Down
87 changes: 87 additions & 0 deletions llvm/test/LTO/X86/inline-asm-lto-discard.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
; Check that non-prevailing symbols in module inline assembly are discarded
; during regular LTO otherwise the final symbol binding could be wrong.

; RUN: split-file %s %t
; RUN: opt %t/t1.ll -o %t1
; RUN: opt %t/t2.ll -o %t2
; RUN: opt %t/t3.ll -o %t3
; RUN: opt %t/t4.ll -o %t4

; RUN: llvm-lto2 run -o %to1 -save-temps %t1 %t2 \
; RUN: -r %t1,foo,px \
; RUN: -r %t2,foo, \
; RUN: -r %t2,bar,pl
; RUN: llvm-dis < %to1.0.0.preopt.bc | FileCheck %s --check-prefix=ASM1
; RUN: llvm-nm %to1.0 | FileCheck %s --check-prefix=SYM
; RUN: llvm-objdump -d --disassemble-symbols=foo %to1.0 \
; RUN: | FileCheck %s --check-prefix=DEF

; RUN: llvm-lto2 run -o %to2 -save-temps %t2 %t3 \
; RUN: -r %t2,foo, \
; RUN: -r %t2,bar,pl \
; RUN: -r %t3,foo,px
; RUN: llvm-dis < %to2.0.0.preopt.bc | FileCheck %s --check-prefix=ASM2
; RUN: llvm-nm %to2.0 | FileCheck %s --check-prefix=SYM
; RUN: llvm-objdump -d --disassemble-symbols=foo %to2.0 \
; RUN: | FileCheck %s --check-prefix=DEF

; Check that ".symver" is properly handled.
; RUN: llvm-lto2 run -o %to3 -save-temps %t4 \
; RUN: -r %t4,bar, \
; RUN: -r %t4,foo, \
; RUN: -r %t4,foo@@VER1,px
; RUN: llvm-dis < %to3.0.0.preopt.bc | FileCheck %s --check-prefix=ASM3

; ASM1: module asm ".lto_discard foo"
; ASM1-NEXT: module asm ".weak foo"
; ASM1-NEXT: module asm ".equ foo,bar"

; ASM2: module asm ".lto_discard foo"
; ASM2-NEXT: module asm ".weak foo"
; ASM2-NEXT: module asm ".equ foo,bar"
; ASM2-NEXT: module asm ".lto_discard"
; ASM2-NEXT: module asm " .global foo ; foo: leal 2(%rdi), %eax"

; ASM3-NOT: module asm ".lto_discard foo"

; SYM: T foo

; DEF: leal 2(%rdi), %eax

;--- t1.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define dso_local i32 @foo(i32 %0) {
%2 = add nsw i32 %0, 2
ret i32 %2
}

;--- t2.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

module asm ".weak foo"
module asm ".equ foo,bar"

@llvm.compiler.used = appending global [1 x i8*] [i8* bitcast (i32 (i32)* @bar to i8*)], section "llvm.metadata"

define internal i32 @bar(i32 %0) {
%2 = add nsw i32 %0, 1
ret i32 %2
}

;--- t3.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

module asm " .global foo ; foo: leal 2(%rdi), %eax"

;--- t4.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

module asm ".global foo"
module asm "foo: call bar"
module asm ".symver foo,foo@@@VER1"
module asm ".symver bar,bar@@@VER1"
29 changes: 29 additions & 0 deletions llvm/test/LTO/X86/inline-asm-lto-discard2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; Check that
; 1. ".lto_discard" works as module inlineasm marker and its argument symbols
; are discarded.
; 2. there is no reassignment error in the presence of ".lto_discard"
; RUN: llc < %s | FileCheck %s

; CHECK: .data
; CHECK-NOT: .weak foo
; CHECK-NOT: .set foo, bar
; CHECK: .globl foo
; CHECK: foo:
; CHECK: .byte 1

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

module asm ".lto_discard foo"
module asm " .text"
module asm "bar:"
module asm " .data"
module asm ".weak foo"
module asm ".set foo, bar"
module asm ".weak foo"
module asm ".set foo, bar"

module asm ".lto_discard"
module asm ".globl foo"
module asm "foo:"
module asm " .byte 1"
31 changes: 31 additions & 0 deletions llvm/test/MC/ELF/lto-discard.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Check that ".lto_discard" ignores symbol assignments and attribute changes
// for the specified symbols.
// RUN: llvm-mc -triple x86_64 < %s | FileCheck %s

// Check that ".lto_discard" only accepts identifiers.
// RUN: not llvm-mc -filetype=obj -triple x86_64 --defsym ERR=1 %s 2>&1 |\
// RUN: FileCheck %s --check-prefix=ERR

// CHECK: .weak foo
// CHECK: foo:
// CHECK: .byte 1
// CHECK: .weak bar
// CHECK: bar:
// CHECK: .byte 2

.lto_discard foo
.weak foo
foo:
.byte 1

.lto_discard
.weak bar
bar:
.byte 2


.ifdef ERR
.text
# ERR: {{.*}}.s:[[#@LINE+1]]:14: error: expected identifier in directive
.lto_discard 1
.endif

0 comments on commit b4a8c0e

Please sign in to comment.