Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WebAssembly] Add segment RETAIN flag to support private retained data #81539

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
62 changes: 62 additions & 0 deletions lld/test/wasm/no-strip-segment.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/main.o %t/main.s
# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/liba_x.o %t/liba_x.s
# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/liba_y.o %t/liba_y.s
# RUN: rm -f %t/liba.a
# RUN: llvm-ar rcs %t/liba.a %t/liba_x.o %t/liba_y.o
# RUN: wasm-ld %t/main.o %t/liba.a --gc-sections -o %t/main.wasm --print-gc-sections | FileCheck %s --check-prefix=GC
# RUN: obj2yaml %t/main.wasm | FileCheck %s

# Should not remove retained "greetings" sections from live objects main.o and liba_x.o.
# lit expands %t only in RUN lines, not inside FileCheck patterns, so the path
# prefix is matched with a regex instead. A -NOT directive only covers the input
# between its neighbouring positive matches, so the check is repeated on both
# sides of the GC line to cover the whole --print-gc-sections output.
# GC-NOT: removing unused section {{.*}}:(greetings)
# --gc-sections should remove non-retained and unused "weathers" section from live object liba_x.o
# GC: removing unused section {{.*}}/liba.a(liba_x.o):(weathers)
# GC-NOT: removing unused section {{.*}}:(greetings)

# Note: All symbols are private so that they don't join the symbol table.

#--- main.s
.functype grab_liba () -> ()
.globl _start
_start:
.functype _start () -> ()
call grab_liba
end_function

.section greetings,"R",@
.asciz "hello"
.section weathers,"R",@
.asciz "cloudy"

#--- liba_x.s
.globl grab_liba
grab_liba:
.functype grab_liba () -> ()
end_function

.section greetings,"R",@
.asciz "world"
.section weathers,"",@
.asciz "rainy"

#--- liba_y.s
.section greetings,"R",@
.asciz "bye"


# "greetings" section
# CHECK: - Type: DATA
# CHECK: Segments:
# CHECK: - SectionOffset: 7
# CHECK: InitFlags: 0
# CHECK: Offset:
# CHECK: Opcode: I32_CONST
# CHECK: Value: 1024
# CHECK: Content: 68656C6C6F00776F726C6400
# "weathers" section.
# CHECK: - SectionOffset: 25
# CHECK: InitFlags: 0
# CHECK: Offset:
# CHECK: Opcode: I32_CONST
# CHECK: Value: 1036
# CHECK: Content: 636C6F75647900
1 change: 1 addition & 0 deletions lld/wasm/InputChunks.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class InputChunk {
void generateRelocationCode(raw_ostream &os) const;

// Reports whether WASM_SEG_FLAG_TLS is set in this chunk's segment flags.
bool isTLS() const {
  return (flags & llvm::wasm::WASM_SEG_FLAG_TLS) != 0;
}
// Reports whether WASM_SEG_FLAG_RETAIN is set in this chunk's segment flags.
bool isRetained() const {
  return (flags & llvm::wasm::WASM_SEG_FLAG_RETAIN) != 0;
}

ObjFile *file;
OutputSection *outputSec = nullptr;
Expand Down
39 changes: 31 additions & 8 deletions lld/wasm/MarkLive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ class MarkLive {

private:
void enqueue(Symbol *sym);
void enqueue(InputChunk *chunk);
void enqueueInitFunctions(const ObjFile *sym);
void enqueueRetainedSegments(const ObjFile *file);
void mark();
bool isCallCtorsLive();

Expand All @@ -56,21 +58,30 @@ void MarkLive::enqueue(Symbol *sym) {
LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");

InputFile *file = sym->getFile();
bool needInitFunctions = file && !file->isLive() && sym->isDefined();
bool markImplicitDeps = file && !file->isLive() && sym->isDefined();

sym->markLive();

// Mark ctor functions in the object that defines this symbol live.
// The ctor functions are all referenced by the synthetic callCtors
// function. However, this function does not contain relocations so we
// have to manually mark the ctors as live.
if (needInitFunctions)
if (markImplicitDeps) {
// Mark ctor functions in the object that defines this symbol live.
// The ctor functions are all referenced by the synthetic callCtors
// function. However, this function does not contain relocations so we
// have to manually mark the ctors as live.
enqueueInitFunctions(cast<ObjFile>(file));
// Mark retained segments in the object that defines this symbol live.
enqueueRetainedSegments(cast<ObjFile>(file));
}

if (InputChunk *chunk = sym->getChunk())
queue.push_back(chunk);
kateinoigakukun marked this conversation as resolved.
Show resolved Hide resolved
}

// Marks `chunk` itself as live (no symbol is involved) and appends it to the
// work queue.
void MarkLive::enqueue(InputChunk *chunk) {
  LLVM_DEBUG(dbgs() << "markLive: " << toString(chunk) << "\n");
  // The two steps below are independent: appending the pointer does not read
  // the live bit.
  queue.push_back(chunk);
  chunk->live = true;
}

// The ctor functions are all referenced by the synthetic callCtors
// function. However, this function does not contain relocations so we
// have to manually mark the ctors as live.
Expand All @@ -83,6 +94,14 @@ void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
}
}

// Mark segments flagged by segment-level no-strip. Segment-level no-strip is
// usually used to retain segments without having symbol table entry.
void MarkLive::enqueueRetainedSegments(const ObjFile *file) {
for (InputChunk *chunk : file->segments)
if (chunk->isRetained())
enqueue(chunk);
}

void MarkLive::run() {
// Add GC root symbols.
if (!config->entry.empty())
Expand All @@ -96,10 +115,14 @@ void MarkLive::run() {
if (WasmSym::callDtors)
enqueue(WasmSym::callDtors);

// Enqueue constructors in objects explicitly live from the command-line.
for (const ObjFile *obj : ctx.objectFiles)
if (obj->isLive())
if (obj->isLive()) {
// Enqueue constructors in objects explicitly live from the command-line.
enqueueInitFunctions(obj);
// Enqueue retained segments in objects explicitly live from the
// command-line.
enqueueRetainedSegments(obj);
}

mark();

Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/BinaryFormat/Wasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ enum WasmSymbolType : unsigned {
// Bit flags describing a wasm data segment. Each flag occupies its own bit so
// the values can be OR-ed together.
enum WasmSegmentFlag : unsigned {
  // 0x1: set for mergeable C-string sections ('S' in assembly).
  WASM_SEG_FLAG_STRINGS = 1u << 0,
  // 0x2: set for thread-local sections ('T' in assembly).
  WASM_SEG_FLAG_TLS = 1u << 1,
  // 0x4: segment must be retained ('R' in assembly).
  WASM_SEG_FLAG_RETAIN = 1u << 2,
};

// Kinds of tag attributes.
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,11 +207,14 @@ class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {

class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile {
mutable unsigned NextUniqueID = 0;
SmallPtrSet<GlobalObject *, 2> Used;

public:
TargetLoweringObjectFileWasm() = default;
~TargetLoweringObjectFileWasm() override = default;

void getModuleMetadata(Module &M) override;

MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;

Expand Down
29 changes: 22 additions & 7 deletions llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2142,7 +2142,7 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) {
return C;
}

static unsigned getWasmSectionFlags(SectionKind K) {
static unsigned getWasmSectionFlags(SectionKind K, bool Retain) {
unsigned Flags = 0;

if (K.isThreadLocal())
Expand All @@ -2151,11 +2151,22 @@ static unsigned getWasmSectionFlags(SectionKind K) {
if (K.isMergeableCString())
Flags |= wasm::WASM_SEG_FLAG_STRINGS;

if (Retain)
Flags |= wasm::WASM_SEG_FLAG_RETAIN;

// TODO(sbc): Add support for K.isMergeableConst()

return Flags;
}

// Records the module's "used" global objects in `Used`, which is later
// consulted when choosing section flags so that their sections get the RETAIN
// flag.
void TargetLoweringObjectFileWasm::getModuleMetadata(Module &M) {
  SmallVector<GlobalValue *, 4> UsedGlobals;
  // NOTE(review): the `false` argument presumably selects @llvm.used rather
  // than @llvm.compiler.used -- confirm against collectUsedGlobalVariables.
  collectUsedGlobalVariables(M, UsedGlobals, false);
  for (GlobalValue *Candidate : UsedGlobals) {
    // Only entries that are GlobalObjects are recorded.
    auto *Object = dyn_cast<GlobalObject>(Candidate);
    if (!Object)
      continue;
    Used.insert(Object);
  }
}

MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// We don't support explicit section names for functions in the wasm object
Expand All @@ -2179,16 +2190,18 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
Group = C->getName();
}

unsigned Flags = getWasmSectionFlags(Kind);
unsigned Flags = getWasmSectionFlags(Kind, Used.count(GO));
MCSectionWasm *Section = getContext().getWasmSection(
Name, Kind, Flags, Group, MCContext::GenericSectionID);

return Section;
}

static MCSectionWasm *selectWasmSectionForGlobal(
MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) {
static MCSectionWasm *
selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection,
unsigned *NextUniqueID, bool Retain) {
StringRef Group = "";
if (const Comdat *C = getWasmComdat(GO)) {
Group = C->getName();
Expand All @@ -2213,7 +2226,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
(*NextUniqueID)++;
}

unsigned Flags = getWasmSectionFlags(Kind);
unsigned Flags = getWasmSectionFlags(Kind, Retain);
return Ctx.getWasmSection(Name, Kind, Flags, Group, UniqueID);
}

Expand All @@ -2231,9 +2244,11 @@ MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
else
EmitUniqueSection = TM.getDataSections();
EmitUniqueSection |= GO->hasComdat();
bool Retain = Used.count(GO);
EmitUniqueSection |= Retain;

return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
EmitUniqueSection, &NextUniqueID);
EmitUniqueSection, &NextUniqueID, Retain);
}

bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection(
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/MC/MCParser/WasmAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ class WasmAsmParser : public MCAsmParserExtension {
case 'S':
flags |= wasm::WASM_SEG_FLAG_STRINGS;
break;
case 'R':
flags |= wasm::WASM_SEG_FLAG_RETAIN;
break;
default:
return -1U;
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/MC/MCSectionWasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ void MCSectionWasm::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << 'S';
if (SegmentFlags & wasm::WASM_SEG_FLAG_TLS)
OS << 'T';
if (SegmentFlags & wasm::WASM_SEG_FLAG_RETAIN)
OS << 'R';
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this match ELF?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, ELF has a similar flag named SHF_GNU_RETAIN and it's represented as 'R'

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we mirror that name and use WASM_SEG_FLAG_RETAIN as the name of our flag?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I chose NO_STRIP to align with the existing symbol-level flag WASM_SYMBOL_NO_STRIP, but I don't have strong opinions here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like to mirror the ELF flag names and I like that the assembly format "R" mnemonic matches, but I don't feel strongly. @MaskRay WDYT?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record, it looks like SHF_MIPS_NOSTRIP also exists in the ELF world and SHF_GNU_RETAIN is a GNU extension rather than part of ELF core.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on how WASM_SYMBOL_NO_STRIP was chosen as a symbol flag name in https://reviews.llvm.org/D62542#1519799, I agree we don't need to align with it because the segment flag is not based on .no_dead_strip. Now I feel WASM_SEG_FLAG_RETAIN makes more sense 👍


OS << '"';

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/ObjectYAML/WasmYAML.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ void ScalarBitSetTraits<WasmYAML::SegmentFlags>::bitset(
#define BCase(X) IO.bitSetCase(Value, #X, wasm::WASM_SEG_FLAG_##X)
BCase(STRINGS);
BCase(TLS);
BCase(RETAIN);
#undef BCase
}

Expand Down
22 changes: 22 additions & 0 deletions llvm/test/CodeGen/WebAssembly/no-strip.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
; RUN: llc < %s --mtriple=wasm32-unknown-unknown | FileCheck %s

@llvm.used = appending global [
5 x ptr
] [
ptr @ga, ptr @gb, ptr @gc, ptr @gd, ptr @ge
], section "llvm.metadata"

; CHECK: .section .data.ga,"R",@
@ga = global i32 42
; CHECK: .section .data.gb,"R",@
@gb = internal global i32 41
; CHECK: .section .data..Lgc,"R",@
@gc = private global i32 40
; CHECK: .section .rodata.gd,"R",@
@gd = constant i32 39

; All sections with the same explicit name are flagged as retained if a part of them is retained.
; CHECK: .section dddd,"R",@
@ge = global i32 38, section "dddd"
; CHECK: .section dddd,"R",@
@gg = global i32 37, section "dddd"
76 changes: 62 additions & 14 deletions llvm/test/MC/WebAssembly/no-dead-strip.ll
Original file line number Diff line number Diff line change
@@ -1,21 +1,69 @@
; RUN: llc -filetype=obj -wasm-keep-registers %s -o - | llvm-readobj --symbols - | FileCheck %s
; RUN: llc < %s --mtriple=wasm32-unknown-unknown -filetype=obj -wasm-keep-registers -o - | obj2yaml - | FileCheck %s

target triple = "wasm32-unknown-unknown"

@llvm.used = appending global [1 x ptr] [ptr @foo], section "llvm.metadata"
@llvm.used = appending global [5 x ptr] [
ptr @foo, ptr @gv0, ptr @gv1, ptr @gv2, ptr @gv3
], section "llvm.metadata"

define i32 @foo() {
entry:
ret i32 0
}

; CHECK: Symbols [
; CHECK-NEXT: Symbol {
; CHECK-NEXT: Name: foo
; CHECK-NEXT: Type: FUNCTION (0x0)
; CHECK-NEXT: Flags [ (0x80)
; CHECK-NEXT: NO_STRIP (0x80)
; CHECK-NEXT: ]
; CHECK-NEXT: ElementIndex: 0x0
; CHECK-NEXT: }
; CHECK-NEXT: ]
; externally visible GV has NO_STRIP/RETAIN in both symtab entry and segment info
@gv0 = global i32 42
; internal GV has NO_STRIP/RETAIN in both symtab entry and segment info
@gv1 = internal global i32 41
; private GV has RETAIN in segment info only (no symtab entry)
@gv2 = private global i32 40
; explicit section names
@gv3 = global i32 39, section "ddd.hello"
@gv4.not.used = global i64 38, section "ddd.hello"

; CHECK: SymbolTable:
; CHECK-NEXT: - Index: 0
; CHECK-NEXT: Kind: FUNCTION
; CHECK-NEXT: Name: foo
; CHECK-NEXT: Flags: [ NO_STRIP ]
; CHECK-NEXT: Function: 0
; CHECK-NEXT: - Index: 1
; CHECK-NEXT: Kind: DATA
; CHECK-NEXT: Name: gv0
; CHECK-NEXT: Flags: [ NO_STRIP ]
; CHECK-NEXT: Segment: 0
; CHECK-NEXT: Size: 4
; CHECK-NEXT: - Index: 2
; CHECK-NEXT: Kind: DATA
; CHECK-NEXT: Name: gv1
; CHECK-NEXT: Flags: [ BINDING_LOCAL, NO_STRIP ]
; CHECK-NEXT: Segment: 1
; CHECK-NEXT: Size: 4
; CHECK-NEXT: - Index: 3
; CHECK-NEXT: Kind: DATA
; CHECK-NEXT: Name: gv3
; CHECK-NEXT: Flags: [ NO_STRIP ]
; CHECK-NEXT: Segment: 3
; CHECK-NEXT: Size: 4
; CHECK-NEXT: - Index: 4
; CHECK-NEXT: Kind: DATA
; CHECK-NEXT: Name: gv4.not.used
; CHECK-NEXT: Flags: [ ]
; CHECK-NEXT: Segment: 3
; CHECK-NEXT: Offset: 8
; CHECK-NEXT: Size: 8
; CHECK-NEXT: SegmentInfo:
; CHECK-NEXT: - Index: 0
; CHECK-NEXT: Name: .data.gv0
; CHECK-NEXT: Alignment: 2
; CHECK-NEXT: Flags: [ RETAIN ]
; CHECK-NEXT: - Index: 1
; CHECK-NEXT: Name: .data.gv1
; CHECK-NEXT: Alignment: 2
; CHECK-NEXT: Flags: [ RETAIN ]
; CHECK-NEXT: - Index: 2
; CHECK-NEXT: Name: .data..Lgv2
; CHECK-NEXT: Alignment: 2
; CHECK-NEXT: Flags: [ RETAIN ]
; CHECK-NEXT: - Index: 3
; CHECK-NEXT: Name: ddd.hello
; CHECK-NEXT: Alignment: 3
; CHECK-NEXT: Flags: [ RETAIN ]