Skip to content

Commit

Permalink
[IRSymTab] Set FB_used on llvm.compiler.used symbols
Browse files Browse the repository at this point in the history
IR symbol table does not parse inline asm. A symbol only referenced by inline
asm is not in the IR symbol table, so LTO does not know that the definition (in
another translation unit) is referenced and may internalize it, even if that
definition has `__attribute__((used))` (which lowers to `llvm.compiler.used` on
ELF targets since D97446).

```
// cabac.c
__attribute__((used)) const uint8_t ff_h264_cabac_tables[...] = {...};

// h264_cabac.c
  asm("lea ff_h264_cabac_tables(%rip), %0" : ...);
```

`__attribute__((used))` is the recommended way to tell the compiler there may
be inline asm references, so the usage is perfectly fine. This patch
conservatively sets the `FB_used` bit on `llvm.compiler.used` symbols to work
around the IR symbol table limitation. Note: before D97446, Clang never emitted
symbols in the `llvm.compiler.used` list, so this change does not punish any
Clang emitted global object.

Without the patch, `ff_h264_cabac_tables` may be assigned to a non-external
partition and get internalized. Then we will get a linker error because the
`cabac.c` definition is not exposed.

Differential Revision: https://reviews.llvm.org/D97755
  • Loading branch information
MaskRay committed Mar 4, 2021
1 parent 0b274ed commit 584cb67
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 2 deletions.
29 changes: 29 additions & 0 deletions clang/test/CodeGen/thinlto-inline-asm2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// REQUIRES: x86-registered-target

// RUN: split-file %s %t
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -flto=thin -emit-llvm-bc %t/a.c -o %t/a.bc
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -flto=thin -emit-llvm-bc %t/b.c -o %t/b.bc
// RUN: llvm-nm %t/a.bc | FileCheck %s --check-prefix=NM

// RUN: llvm-lto2 run %t/a.bc %t/b.bc -o %t/out -save-temps -r=%t/a.bc,ref,plx -r=%t/b.bc,ff_h264_cabac_tables,pl
// RUN: llvm-dis < %t/out.2.2.internalize.bc | FileCheck %s

//--- a.c
/// IR symtab does not track inline asm symbols, so we don't know
/// ff_h264_cabac_tables is undefined.
// NM-NOT: {{.}}
// NM: ---------------- T ref
// NM-NOT: {{.}}
const char *ref() {
const char *ret;
asm("lea ff_h264_cabac_tables(%%rip), %0" : "=r"(ret));
return ret;
}

//--- b.c
/// ff_h264_cabac_tables has __attribute__((used)) in the source code, which means
/// its definition must be retained because there can be references the compiler
/// cannot see (inline asm reference). Test we don't internalize it.
// CHECK: @ff_h264_cabac_tables = dso_local constant [1 x i8] c"\09"
__attribute__((used))
const char ff_h264_cabac_tables[1] = "\x09";
4 changes: 2 additions & 2 deletions llvm/lib/LTO/LTO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,8 +563,8 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,

// Set the partition to external if we know it is re-defined by the linker
// with -defsym or -wrap options, used elsewhere, e.g. it is visible to a
// regular object, is referenced from llvm.compiler_used, or was already
// recorded as being referenced from a different partition.
// regular object, is referenced from llvm.compiler.used/llvm.used, or was
// already recorded as being referenced from a different partition.
if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() ||
(GlobalRes.Partition != GlobalResolution::Unknown &&
GlobalRes.Partition != Partition)) {
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Object/IRSymtab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,20 @@ Error Builder::addModule(Module *M) {
return make_error<StringError>("input module has no datalayout",
inconvertibleErrorCode());

// Symbols in the llvm.used list will get the FB_Used bit and will not be
// internalized. We do this for llvm.compiler.used as well:
//
// IR symbol table tracks module-level asm symbol references but not inline
// asm. A symbol only referenced by inline asm is not in the IR symbol table,
// so we may not know that the definition (in another translation unit) is
// referenced. That definition may have __attribute__((used)) (which lowers to
// llvm.compiler.used on ELF targets) to communicate to the compiler that it
// may be used by inline asm. The usage is perfectly fine, so we treat
// llvm.compiler.used conservatively as llvm.used to work around our own
// limitation.
SmallVector<GlobalValue *, 4> UsedV;
collectUsedGlobalVariables(*M, UsedV, /*CompilerUsed=*/false);
collectUsedGlobalVariables(*M, UsedV, /*CompilerUsed=*/true);
SmallPtrSet<GlobalValue *, 4> Used(UsedV.begin(), UsedV.end());

ModuleSymbolTable Msymtab;
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/ThinLTO/X86/asm.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
; RUN: split-file %s %t
; RUN: opt -module-summary %t/a.s -o %t/a.bc
; RUN: opt -module-summary %t/b.s -o %t/b.bc
; RUN: llvm-nm %t/a.bc | FileCheck %s --check-prefix=NM

; RUN: llvm-lto2 run %t/a.bc %t/b.bc -o %t/out -save-temps -r=%t/a.bc,ref,plx -r=%t/b.bc,ff_h264_cabac_tables,pl
; RUN: llvm-dis < %t/out.2.2.internalize.bc | FileCheck %s

;--- a.s
;; IR symtab does not track inline asm symbols, so we don't know
;; ff_h264_cabac_tables is undefined.
; NM-NOT: {{.}}
; NM: ---------------- T ref
; NM-NOT: {{.}}
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i8* @ref() {
entry:
%0 = tail call i8* asm sideeffect "lea ff_h264_cabac_tables(%rip), $0", "=&r,~{dirflag},~{fpsr},~{flags}"()
ret i8* %0
}

;--- b.s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;; ff_h264_cabac_tables has __attribute__((used)) in the source code, which means
;; its definition must be retained because there can be references the compiler
;; cannot see (inline asm reference). Test we don't internalize it.
; CHECK: @ff_h264_cabac_tables = dso_local constant [1 x i8] c"\09"
@ff_h264_cabac_tables = dso_local constant [1 x i8] c"\09"

@llvm.compiler.used = appending global [1 x i8*] [i8* bitcast ([1 x i8]* @ff_h264_cabac_tables to i8*)], section "llvm.metadata"
11 changes: 11 additions & 0 deletions llvm/test/tools/gold/X86/emit-llvm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ define hidden void @f1() {
ret void
}

;; f2 is not internalized because it is in the llvm.used list.
; CHECK-DAG: define hidden void @f2()
; OPT-DAG: define hidden void @f2()
define hidden void @f2() {
Expand Down Expand Up @@ -97,6 +98,15 @@ define i32* @f8() {
ret i32* @g8
}

;; f9 is not internalized because it is in the llvm.compiler.used list.
; CHECK-DAG: define hidden void @f9()
; OPT-DAG: define hidden void @f9()
define hidden void @f9() {
ret void
}

@llvm.compiler.used = appending global [1 x i8*] [ i8* bitcast (void ()* @f9 to i8*)]

; RES: .o,f1,pl{{$}}
; RES: .o,f2,pl{{$}}
; RES: .o,f3,px{{$}}
Expand All @@ -105,6 +115,7 @@ define i32* @f8() {
; RES: .o,f6,p{{$}}
; RES: .o,f7,px{{$}}
; RES: .o,f8,px{{$}}
; RES: .o,f9,pl{{$}}
; RES: .o,g1,px{{$}}
; RES: .o,g2,p{{$}}
; RES: .o,g3,p{{$}}
Expand Down

0 comments on commit 584cb67

Please sign in to comment.