Skip to content

Commit

Permalink
[PDB] Fix linking of function symbols and local variables.
Browse files Browse the repository at this point in the history
The compiler outputs PROC32_ID symbols into the object files
for functions, and these symbols have an embedded type index
which, when copied to the PDB, refers to the IPI stream.  However,
the symbols themselves are also converted into regular symbols
(e.g. S_GPROC32_ID -> S_GPROC32), and type indices in the regular
symbol records refer to the TPI stream.  So this patch applies
two fixes to function records.
  1. It converts ID symbols to the proper non-ID record type.
  2. After remapping the type index from the object file's index
     space to the PDB file/IPI stream's index space, it then
     remaps that index to the TPI stream's index space by looking up
     the function type referenced by the LF_FUNC_ID / LF_MEMFUNC_ID
     record it points to.

Besides functions, during the remapping process we were also
discarding symbol record types which we did not recognize.
In particular, we were discarding S_BPREL32 records, which is
what MSVC uses to describe local variables on the stack.  So
this patch fixes that as well by copying them to the PDB.

Differential Revision: https://reviews.llvm.org/D36426

llvm-svn: 310394
  • Loading branch information
Zachary Turner committed Aug 8, 2017
1 parent e502f00 commit 59e3ae8
Show file tree
Hide file tree
Showing 14 changed files with 207 additions and 44 deletions.
81 changes: 66 additions & 15 deletions lld/COFF/PDB.cpp
Expand Up @@ -17,6 +17,7 @@
#include "llvm/DebugInfo/CodeView/CVDebugRecord.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
Expand Down Expand Up @@ -298,6 +299,7 @@ static bool remapTypeIndex(TypeIndex &TI, ArrayRef<TypeIndex> TypeIndexMap) {
static void remapTypesInSymbolRecord(ObjFile *File,
MutableArrayRef<uint8_t> Contents,
const CVIndexMap &IndexMap,
const TypeTableBuilder &IDTable,
ArrayRef<TiReference> TypeRefs) {
for (const TiReference &Ref : TypeRefs) {
unsigned ByteSize = Ref.Count * sizeof(TypeIndex);
Expand All @@ -322,11 +324,55 @@ static void remapTypesInSymbolRecord(ObjFile *File,
}
}

/// MSVC translates S_PROC_ID_END to S_END.
uint16_t canonicalizeSymbolKind(SymbolKind Kind) {
if (Kind == SymbolKind::S_PROC_ID_END)
return SymbolKind::S_END;
return Kind;
/// Return the symbol kind stored in the RecordPrefix at the start of
/// \p RecordData. No length check is performed here, so the caller must pass
/// a buffer that begins with a complete RecordPrefix.
static SymbolKind symbolKind(ArrayRef<uint8_t> RecordData) {
  const auto *Header =
      reinterpret_cast<const RecordPrefix *>(RecordData.data());
  const uint16_t RawKind = Header->RecordKind;
  return static_cast<SymbolKind>(RawKind);
}

/// Rewrite an S_xxx_ID symbol record in place as its non-ID counterpart, as
/// MSVC does: S_PROC_ID_END becomes S_END, and S_[LG]PROC32_ID becomes
/// S_[LG]PROC32. Records of any other kind are left untouched.
///
/// \param RecordData the full symbol record, starting at its RecordPrefix.
/// \param IDTable    the ID records; indexed by the procedure's function-id
///                   type index to find the function type it refers to.
static void translateIdSymbols(MutableArrayRef<uint8_t> &RecordData,
                               const TypeTableBuilder &IDTable) {
  auto *Header = reinterpret_cast<RecordPrefix *>(RecordData.data());
  const SymbolKind OldKind = symbolKind(RecordData);

  // Scope terminator: only the kind changes.
  if (OldKind == SymbolKind::S_PROC_ID_END) {
    Header->RecordKind = SymbolKind::S_END;
    return;
  }

  const bool IsGlobalProc = OldKind == SymbolKind::S_GPROC32_ID;
  const bool IsLocalProc = OldKind == SymbolKind::S_LPROC32_ID;
  if (!IsGlobalProc && !IsLocalProc)
    return;

  // In an object file, a [GL]PROC32_ID record embeds a reference into the
  // object file's single type index namespace. By the time we get here that
  // reference has already been translated into the PDB's ID stream index
  // space, but the non-ID symbol we are about to produce must refer to the
  // type stream index space instead, so the index is remapped once more.
  SmallVector<TiReference, 1> TypeRefs;
  auto Payload = RecordData.drop_front(sizeof(RecordPrefix));
  CVSymbol Sym(OldKind, RecordData);
  discoverTypeIndicesInSymbol(Sym, TypeRefs);
  assert(TypeRefs.size() == 1);
  assert(TypeRefs.front().Count == 1);

  TypeIndex *FuncIndex =
      reinterpret_cast<TypeIndex *>(Payload.data() + TypeRefs[0].Offset);

  // `FuncIndex` names a FuncIdRecord or MemberFuncIdRecord in the IPI stream,
  // whose `FunctionType` member refers to the TPI stream. LF_FUNC_ID and
  // LF_MEMFUNC_ID share a record layout, so in both cases the second type
  // index of the ID record is the one we want. Simple and "none" indices have
  // no ID record to look up and are kept as they are.
  const bool NeedsLookup = !(FuncIndex->isSimple() || FuncIndex->isNoneType());
  if (NeedsLookup) {
    ArrayRef<uint8_t> FuncIdData =
        IDTable.records()[FuncIndex->toArrayIndex()];
    SmallVector<TypeIndex, 2> Found;
    discoverTypeIndices(FuncIdData, Found);
    assert(Found.size() == 2);
    *FuncIndex = Found[1];
  }

  const SymbolKind NewKind =
      IsGlobalProc ? SymbolKind::S_GPROC32 : SymbolKind::S_LPROC32;
  Header->RecordKind = uint16_t(NewKind);
}

/// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned.
Expand All @@ -344,10 +390,8 @@ static MutableArrayRef<uint8_t> copySymbolForPdb(const CVSymbol &Sym,
memset(NewData.data() + Sym.length(), 0, Size - Sym.length());

// Update the record prefix length. It should point to the beginning of the
// next record. MSVC does some canonicalization of the record kind, so we do
// that as well.
// next record.
auto *Prefix = reinterpret_cast<RecordPrefix *>(Mem);
Prefix->RecordKind = canonicalizeSymbolKind(Sym.kind());
Prefix->RecordLen = Size - 2;
return NewData;
}
Expand Down Expand Up @@ -418,18 +462,19 @@ static void scopeStackClose(SmallVectorImpl<SymbolScope> &Stack,

static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjFile *File,
const CVIndexMap &IndexMap,
const TypeTableBuilder &IDTable,
BinaryStreamRef SymData) {
// FIXME: Improve error recovery by warning and skipping records when
// possible.
CVSymbolArray Syms;
BinaryStreamReader Reader(SymData);
ExitOnErr(Reader.readArray(Syms, Reader.getLength()));
SmallVector<SymbolScope, 4> Scopes;
for (const CVSymbol &Sym : Syms) {
for (CVSymbol Sym : Syms) {
// Discover type index references in the record. Skip it if we don't know
// where they are.
SmallVector<TiReference, 32> TypeRefs;
if (!discoverTypeIndices(Sym, TypeRefs)) {
if (!discoverTypeIndicesInSymbol(Sym, TypeRefs)) {
log("ignoring unknown symbol record with kind 0x" + utohexstr(Sym.kind()));
continue;
}
Expand All @@ -440,13 +485,19 @@ static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjFile *File,
// Re-map all the type index references.
MutableArrayRef<uint8_t> Contents =
NewData.drop_front(sizeof(RecordPrefix));
remapTypesInSymbolRecord(File, Contents, IndexMap, TypeRefs);
remapTypesInSymbolRecord(File, Contents, IndexMap, IDTable, TypeRefs);

// An object file may have S_xxx_ID symbols, but these get converted to
// "real" symbols in a PDB.
translateIdSymbols(NewData, IDTable);

SymbolKind NewKind = symbolKind(NewData);

// Fill in "Parent" and "End" fields by maintaining a stack of scopes.
CVSymbol NewSym(Sym.kind(), NewData);
if (symbolOpensScope(Sym.kind()))
CVSymbol NewSym(NewKind, NewData);
if (symbolOpensScope(NewKind))
scopeStackOpen(Scopes, File->ModuleDBI->getNextSymbolOffset(), NewSym);
else if (symbolEndsScope(Sym.kind()))
else if (symbolEndsScope(NewKind))
scopeStackClose(Scopes, File->ModuleDBI->getNextSymbolOffset(), File);

// Add the symbol to the module.
Expand Down Expand Up @@ -516,7 +567,7 @@ void PDBLinker::addObjFile(ObjFile *File) {
File->ModuleDBI->addDebugSubsection(SS);
break;
case DebugSubsectionKind::Symbols:
mergeSymbolRecords(Alloc, File, IndexMap, SS.getRecordData());
mergeSymbolRecords(Alloc, File, IndexMap, IDTable, SS.getRecordData());
break;
default:
// FIXME: Process the rest of the subsections.
Expand Down
8 changes: 4 additions & 4 deletions lld/test/COFF/pdb-comdat.test
Expand Up @@ -46,7 +46,7 @@ CHECK: 60 | S_COMPILE3 [size = 60]
CHECK: machine = intel x86-x64, Ver = Microsoft (R) Optimizing Compiler, language = c
CHECK: frontend = 19.0.24215.1, backend = 19.0.24215.1
CHECK: flags = security checks | hot patchable
CHECK: 120 | S_GPROC32_ID [size = 44] `main`
CHECK: 120 | S_GPROC32 [size = 44] `main`
CHECK: parent = 0, end = 196, addr = 0002:0000, code size = 24
CHECK: debug start = 4, debug end = 19, flags = none
CHECK: 164 | S_FRAMEPROC [size = 32]
Expand All @@ -57,7 +57,7 @@ CHECK: 196 | S_END [size = 4]
CHECK: 200 | S_GDATA32 [size = 24] `global`
CHECK: type = 0x0074 (int), addr = 0000:0000
CHECK: 224 | S_BUILDINFO [size = 8] BuildId = `0x100A`
CHECK: 232 | S_GPROC32_ID [size = 44] `foo`
CHECK: 232 | S_GPROC32 [size = 44] `foo`
CHECK: parent = 0, end = 308, addr = 0002:0032, code size = 15
CHECK: debug start = 0, debug end = 14, flags = none
CHECK: 276 | S_FRAMEPROC [size = 32]
Expand All @@ -71,7 +71,7 @@ CHECK: 60 | S_COMPILE3 [size = 60]
CHECK: machine = intel x86-x64, Ver = Microsoft (R) Optimizing Compiler, language = c
CHECK: frontend = 19.0.24215.1, backend = 19.0.24215.1
CHECK: flags = security checks | hot patchable
CHECK: 120 | S_GPROC32_ID [size = 44] `bar`
CHECK: 120 | S_GPROC32 [size = 44] `bar`
CHECK: parent = 0, end = 196, addr = 0002:0048, code size = 14
CHECK: debug start = 4, debug end = 9, flags = none
CHECK: 164 | S_FRAMEPROC [size = 32]
Expand All @@ -82,7 +82,7 @@ CHECK: 196 | S_END [size = 4]
CHECK: 200 | S_GDATA32 [size = 24] `global`
CHECK: type = 0x0074 (int), addr = 0000:0000
CHECK: 224 | S_BUILDINFO [size = 8] BuildId = `0x100D`
CHECK-NOT: S_GPROC32_ID {{.*}} `foo`
CHECK-NOT: S_GPROC32 {{.*}} `foo`
CHECK-LABEL: Mod 0002 | `* Linker *`:

Reorder the object files and verify that the other table is selected.
Expand Down
2 changes: 1 addition & 1 deletion lld/test/COFF/pdb-invalid-func-type.yaml
Expand Up @@ -7,7 +7,7 @@
# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s

# CHECK: Mod 0000 | `{{.*}}pdb-invalid-func-type.yaml.tmp.obj`:
# CHECK: 4 | S_GPROC32_ID [size = 44] `main`
# CHECK: 4 | S_GPROC32 [size = 44] `main`
# CHECK: parent = 0, end = 80, addr = 0001:0000, code size = 3
# CHECK: 48 | S_FRAMEPROC [size = 32]
# CHECK: 80 | S_END [size = 4]
Expand Down
29 changes: 29 additions & 0 deletions lld/test/COFF/pdb-procid-remapping.test
@@ -0,0 +1,29 @@
# RUN: yaml2obj < %p/Inputs/pdb1.yaml > %t1.obj
# RUN: yaml2obj < %p/Inputs/pdb2.yaml > %t2.obj
# RUN: lld-link /debug /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \
# RUN: %t1.obj %t2.obj

# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s

CHECK: Symbols
CHECK-NEXT: ============================================================
CHECK-LABEL: Mod 0000 |
CHECK: 92 | S_GPROC32 [size = 44] `main`
CHECK-NEXT: parent = 0, end = 168, addr = 0002:0000, code size = 14
CHECK-NEXT: type = `0x1004 (int (<no type>))`, debug start = 4, debug end = 9, flags = none
CHECK-NEXT: 136 | S_FRAMEPROC [size = 32]
CHECK-NEXT: size = 40, padding size = 0, offset to padding = 0
CHECK-NEXT: bytes of callee saved registers = 0, exception handler addr = 0000:0000
CHECK-NEXT: flags = has async eh | opt speed
CHECK-NEXT: 168 | S_END [size = 4]
CHECK-LABEL: Mod 0001 |
CHECK: 92 | S_GPROC32 [size = 44] `foo`
CHECK-NEXT: parent = 0, end = 168, addr = 0002:0016, code size = 6
CHECK-NEXT: type = `0x1001 (int ())`, debug start = 0, debug end = 5, flags = none
CHECK-NEXT: 136 | S_FRAMEPROC [size = 32]
CHECK-NEXT: size = 0, padding size = 0, offset to padding = 0
CHECK-NEXT: bytes of callee saved registers = 0, exception handler addr = 0000:0000
CHECK-NEXT: flags = has async eh | opt speed
CHECK-NEXT: 168 | S_END [size = 4]
CHECK-LABEL: Mod 0002 |
CHECK: 4 | S_OBJNAME [size = 20] sig=0, `* Linker *`
6 changes: 3 additions & 3 deletions lld/test/COFF/pdb-scopes.test
Expand Up @@ -34,12 +34,12 @@ RUN: lld-link %t-a.obj %t-b.obj -debug -entry:main -nodefaultlib -out:%t.exe -pd
RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s

CHECK-LABEL: Mod 0000 | `{{.*}}pdb-scopes.test.tmp-a.obj`:
CHECK: 104 | S_GPROC32_ID [size = 44] `g`
CHECK: 104 | S_GPROC32 [size = 44] `g`
CHECK: parent = 0, end = 196, addr = 0002:0000, code size = 5
CHECK: debug start = 4, debug end = 4, flags = none
CHECK: 180 | S_REGREL32 [size = 16] `x`
CHECK: 196 | S_END [size = 4]
CHECK: 200 | S_GPROC32_ID [size = 44] `main`
CHECK: 200 | S_GPROC32 [size = 44] `main`
CHECK: parent = 0, end = 384, addr = 0002:0016, code size = 58
CHECK: debug start = 8, debug end = 53, flags = none
CHECK: 276 | S_REGREL32 [size = 20] `argc`
Expand All @@ -56,7 +56,7 @@ CHECK: 380 | S_END [size = 4]
CHECK: 384 | S_END [size = 4]

CHECK-LABEL: Mod 0001 | `{{.*}}pdb-scopes.test.tmp-b.obj`:
CHECK: 104 | S_GPROC32_ID [size = 44] `f`
CHECK: 104 | S_GPROC32 [size = 44] `f`
CHECK: parent = 0, end = 284, addr = 0002:0080, code size = 62
CHECK: debug start = 8, debug end = 57, flags = none
CHECK: 180 | S_REGREL32 [size = 16] `x`
Expand Down
2 changes: 1 addition & 1 deletion lld/test/COFF/pdb-symbol-types.yaml
Expand Up @@ -21,7 +21,7 @@
# CHECK: machine = intel x86-x64, Ver = Microsoft (R) Optimizing Compiler, language = c
# CHECK: frontend = 19.0.24215.1, backend = 19.0.24215.1
# CHECK: flags = security checks | hot patchable
# CHECK: 116 | S_GPROC32_ID [size = 44] `main`
# CHECK: 116 | S_GPROC32 [size = 44] `main`
# CHECK: parent = 0, end = 192, addr = 0002:0000, code size = 7
# CHECK: debug start = 0, debug end = 6, flags = none
# CHECK: 160 | S_FRAMEPROC [size = 32]
Expand Down
4 changes: 2 additions & 2 deletions lld/test/COFF/pdb-type-server-simple.test
Expand Up @@ -63,7 +63,7 @@ CHECK-LABEL: Symbols
CHECK: ============================================================
CHECK-LABEL: Mod 0000 | `{{.*}}a.obj`:
CHECK: 4 | S_OBJNAME [size = 40] sig=0, `C:\src\llvm-project\build\a.obj`
CHECK: 104 | S_GPROC32_ID [size = 44] `main`
CHECK: 104 | S_GPROC32 [size = 44] `main`
CHECK: parent = 0, end = 196, addr = 0002:0000, code size = 27
CHECK: type = {{.*}}, debug start = 4, debug end = 22, flags = none
CHECK: 200 | S_UDT [size = 12] `Foo`
Expand All @@ -75,7 +75,7 @@ CHECK: 44 | S_COMPILE3 [size = 60]
CHECK: machine = intel x86-x64, Ver = Microsoft (R) Optimizing Compiler, language = c
CHECK: frontend = 19.0.24215.1, backend = 19.0.24215.1
CHECK: flags = security checks | hot patchable
CHECK: 104 | S_GPROC32_ID [size = 44] `g`
CHECK: 104 | S_GPROC32 [size = 44] `g`
CHECK: parent = 0, end = 196, addr = 0002:0032, code size = 13
CHECK: type = {{.*}}, debug start = 5, debug end = 12, flags = none
CHECK: 148 | S_FRAMEPROC [size = 32]
Expand Down
15 changes: 15 additions & 0 deletions llvm/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
Expand Up @@ -52,6 +52,21 @@ class TypeDeserializer : public TypeVisitorCallbacks {
return Error::success();
}

/// Deserialize the raw bytes of one type record into a record of type \p T.
///
/// The record kind is read from the RecordPrefix at the start of \p Data and
/// handed to T's constructor; the bytes are then wrapped in a CVType and
/// passed to the two-argument deserializeAs<T> overload.
///
/// \param Data the full record, starting at its RecordPrefix. No length check
///             is performed here, so it must hold at least a RecordPrefix.
/// \returns the populated record, or the error produced while deserializing.
template <typename T>
static Expected<T> deserializeAs(ArrayRef<uint8_t> Data) {
  const RecordPrefix *Prefix =
      reinterpret_cast<const RecordPrefix *>(Data.data());
  TypeRecordKind K =
      static_cast<TypeRecordKind>(uint16_t(Prefix->RecordKind));
  T Record(K);

  // The two-argument overload receives the CVType and the record to fill, so
  // no separate MappingInfo needs to be constructed in this function.
  CVType CVT;
  CVT.RecordData = Data;
  if (auto EC = deserializeAs<T>(CVT, Record))
    return std::move(EC);
  return Record;
}

Error visitTypeBegin(CVType &Record) override {
assert(!Mapping && "Already in a type mapping!");
Mapping = llvm::make_unique<MappingInfo>(Record.content());
Expand Down
10 changes: 8 additions & 2 deletions llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
Expand Up @@ -30,11 +30,17 @@ void discoverTypeIndices(const CVType &Type,
SmallVectorImpl<TiReference> &Refs);
void discoverTypeIndices(const CVType &Type,
SmallVectorImpl<TypeIndex> &Indices);
void discoverTypeIndices(ArrayRef<uint8_t> RecordData,
SmallVectorImpl<TypeIndex> &Indices);

/// Discover type indices in symbol records. Returns false if this is an unknown
/// record.
bool discoverTypeIndices(const CVSymbol &Symbol,
SmallVectorImpl<TiReference> &Refs);
bool discoverTypeIndicesInSymbol(const CVSymbol &Symbol,
SmallVectorImpl<TiReference> &Refs);
bool discoverTypeIndicesInSymbol(ArrayRef<uint8_t> RecordData,
SmallVectorImpl<TiReference> &Refs);
bool discoverTypeIndicesInSymbol(ArrayRef<uint8_t> RecordData,
SmallVectorImpl<TypeIndex> &Indices);
}
}

Expand Down
47 changes: 39 additions & 8 deletions llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
Expand Up @@ -395,6 +395,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
case SymbolKind::S_CONSTANT:
Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
break;
case SymbolKind::S_BPREL32:
case SymbolKind::S_REGREL32:
Refs.push_back({TiRefKind::TypeRef, 4, 1}); // Type
break;
Expand Down Expand Up @@ -450,17 +451,17 @@ void llvm::codeview::discoverTypeIndices(const CVType &Type,
::discoverTypeIndices(Type.content(), Type.kind(), Refs);
}

void llvm::codeview::discoverTypeIndices(const CVType &Type,
SmallVectorImpl<TypeIndex> &Indices) {

static void resolveTypeIndexReferences(ArrayRef<uint8_t> RecordData,
ArrayRef<TiReference> Refs,
SmallVectorImpl<TypeIndex> &Indices) {
Indices.clear();

SmallVector<TiReference, 4> Refs;
discoverTypeIndices(Type, Refs);
if (Refs.empty())
return;

BinaryStreamReader Reader(Type.content(), support::little);
RecordData = RecordData.drop_front(sizeof(RecordPrefix));

BinaryStreamReader Reader(RecordData, support::little);
for (const auto &Ref : Refs) {
Reader.setOffset(Ref.Offset);
FixedStreamArray<TypeIndex> Run;
Expand All @@ -469,6 +470,18 @@ void llvm::codeview::discoverTypeIndices(const CVType &Type,
}
}

void llvm::codeview::discoverTypeIndices(const CVType &Type,
SmallVectorImpl<TypeIndex> &Indices) {
return discoverTypeIndices(Type.RecordData, Indices);
}

/// Find the type indices referenced by a raw type record.
///
/// First locates where the references are within \p RecordData (which starts
/// at the record prefix), then hands those locations to
/// resolveTypeIndexReferences to fill \p Indices.
void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
                                         SmallVectorImpl<TypeIndex> &Indices) {
  SmallVector<TiReference, 4> Locations;
  discoverTypeIndices(RecordData, Locations);
  resolveTypeIndexReferences(RecordData, Locations, Indices);
}

void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
SmallVectorImpl<TiReference> &Refs) {
const RecordPrefix *P =
Expand All @@ -477,8 +490,26 @@ void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs);
}

bool llvm::codeview::discoverTypeIndices(const CVSymbol &Sym,
SmallVectorImpl<TiReference> &Refs) {
bool llvm::codeview::discoverTypeIndicesInSymbol(
const CVSymbol &Sym, SmallVectorImpl<TiReference> &Refs) {
SymbolKind K = Sym.kind();
return ::discoverTypeIndices(Sym.content(), K, Refs);
}

/// Raw-bytes variant: \p RecordData starts at the record prefix. The symbol
/// kind is read from that prefix, and the remaining bytes are scanned for type
/// index references, which are appended to \p Refs. Returns the result of the
/// file-local discoverTypeIndices helper.
bool llvm::codeview::discoverTypeIndicesInSymbol(
    ArrayRef<uint8_t> RecordData, SmallVectorImpl<TiReference> &Refs) {
  const auto *Header =
      reinterpret_cast<const RecordPrefix *>(RecordData.data());
  const uint16_t RawKind = Header->RecordKind;
  ArrayRef<uint8_t> Payload = RecordData.drop_front(sizeof(RecordPrefix));
  return ::discoverTypeIndices(Payload, static_cast<SymbolKind>(RawKind),
                               Refs);
}

/// Find the type indices referenced by a raw symbol record.
///
/// Returns false, without touching \p Indices, when the reference locations
/// for this record cannot be discovered; otherwise resolves the discovered
/// locations into \p Indices and returns true.
bool llvm::codeview::discoverTypeIndicesInSymbol(
    ArrayRef<uint8_t> RecordData, SmallVectorImpl<TypeIndex> &Indices) {
  SmallVector<TiReference, 2> Locations;
  const bool Known = discoverTypeIndicesInSymbol(RecordData, Locations);
  if (Known)
    resolveTypeIndexReferences(RecordData, Locations, Indices);
  return Known;
}

0 comments on commit 59e3ae8

Please sign in to comment.