Skip to content

Commit

Permalink
[ELF] Add a spell corrector for "undefined symbol" diagnostics
Browse files Browse the repository at this point in the history
Non-undefined symbols whose names are at Levenshtein distance 1 from the
undefined name, or differ from it by a transposition of two adjacent
characters, are suggestion candidates. This is probably good enough, and it
can suggest some missing/superfluous qualifiers (const, restrict, volatile,
and the & and && ref-qualifiers), e.g.

   error: undefined symbol: foo(int*)
   >>> referenced by b.o:(.text+0x1)
  +>>> did you mean: foo(int const*)
  +>>> defined in: a.o

   error: undefined symbol: foo(int*&)
   >>> referenced by b.o:(.text+0x1)
  +>>> did you mean: foo(int*)
  +>>> defined in: b.o

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D67039

llvm-svn: 370853
  • Loading branch information
MaskRay committed Sep 4, 2019
1 parent b8b4fa4 commit b4745fa
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 5 deletions.
85 changes: 80 additions & 5 deletions lld/ELF/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,8 +691,75 @@ struct UndefinedDiag {

static std::vector<UndefinedDiag> undefs;

// Looks for a likely intended spelling of the undefined symbol `sym`, for use
// in the "undefined symbol" diagnostic.
//
// Candidate names are derived from sym's name by a single-character insertion,
// substitution or deletion, or by swapping two adjacent characters. Inserted
// and substituted characters are limited to the range ['0', 'z'].
//
// Returns the first candidate that names either a defined local symbol of
// sym's own file or a non-undefined entry of the symbol table; returns nullptr
// if no candidate matches.
static const Symbol *getAlternativeSpelling(const Undefined &sym) {
  // Defined local symbols of the referencing file, keyed by name. They are
  // collected here and consulted before the symbol table below. try_emplace
  // keeps the first symbol seen for a given name.
  DenseMap<StringRef, const Symbol *> localDefs;
  if (sym.file)
    for (const Symbol *local : sym.file->getSymbols())
      if (local->isLocal() && local->isDefined())
        localDefs.try_emplace(local->getName(), local);

  // Maps a candidate name to a symbol worth suggesting, or nullptr. Local
  // definitions take precedence over the symbol table.
  auto lookupCandidate = [&](StringRef candidate) -> const Symbol * {
    if (const Symbol *local = localDefs.lookup(candidate))
      return local;
    const Symbol *global = symtab->find(candidate);
    return (global && !global->isUndefined()) ? global : nullptr;
  };

  // Overwrites buf[pos] with each character in ['0', 'z'] in turn and returns
  // the first resulting name that resolves. On failure buf[pos] is left
  // clobbered, so callers must not reuse buf afterwards.
  auto tryEachCharAt = [&](std::string &buf, size_t pos) -> const Symbol * {
    for (char c = '0'; c <= 'z'; ++c) {
      buf[pos] = c;
      if (const Symbol *found = lookupCandidate(buf))
        return found;
    }
    return nullptr;
  };

  const StringRef name = sym.getName();
  const size_t len = name.size();
  for (size_t pos = 0;; ++pos) {
    // Insertion: put a placeholder before name[pos] (or at the very end when
    // pos == len) and try every character in its place.
    std::string inserted =
        (name.substr(0, pos) + "0" + name.substr(pos)).str();
    if (const Symbol *found = tryEachCharAt(inserted, pos))
      return found;

    // The remaining edits all touch an existing character, so there is
    // nothing more to do once we are past the end of the name.
    if (pos == len)
      break;

    // Substitution: replace name[pos].
    std::string substituted = name.str();
    if (const Symbol *found = tryEachCharAt(substituted, pos))
      return found;

    // Transposition: exchange name[pos] and name[pos + 1]. Strictly speaking
    // this is edit distance 2, but it is a common typo.
    if (pos + 1 < len) {
      std::string swapped = name.str();
      swapped[pos] = name[pos + 1];
      swapped[pos + 1] = name[pos];
      if (const Symbol *found = lookupCandidate(swapped))
        return found;
    }

    // Deletion: drop name[pos].
    std::string shortened = name.str();
    shortened.erase(pos, 1);
    if (const Symbol *found = lookupCandidate(shortened))
      return found;
  }

  return nullptr;
}

template <class ELFT>
static void reportUndefinedSymbol(const UndefinedDiag &undef) {
static void reportUndefinedSymbol(const UndefinedDiag &undef,
bool correctSpelling) {
Symbol &sym = *undef.sym;

auto visibility = [&]() -> std::string {
Expand Down Expand Up @@ -732,6 +799,14 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef) {
msg += ("\n>>> referenced " + Twine(undef.locs.size() - i) + " more times")
.str();

if (correctSpelling)
if (const Symbol *corrected =
getAlternativeSpelling(cast<Undefined>(sym))) {
msg += "\n>>> did you mean: " + toString(*corrected);
if (corrected->file)
msg += "\n>>> defined in: " + toString(corrected->file);
}

if (sym.getName().startswith("_ZTV"))
msg += "\nthe vtable symbol may be undefined because the class is missing "
"its key function (see https://lld.llvm.org/missingkeyfunction)";
Expand All @@ -755,10 +830,10 @@ template <class ELFT> void elf::reportUndefinedSymbols() {
firstRef[undef.sym] = &undef;
}

for (const UndefinedDiag &undef : undefs) {
if (!undef.locs.empty())
reportUndefinedSymbol<ELFT>(undef);
}
// Enable spell corrector for the first 2 diagnostics.
for (auto it : enumerate(undefs))
if (!it.value().locs.empty())
reportUndefinedSymbol<ELFT>(it.value(), it.index() < 2);
undefs.clear();
}

Expand Down
69 changes: 69 additions & 0 deletions lld/test/ELF/undef-spell-corrector.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o

## Checks the "did you mean" / "defined in" lines appended to "undefined
## symbol" errors. %t.o is assembled from this file and defines abcde and
## _Z3fooPKi (see the labels at the bottom); each case below references a
## near-miss of one of those names from a second object, %t1.o.
## Each section title names the edit the corrector has to apply to the
## undefined name in order to reach the defined one.

## Insert a character.
## The spell corrector is enabled for the first two "undefined symbol" diagnostics.
# RUN: echo 'call bcde; call abcd; call abde' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o
# RUN: not ld.lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.o

## Symbols defined in DSO can be suggested.
# RUN: ld.lld %t.o -shared -o %t.so
# RUN: not ld.lld %t.so %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.so

# INSERT: error: undefined symbol: bcde
# INSERT-NEXT: >>> referenced by {{.*}}
# INSERT-NEXT: >>> did you mean: abcde
# INSERT-NEXT: >>> defined in: [[FILE]]
# INSERT: error: undefined symbol: abcd
# INSERT-NEXT: >>> referenced by {{.*}}
# INSERT-NEXT: >>> did you mean: abcde
# INSERT-NEXT: >>> defined in: [[FILE]]
## abde is the third undefined symbol, so the corrector is off for it and no
## further ">>>" line may follow its "referenced by" line.
# INSERT: error: undefined symbol: abde
# INSERT-NEXT: >>> referenced by {{.*}}
# INSERT-NOT: >>>

## Substitute a character.
# RUN: echo 'call bbcde; call abcdd' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o
# RUN: not ld.lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=SUBST %s

# SUBST: error: undefined symbol: bbcde
# SUBST-NEXT: >>> referenced by {{.*}}
# SUBST-NEXT: >>> did you mean: abcde
# SUBST: error: undefined symbol: abcdd
# SUBST-NEXT: >>> referenced by {{.*}}
# SUBST-NEXT: >>> did you mean: abcde

## Delete a character.
# RUN: echo 'call aabcde; call abcdee' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o
# RUN: not ld.lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=DELETE %s

# DELETE: error: undefined symbol: aabcde
# DELETE-NEXT: >>> referenced by {{.*}}
# DELETE-NEXT: >>> did you mean: abcde
# DELETE: error: undefined symbol: abcdee
# DELETE-NEXT: >>> referenced by {{.*}}
# DELETE-NEXT: >>> did you mean: abcde

## Transpose.
# RUN: echo 'call bacde' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o
# RUN: not ld.lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=TRANSPOSE %s

# TRANSPOSE: error: undefined symbol: bacde
# TRANSPOSE-NEXT: >>> referenced by {{.*}}
# TRANSPOSE-NEXT: >>> did you mean: abcde

## Missing const qualifier.
# RUN: echo 'call _Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o
# RUN: not ld.lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s
## Local defined symbols.
## In this variant only %t1.o is linked: the echoed input defines _Z3fooPKi
## as a label without .globl, so the suggestion has to come from a symbol
## local to the referencing file rather than from %t.o.
# RUN: echo '_Z3fooPKi: call _Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o
# RUN: not ld.lld %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s

# CONST: error: undefined symbol: foo(int*)
# CONST-NEXT: >>> referenced by {{.*}}
# CONST-NEXT: >>> did you mean: foo(int const*)

## Global definitions in %t.o that serve as the correction targets above.
.globl _start, abcde, _Z3fooPKi
_start:
abcde:
_Z3fooPKi:

0 comments on commit b4745fa

Please sign in to comment.