Skip to content

Commit

Permalink
[lld/mac] Port typo correction for undefined symbols from ELF port
Browse files Browse the repository at this point in the history
Ports:
- core feature: https://reviews.llvm.org/D67039
- case mismatch: https://reviews.llvm.org/D70506
- extern "C" suggestions: https://reviews.llvm.org/D69592,
  https://reviews.llvm.org/D69650

Does not port https://reviews.llvm.org/D71735 since I believe that that doesn't
apply to lld/Mach-O.

Differential Revision: https://reviews.llvm.org/D135038
  • Loading branch information
nico committed Oct 3, 2022
1 parent d55dd57 commit 8c45e80
Show file tree
Hide file tree
Showing 4 changed files with 264 additions and 3 deletions.
148 changes: 145 additions & 3 deletions lld/MachO/SymbolTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,138 @@ void macho::reportPendingDuplicateSymbols() {
}
}

// Check whether the definition name def is a mangled function name that matches
// the reference name ref.
static bool canSuggestExternCForCXX(StringRef ref, StringRef def) {
llvm::ItaniumPartialDemangler d;
std::string name = def.str();
if (d.partialDemangle(name.c_str()))
return false;
char *buf = d.getFunctionName(nullptr, nullptr);
if (!buf)
return false;
bool ret = ref == buf;
free(buf);
return ret;
}

// Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns
// the suggested symbol, which is either in the symbol table, or in the same
// file of sym.
static const Symbol *getAlternativeSpelling(const Undefined &sym,
std::string &pre_hint,
std::string &post_hint) {
DenseMap<StringRef, const Symbol *> map;
if (sym.getFile() && sym.getFile()->kind() == InputFile::ObjKind) {
// Build a map of local defined symbols.
for (const Symbol *s : sym.getFile()->symbols)
if (auto *defined = dyn_cast<Defined>(s))
if (!defined->isExternal())
map.try_emplace(s->getName(), s);
}

auto suggest = [&](StringRef newName) -> const Symbol * {
// If defined locally.
if (const Symbol *s = map.lookup(newName))
return s;

// If in the symbol table and not undefined.
if (const Symbol *s = symtab->find(newName))
if (dyn_cast<Undefined>(s) == nullptr)
return s;

return nullptr;
};

// This loop enumerates all strings of Levenshtein distance 1 as typo
// correction candidates and suggests the one that exists as a non-undefined
// symbol.
StringRef name = sym.getName();
for (size_t i = 0, e = name.size(); i != e + 1; ++i) {
// Insert a character before name[i].
std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str();
for (char c = '0'; c <= 'z'; ++c) {
newName[i] = c;
if (const Symbol *s = suggest(newName))
return s;
}
if (i == e)
break;

// Substitute name[i].
newName = std::string(name);
for (char c = '0'; c <= 'z'; ++c) {
newName[i] = c;
if (const Symbol *s = suggest(newName))
return s;
}

// Transpose name[i] and name[i+1]. This is of edit distance 2 but it is
// common.
if (i + 1 < e) {
newName[i] = name[i + 1];
newName[i + 1] = name[i];
if (const Symbol *s = suggest(newName))
return s;
}

// Delete name[i].
newName = (name.substr(0, i) + name.substr(i + 1)).str();
if (const Symbol *s = suggest(newName))
return s;
}

// Case mismatch, e.g. Foo vs FOO.
for (auto &it : map)
if (name.equals_insensitive(it.first))
return it.second;
for (Symbol *sym : symtab->getSymbols())
if (dyn_cast<Undefined>(sym) == nullptr &&
name.equals_insensitive(sym->getName()))
return sym;

// The reference may be a mangled name while the definition is not. Suggest a
// missing extern "C".
if (name.startswith("__Z")) {
std::string buf = name.str();
llvm::ItaniumPartialDemangler d;
if (!d.partialDemangle(buf.c_str()))
if (char *buf = d.getFunctionName(nullptr, nullptr)) {
const Symbol *s = suggest((Twine("_") + buf).str());
free(buf);
if (s) {
pre_hint = ": extern \"C\" ";
return s;
}
}
} else {
StringRef name_without_underscore = name;
name_without_underscore.consume_front("_");
const Symbol *s = nullptr;
for (auto &it : map)
if (canSuggestExternCForCXX(name_without_underscore, it.first)) {
s = it.second;
break;
}
if (!s)
for (Symbol *sym : symtab->getSymbols())
if (canSuggestExternCForCXX(name_without_underscore, sym->getName())) {
s = sym;
break;
}
if (s) {
pre_hint = " to declare ";
post_hint = " as extern \"C\"?";
return s;
}
}

return nullptr;
}

static void reportUndefinedSymbol(const Undefined &sym,
const UndefinedDiag &locations) {
const UndefinedDiag &locations,
bool correctSpelling) {
std::string message = "undefined symbol";
if (config->archMultiple)
message += (" for arch " + getArchitectureName(config->arch())).str();
Expand Down Expand Up @@ -426,6 +556,17 @@ static void reportUndefinedSymbol(const Undefined &sym,
("\n>>> referenced " + Twine(totalReferences - i) + " more times")
.str();

if (correctSpelling) {
std::string pre_hint = ": ", post_hint;
if (const Symbol *corrected =
getAlternativeSpelling(sym, pre_hint, post_hint)) {
message +=
"\n>>> did you mean" + pre_hint + toString(*corrected) + post_hint;
if (corrected->getFile())
message += "\n>>> defined in: " + toString(corrected->getFile());
}
}

if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error)
error(message);
else if (config->undefinedSymbolTreatment ==
Expand All @@ -436,8 +577,9 @@ static void reportUndefinedSymbol(const Undefined &sym,
}

void macho::reportPendingUndefinedSymbols() {
for (const auto &undef : undefs)
reportUndefinedSymbol(*undef.first, undef.second);
// Enable spell corrector for the first 2 diagnostics.
for (const auto &[i, undef] : llvm::enumerate(undefs))
reportUndefinedSymbol(*undef.first, undef.second, i < 2);

// This function is called multiple times during execution. Clear the printed
// diagnostics to avoid printing the same things again the next time.
Expand Down
79 changes: 79 additions & 0 deletions lld/test/MachO/undef-spell-corrector.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o

## Insert a character.
## The spell corrector is enabled for the first two "undefined symbol" diagnostics.
# RUN: echo 'call bcde; call abcd; call abde' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.o

## Symbols defined in DSO can be suggested.
# RUN: %lld %t.o -dylib -o %t.dylib
# RUN: not %lld %t.dylib %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.dylib

# INSERT: error: undefined symbol: abde
# INSERT-NEXT: >>> referenced by {{.*}}
# INSERT-NEXT: >>> did you mean: abcde
# INSERT-NEXT: >>> defined in: [[FILE]]
# INSERT: error: undefined symbol: abcd
# INSERT-NEXT: >>> referenced by {{.*}}
# INSERT-NEXT: >>> did you mean: abcde
# INSERT-NEXT: >>> defined in: [[FILE]]
# INSERT: error: undefined symbol: bcde
# INSERT-NEXT: >>> referenced by {{.*}}
# INSERT-NOT: >>>

## Substitute a character.
# RUN: echo 'call bbcde; call abcdd' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=SUBST %s

# SUBST: error: undefined symbol: abcdd
# SUBST-NEXT: >>> referenced by {{.*}}
# SUBST-NEXT: >>> did you mean: abcde
# SUBST: error: undefined symbol: bbcde
# SUBST-NEXT: >>> referenced by {{.*}}
# SUBST-NEXT: >>> did you mean: abcde

## Delete a character.
# RUN: echo 'call aabcde; call abcdee' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=DELETE %s

# DELETE: error: undefined symbol: abcdee
# DELETE-NEXT: >>> referenced by {{.*}}
# DELETE-NEXT: >>> did you mean: abcde
# DELETE: error: undefined symbol: aabcde
# DELETE-NEXT: >>> referenced by {{.*}}
# DELETE-NEXT: >>> did you mean: abcde

## Transpose.
# RUN: echo 'call bacde' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=TRANSPOSE %s

# TRANSPOSE: error: undefined symbol: bacde
# TRANSPOSE-NEXT: >>> referenced by {{.*}}
# TRANSPOSE-NEXT: >>> did you mean: abcde

## Missing const qualifier.
# RUN: echo 'call __Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s
## Local defined symbols.
# RUN: echo '__Z3fooPKi: call __Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
# RUN: not %lld %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s

# CONST: error: undefined symbol: foo(int*)
# CONST-NEXT: >>> referenced by {{.*}}
# CONST-NEXT: >>> did you mean: foo(int const*)

## Case mismatch.
# RUN: echo 'call __Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s
# RUN: echo '__Z3fooPKi: call __Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
# RUN: not %lld %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s

# CASE: error: undefined symbol: FOO(int const*)
# CASE-NEXT: >>> referenced by {{.*}}
# CASE-NEXT: >>> did you mean: foo(int const*)

.globl _main, abcde, __Z3fooPKi
_main:
abcde:
__Z3fooPKi:
19 changes: 19 additions & 0 deletions lld/test/MachO/undef-suggest-extern-c.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o

## The reference is mangled while the definition is not, suggest a missing
## extern "C".
# RUN: echo 'call __Z3fooi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck %s

# CHECK: error: undefined symbol: foo(int)
# CHECK-NEXT: >>> referenced by {{.*}}
# CHECK-NEXT: >>> did you mean: extern "C" _foo

## Don't suggest for nested names like F::foo() and foo::foo().
# RUN: echo 'call __ZN1F3fooEv; call __ZN3fooC1Ev' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t2.o
# RUN: not ld.lld %t.o %t2.o -o /dev/null 2>&1 | FileCheck /dev/null --implicit-check-not='did you mean'

.globl _start, _foo
_start:
_foo:
21 changes: 21 additions & 0 deletions lld/test/MachO/undef-suggest-extern-c2.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o

## The definition is mangled while the reference is not, suggest an arbitrary
## C++ overload.
# RUN: echo '.globl __Z3fooi; __Z3fooi:' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck %s

## Check that we can suggest a local definition.
# RUN: echo '__Z3fooi: call _foo' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t2.o
# RUN: not %lld %t2.o -demangle -o /dev/null 2>&1 | FileCheck %s

# CHECK: error: undefined symbol: _foo
# CHECK-NEXT: >>> referenced by {{.*}}
# CHECK-NEXT: >>> did you mean to declare foo(int) as extern "C"?

## Don't suggest nested names whose base name is "foo", e.g. F::foo().
# RUN: echo '.globl __ZN1F3fooEv; __ZN1F3fooEv:' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t3.o
# RUN: not %lld %t.o %t3.o -o /dev/null 2>&1 | FileCheck /dev/null --implicit-check-not='did you mean'

call _foo

0 comments on commit 8c45e80

Please sign in to comment.