-
Notifications
You must be signed in to change notification settings - Fork 15.2k
MC: Better handle backslash-escaped symbols #158780
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-webassembly Author: Fangrui Song (MaskRay) ChangesThe MCContext::getOrCreateSymbol change in #138817 was a workaround. With #158106, we can replace Full diff: https://github.com/llvm/llvm-project/pull/158780.diff 10 Files Affected:
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index d96609c0fec21..4a528eecfc900 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -484,6 +484,10 @@ class MCContext {
/// \param Name - The symbol name, which must be unique across all symbols.
LLVM_ABI MCSymbol *getOrCreateSymbol(const Twine &Name);
+ /// Variant of getOrCreateSymbol that handles backslash-escaped symbols.
+ /// For example, parse "a\"b\\" as a"\.
+ LLVM_ABI MCSymbol *parseSymbol(const Twine &Name);
+
/// Gets a symbol that will be defined to the final stack offset of a local
/// variable after codegen.
///
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 5d9ddc2f1306c..5df8692d2e7ba 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -203,27 +203,6 @@ MCInst *MCContext::createMCInst() {
MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
SmallString<128> NameSV;
StringRef NameRef = Name.toStringRef(NameSV);
- if (NameRef.contains('\\')) {
- NameSV = NameRef;
- size_t S = 0;
- // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
- // other characters following \\, which we do not implement due to code
- // structure.
- for (size_t I = 0, E = NameSV.size(); I != E; ++I) {
- char C = NameSV[I];
- if (C == '\\' && I + 1 != E) {
- switch (NameSV[I + 1]) {
- case '"':
- case '\\':
- C = NameSV[++I];
- break;
- }
- }
- NameSV[S++] = C;
- }
- NameSV.resize(S);
- NameRef = NameSV;
- }
assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
@@ -244,6 +223,34 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
return Entry.second.Symbol;
}
+MCSymbol *MCContext::parseSymbol(const Twine &Name) {
+ SmallString<128> SV;
+ StringRef NameRef = Name.toStringRef(SV);
+ if (NameRef.contains('\\')) {
+ SV = NameRef;
+ size_t S = 0;
+ // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
+ // other characters following \\, which we do not implement due to code
+ // structure.
+ for (size_t I = 0, E = SV.size(); I != E; ++I) {
+ char C = SV[I];
+ if (C == '\\' && I + 1 != E) {
+ switch (SV[I + 1]) {
+ case '"':
+ case '\\':
+ C = SV[++I];
+ break;
+ }
+ }
+ SV[S++] = C;
+ }
+ SV.resize(S);
+ NameRef = SV;
+ }
+
+ return getOrCreateSymbol(NameRef);
+}
+
MCSymbol *MCContext::getOrCreateFrameAllocSymbol(const Twine &FuncName,
unsigned Idx) {
return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + FuncName +
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 5fa1539790c73..acea3ab23680a 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1213,8 +1213,8 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
if (!Sym)
- Sym = getContext().getOrCreateSymbol(MAI.isHLASM() ? SymbolName.upper()
- : SymbolName);
+ Sym = getContext().parseSymbol(MAI.isHLASM() ? SymbolName.upper()
+ : SymbolName);
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
@@ -1845,7 +1845,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
RewrittenLabel);
IDVal = RewrittenLabel;
}
- Sym = getContext().getOrCreateSymbol(IDVal);
+ Sym = getContext().parseSymbol(IDVal);
} else
Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
// End of Labels should be treated as end of line for lexing
@@ -4885,7 +4885,7 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
if (discardLTOSymbol(Name))
return false;
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().parseSymbol(Name);
// Assembler local symbols don't make any sense here, except for directives
// that the symbol should be tagged.
@@ -6142,7 +6142,7 @@ bool HLASMAsmParser::parseAsHLASMLabel(ParseStatementInfo &Info,
return Error(LabelLoc,
"Cannot have just a label for an HLASM inline asm statement");
- MCSymbol *Sym = getContext().getOrCreateSymbol(
+ MCSymbol *Sym = getContext().parseSymbol(
getContext().getAsmInfo()->isHLASM() ? LabelVal.upper() : LabelVal);
// Emit the label.
@@ -6270,7 +6270,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
Parser.getStreamer().emitValueToOffset(Value, 0, EqualLoc);
return false;
} else
- Sym = Parser.getContext().getOrCreateSymbol(Name);
+ Sym = Parser.getContext().parseSymbol(Name);
Sym->setRedefinable(allow_redef);
diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index ef2815b037f2f..04e12e56c4262 100644
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -510,8 +510,8 @@ bool COFFMasmParser::parseDirectiveAlias(StringRef Directive, SMLoc Loc) {
getParser().parseAngleBracketString(ActualName))
return Error(getTok().getLoc(), "expected <actualName>");
- MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName);
- MCSymbol *Actual = getContext().getOrCreateSymbol(ActualName);
+ MCSymbol *Alias = getContext().parseSymbol(AliasName);
+ MCSymbol *Actual = getContext().parseSymbol(ActualName);
getStreamer().emitWeakReference(Alias, Actual);
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 19da9f57a4a6f..6195355626fd5 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -163,7 +163,7 @@ bool ELFAsmParser::parseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
continue;
}
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().parseSymbol(Name);
getStreamer().emitSymbolAttribute(Sym, Attr);
diff --git a/llvm/lib/MC/MCParser/MCAsmParser.cpp b/llvm/lib/MC/MCParser/MCAsmParser.cpp
index 3721541c71e11..c1b7e57184de1 100644
--- a/llvm/lib/MC/MCParser/MCAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -168,7 +168,7 @@ bool MCAsmParser::parseSymbol(MCSymbol *&Res) {
if (parseIdentifier(Name))
return true;
- Res = getContext().getOrCreateSymbol(Name);
+ Res = getContext().parseSymbol(Name);
return false;
}
diff --git a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
index 7fa05088c9725..299d4b46a8a84 100644
--- a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -50,8 +50,8 @@ bool MCAsmParserExtension::parseDirectiveCGProfile(StringRef, SMLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *FromSym = getContext().getOrCreateSymbol(From);
- MCSymbol *ToSym = getContext().getOrCreateSymbol(To);
+ MCSymbol *FromSym = getContext().parseSymbol(From);
+ MCSymbol *ToSym = getContext().parseSymbol(To);
getStreamer().emitCGProfileEntry(
MCSymbolRefExpr::create(FromSym, getContext(), FromLoc),
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index b38c2f7e41634..7f0ea7830b495 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1480,7 +1480,7 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
auto VarIt = Variables.find(SymbolName.lower());
if (VarIt != Variables.end())
SymbolName = VarIt->second.Name;
- Sym = getContext().getOrCreateSymbol(SymbolName);
+ Sym = getContext().parseSymbol(SymbolName);
}
// If this is an absolute variable reference, substitute it now to preserve
@@ -1965,7 +1965,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
if (IDVal == "@@") {
Sym = Ctx.createDirectionalLocalSymbol(0);
} else {
- Sym = getContext().getOrCreateSymbol(IDVal);
+ Sym = getContext().parseSymbol(IDVal);
}
// End of Labels should be treated as end of line for lexing
@@ -3009,8 +3009,7 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
return false;
}
- auto *Sym =
- static_cast<MCSymbolCOFF *>(getContext().getOrCreateSymbol(Var.Name));
+ auto *Sym = static_cast<MCSymbolCOFF *>(getContext().parseSymbol(Var.Name));
const MCConstantExpr *PrevValue =
Sym->isVariable()
? dyn_cast_or_null<MCConstantExpr>(Sym->getVariableValue())
@@ -3318,7 +3317,7 @@ bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
StringRef Name, SMLoc NameLoc) {
if (StructInProgress.empty()) {
// Initialize named data value.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().parseSymbol(Name);
getStreamer().emitLabel(Sym);
unsigned Count;
if (emitIntegralValues(Size, &Count))
@@ -3509,7 +3508,7 @@ bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
SMLoc NameLoc) {
if (StructInProgress.empty()) {
// Initialize named data value.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().parseSymbol(Name);
getStreamer().emitLabel(Sym);
unsigned Count;
if (emitRealValues(Semantics, &Count))
@@ -4003,7 +4002,7 @@ bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
SMLoc DirLoc, StringRef Name) {
if (StructInProgress.empty()) {
// Initialize named data value.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().parseSymbol(Name);
getStreamer().emitLabel(Sym);
unsigned Count;
if (emitStructValues(Structure, &Count))
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 1befcacb3952f..75e8948f8ce9e 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -240,8 +240,7 @@ class WasmAsmParser : public MCAsmParserExtension {
return error("Expected label after .type directive, got: ",
Lexer->getTok());
auto *WasmSym = static_cast<MCSymbolWasm *>(
- getStreamer().getContext().getOrCreateSymbol(
- Lexer->getTok().getString()));
+ getStreamer().getContext().parseSymbol(Lexer->getTok().getString()));
Lex();
if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
Lexer->is(AsmToken::Identifier)))
diff --git a/llvm/test/MC/ELF/symbol-names.s b/llvm/test/MC/ELF/symbol-names.s
index 427187c329acf..f1593dd2f8099 100644
--- a/llvm/test/MC/ELF/symbol-names.s
+++ b/llvm/test/MC/ELF/symbol-names.s
@@ -5,6 +5,7 @@
// CHECK-LABEL:SYMBOL TABLE:
// CHECK-NEXT: 0000000000000001 l F .text 0000000000000000 a"b\{{$}}
// CHECK-NEXT: 0000000000000006 l .text 0000000000000000 a\{{$}}
+// CHECK-NEXT: 000000000000000b l .text 0000000000000000 a\\{{$}}
// CHECK-NEXT: 0000000000000000 g F .text 0000000000000000 foo?bar
// CHECK-NEXT: 0000000000000000 *UND* 0000000000000000 a"b\q{{$}}
// CHECK-EMPTY:
@@ -26,3 +27,5 @@ ret
"a\\":
/// GAS emits a warning for \q
call "a\"b\q"
+
+"a\\\\" = .
|
The MCContext::getOrCreateSymbol change in llvm#138817 was a workaround. With llvm#158106, we can replace `getOrCreateSymbol` with `parseSymbol`, in llvm/lib/MC/MCParser to handle backslash-escaped symbols.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Nice clarification.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please also add an LLVM IR test along the lines of https://llvm.godbolt.org/z/Ea7WzdczE, so we can see the escaping is now correct?
Thx. Added llvm/test/CodeGen/X86/symbol-name.ll |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM_ABI MCSymbol *getOrCreateSymbol(const Twine &Name); | ||
|
||
/// Variant of getOrCreateSymbol that handles backslash-escaped symbols. | ||
/// For example, parse "a\"b\\" as a"\. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a typo? Where did 'b' go?
Failed to cherry-pick: 76aba5d https://github.com/llvm/llvm-project/actions/runs/17802897787 Please manually backport the fix and push it to your github fork. Once this is done, please create a pull request |
The MCContext::getOrCreateSymbol change in llvm#138817 was a workaround. With llvm#158106, we can replace `getOrCreateSymbol` with `parseSymbol`, in llvm/lib/MC/MCParser to handle backslash-escaped symbols. (cherry picked from commit 0cf6688)
Manual backport PR: #159420 |
The MCContext::getOrCreateSymbol change in llvm#138817 was a workaround. With llvm#158106, we can replace `getOrCreateSymbol` with `parseSymbol`, in llvm/lib/MC/MCParser to handle backslash-escaped symbols. (cherry picked from commit 0cf6688)
The MCContext::getOrCreateSymbol change in llvm#138817 was a workaround. With llvm#158106, we can replace `getOrCreateSymbol` with `parseSymbol`, in llvm/lib/MC/MCParser to handle backslash-escaped symbols. (cherry picked from commit 0cf6688)
The MCContext::getOrCreateSymbol change in #138817 was a workaround. With #158106, we can replace
getOrCreateSymbol
withparseSymbol
, in llvm/lib/MC/MCParser to handle backslash-escaped symbols.