Skip to content

Commit

Permalink
[ms] [llvm-ml] Allow arbitrary strings as integer constants
Browse files Browse the repository at this point in the history
MASM interprets strings in expression contexts as integers expressed in big-endian base-256, treating each character as its ASCII representation.

This completely eliminates the need to special-case single-character strings.

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D90788
  • Loading branch information
ericastor committed Nov 6, 2020
1 parent babc224 commit 5afb360
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 32 deletions.
44 changes: 24 additions & 20 deletions llvm/lib/MC/MCParser/MasmParser.cpp
Expand Up @@ -1332,6 +1332,8 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
/// primaryexpr ::= number
/// primaryexpr ::= '.'
/// primaryexpr ::= ~,+,-,'not' primaryexpr
/// primaryexpr ::= string
/// (a string of at most 8 characters is interpreted as a 64-bit number in
/// big-endian base-256, one byte per character)
bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
AsmTypeInfo *TypeInfo) {
SMLoc FirstTokenLoc = getLexer().getLoc();
Expand All @@ -1350,7 +1352,6 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
return false;
case AsmToken::Dollar:
case AsmToken::At:
case AsmToken::String:
case AsmToken::Identifier: {
StringRef Identifier;
if (parseIdentifier(Identifier)) {
Expand Down Expand Up @@ -1517,6 +1518,20 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
}
return false;
}
case AsmToken::String: {
// MASM strings (used as constants) are interpreted as big-endian base-256.
SMLoc ValueLoc = getTok().getLoc();
std::string Value;
if (parseEscapedString(Value))
return true;
if (Value.size() > 8)
return Error(ValueLoc, "literal value out of range");
uint64_t IntValue = 0;
for (const unsigned char CharVal : Value)
IntValue = (IntValue << 8) | CharVal;
Res = MCConstantExpr::create(IntValue, getContext());
return false;
}
case AsmToken::Real: {
APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
Expand Down Expand Up @@ -3168,28 +3183,17 @@ bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
bool MasmParser::parseScalarInitializer(unsigned Size,
SmallVectorImpl<const MCExpr *> &Values,
unsigned StringPadLength) {
if (getTok().is(AsmToken::String)) {
if (Size == 1 && getTok().is(AsmToken::String)) {
std::string Value;
if (parseEscapedString(Value))
return true;
if (Size == 1) {
// Treat each character as an initializer.
for (const char CharVal : Value)
Values.push_back(MCConstantExpr::create(CharVal, getContext()));

// Pad the string with spaces to the specified length.
for (size_t i = Value.size(); i < StringPadLength; ++i)
Values.push_back(MCConstantExpr::create(' ', getContext()));
} else {
// Treat the string as an initial value in big-endian representation.
if (Value.size() > Size)
return Error(getTok().getLoc(), "out of range literal value");

uint64_t IntValue = 0;
for (const unsigned char CharVal : Value)
IntValue = (IntValue << 8) | CharVal;
Values.push_back(MCConstantExpr::create(IntValue, getContext()));
}
// Treat each character as an initializer.
for (const unsigned char CharVal : Value)
Values.push_back(MCConstantExpr::create(CharVal, getContext()));

// Pad the string with spaces to the specified length.
for (size_t i = Value.size(); i < StringPadLength; ++i)
Values.push_back(MCConstantExpr::create(' ', getContext()));
} else {
const MCExpr *Value;
if (parseExpression(Value))
Expand Down
29 changes: 17 additions & 12 deletions llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
Expand Up @@ -1693,20 +1693,25 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return Error(Tok.getLoc(), "unknown token in expression");
}
LLVM_FALLTHROUGH;
case AsmToken::String: {
if (Parser.isParsingMasm()) {
// MASM parsers handle strings in expressions as constants.
SMLoc ValueLoc = Tok.getLoc();
int64_t Res;
const MCExpr *Val;
if (Parser.parsePrimaryExpr(Val, End, nullptr))
return true;
UpdateLocLex = false;
if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
return Error(ValueLoc, "expected absolute value");
if (SM.onInteger(Res, ErrMsg))
return Error(ValueLoc, ErrMsg);
break;
}
LLVM_FALLTHROUGH;
}
case AsmToken::At:
case AsmToken::String:
case AsmToken::Identifier: {
if (Parser.isParsingMasm() && Tok.is(AsmToken::String)) {
// Single-character strings should be treated as integer constants. This
// includes MASM escapes for quotes.
char Quote = Tok.getString().front();
StringRef Contents = Tok.getStringContents();
if (Contents.size() == 1 || Contents == std::string(2, Quote)) {
if (SM.onInteger(Contents.front(), ErrMsg))
return Error(Tok.getLoc(), ErrMsg);
break;
}
}
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
UpdateLocLex = false;
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/tools/llvm-ml/strings.test
Expand Up @@ -119,4 +119,25 @@ dq_char_test PROC
ret
dq_char_test ENDP

string_constant_test PROC
; CHECK-LABEL: string_constant_test:

; Verifies that a multi-character string used as an immediate operand is
; converted to an integer in big-endian base-256, for both quote styles.

; Two characters: 'a' is 0x61 and 'b' is 0x62, giving 0x6162 = 24930.
mov eax, 'ab'
mov eax, "ab"
; CHECK: mov eax, 24930
; CHECK: mov eax, 24930

; Three characters: 0x616263 = 6382179.
mov eax, "abc"
mov eax, 'abc'
; CHECK: mov eax, 6382179
; CHECK: mov eax, 6382179

; A doubled delimiter inside the string contributes a single quote character,
; so the fourth byte is the double-quote (0x22) in the first instruction and
; the apostrophe (0x27) in the second:
; 0x61626322 = 1633837858 and 0x61626327 = 1633837863.
mov eax, "abc"""
mov eax, 'abc'''
; CHECK: mov eax, 1633837858
; CHECK: mov eax, 1633837863

ret
string_constant_test ENDP

end
15 changes: 15 additions & 0 deletions llvm/test/tools/llvm-ml/strings_errors.test
@@ -0,0 +1,15 @@
; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s --implicit-check-not=error:

.code

oversize_string_test PROC

; Each string below has nine characters, one more than fits in a 64-bit
; constant, so each instruction is expected to produce exactly one
; out-of-range diagnostic (one per quote style).
mov rax, "abcdefghi"
mov rax, 'abcdefghi'
; CHECK: error: literal value out of range
; CHECK: error: literal value out of range

ret
oversize_string_test ENDP

end

0 comments on commit 5afb360

Please sign in to comment.