Skip to content

Commit

Permalink
[ms] [llvm-ml] Add support for .radix directive, and accept all radix…
Browse files Browse the repository at this point in the history
… specifiers

Add support for .radix directive, and radix specifiers [yY] (binary), [oOqQ] (octal), and [tT] (decimal).

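Also, when lexing MASM integers, only honor a non-default radix when the literal carries an explicit radix specifier; MASM requires that every literal without a radix specifier be interpreted in the default radix (e.g., with the default radix of 10, 0100 = 100, not octal).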

Relanding D87400, now with fewer ms-inline-asm tests broken!

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D88337
  • Loading branch information
ericastor committed Sep 29, 2020
1 parent f0506e4 commit 6b70a83
Show file tree
Hide file tree
Showing 8 changed files with 308 additions and 36 deletions.
13 changes: 11 additions & 2 deletions llvm/include/llvm/MC/MCParser/MCAsmLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class MCAsmLexer {
bool AllowAtInIdentifier;
bool IsAtStartOfStatement = true;
bool LexMasmIntegers = false;
bool UseMasmDefaultRadix = false;
unsigned DefaultRadix = 10;
AsmCommentConsumer *CommentConsumer = nullptr;

MCAsmLexer();
Expand Down Expand Up @@ -147,9 +149,16 @@ class MCAsmLexer {
this->CommentConsumer = CommentConsumer;
}

/// Set whether to lex masm-style binary and hex literals. They look like
/// 0b1101 and 0ABCh respectively.
/// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified
/// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }

/// Set whether integer literals without a radix specifier are interpreted in
/// the MASM default radix (see setMasmDefaultRadix). If disabled, assume
/// decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]).
///
/// The lexer only consults this flag when MASM integer lexing is also enabled
/// via setLexMasmIntegers(true).
void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; }

/// Return the radix currently applied to MASM integer literals that carry no
/// radix specifier. This is 10 until changed.
unsigned getMasmDefaultRadix() const { return DefaultRadix; }
/// Set the radix applied to MASM integer literals that carry no radix
/// specifier. The value is stored as-is; no range checking is done here, so
/// callers are responsible for passing a radix the lexer can handle.
void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
};

} // end namespace llvm
Expand Down
125 changes: 102 additions & 23 deletions llvm/lib/MC/MCParser/AsmLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SaveAndRestore.h"
#include <cassert>
Expand Down Expand Up @@ -271,13 +272,34 @@ static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
return DefaultRadix;
}

static AsmToken intToken(StringRef Ref, APInt &Value)
{
/// Scan forward from \p CurPtr over every character whose hex-digit value is
/// strictly below \p DefaultRadix, and return a pointer to the first character
/// that does not qualify (i.e. one past the last digit of the run).
///
/// NOTE(review): termination relies on hexDigitValue() producing a value that
/// is not below the radix for non-digit characters (including the buffer's
/// terminator) - confirm against its definition.
static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
  const char *Cursor = CurPtr;
  for (; hexDigitValue(*Cursor) < DefaultRadix; ++Cursor) {
    // Nothing to do: the loop header performs the whole scan.
  }
  return Cursor;
}

/// Build the token for an integer literal spelled \p Ref with value \p Value.
/// Values representable in 64 bits become AsmToken::Integer; anything wider
/// is reported as AsmToken::BigNum.
static AsmToken intToken(StringRef Ref, APInt &Value) {
  const bool FitsIn64Bits = Value.isIntN(64);
  return AsmToken(FitsIn64Bits ? AsmToken::Integer : AsmToken::BigNum, Ref,
                  Value);
}

/// Produce a human-readable name for \p Radix for use in diagnostics:
/// "binary", "octal", "decimal" or "hexadecimal" for 2, 8, 10 and 16
/// respectively, and "base-<N>" for every other value.
static std::string radixName(unsigned Radix) {
  if (Radix == 2)
    return "binary";
  if (Radix == 8)
    return "octal";
  if (Radix == 10)
    return "decimal";
  if (Radix == 16)
    return "hexadecimal";

  // No conventional name: fall back to a generic "base-N" spelling.
  std::string Generic = "base-";
  Generic += std::to_string(Radix);
  return Generic;
}

/// LexDigit: First character is [0-9].
/// Local Label: [0-9][:]
/// Forward/Backward Label: [0-9][fb]
Expand All @@ -286,45 +308,108 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
/// Decimal integer: [1-9][0-9]*
AsmToken AsmLexer::LexDigit() {
// MASM-flavor binary integer: [01]+[bB]
// MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY])
// MASM-flavor octal integer: [0-7]+[oOqQ]
// MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT])
// MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
if (LexMasmIntegers && isdigit(CurPtr[-1])) {
const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
CurPtr - 1 : nullptr;
const char *FirstNonBinary =
(CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
const char *FirstNonDecimal =
(CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
const char *OldCurPtr = CurPtr;
while (isHexDigit(*CurPtr)) {
if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
FirstNonBinary = CurPtr;
switch (*CurPtr) {
default:
if (!FirstNonDecimal) {
FirstNonDecimal = CurPtr;
}
LLVM_FALLTHROUGH;
case '9':
case '8':
case '7':
case '6':
case '5':
case '4':
case '3':
case '2':
if (!FirstNonBinary) {
FirstNonBinary = CurPtr;
}
break;
case '1':
case '0':
break;
}
++CurPtr;
}
if (*CurPtr == '.') {
// MASM float literals (other than hex floats) always contain a ".", and
// are always written in decimal.
++CurPtr;
return LexFloatLiteral();
}

unsigned Radix = 0;
if (*CurPtr == 'h' || *CurPtr == 'H') {
// hexadecimal number
++CurPtr;
Radix = 16;
} else if (*CurPtr == 't' || *CurPtr == 'T') {
// decimal number
++CurPtr;
Radix = 10;
} else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
*CurPtr == 'Q') {
// octal number
++CurPtr;
Radix = 8;
} else if (*CurPtr == 'y' || *CurPtr == 'Y') {
// binary number
++CurPtr;
Radix = 2;
} else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
DefaultRadix < 14 &&
(*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
Radix = 10;
} else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
(*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
DefaultRadix < 12 &&
(*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
Radix = 2;
}

if (Radix == 2 || Radix == 16) {
if (Radix) {
StringRef Result(TokStart, CurPtr - TokStart);
APInt Value(128, 0, true);

if (Result.drop_back().getAsInteger(Radix, Value))
return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
"invalid hexdecimal number");
return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");

// MSVC accepts and ignores type suffixes on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);

return intToken(Result, Value);
}
}

// octal/decimal integers, or floating point numbers, fall through
// default-radix integers, or floating point numbers, fall through
CurPtr = OldCurPtr;
}

// MASM default-radix integers: [0-9a-fA-F]+
// (All other integer literals have a radix specifier.)
if (LexMasmIntegers && UseMasmDefaultRadix) {
CurPtr = findLastDigit(CurPtr, 16);
StringRef Result(TokStart, CurPtr - TokStart);

APInt Value(128, 0, true);
if (Result.getAsInteger(DefaultRadix, Value)) {
return ReturnError(TokStart,
"invalid " + radixName(DefaultRadix) + " number");
}

return intToken(Result, Value);
}

// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
Expand All @@ -339,13 +424,9 @@ AsmToken AsmLexer::LexDigit() {
StringRef Result(TokStart, CurPtr - TokStart);

APInt Value(128, 0, true);
if (Result.getAsInteger(Radix, Value))
return ReturnError(TokStart, !isHex ? "invalid decimal number" :
"invalid hexdecimal number");

// Consume the [hH].
if (LexMasmIntegers && Radix == 16)
++CurPtr;
if (Result.getAsInteger(Radix, Value)) {
return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
}

// The darwin/x86 (and x86-64) assembler accepts and ignores type
// suffices on integer literals.
Expand Down Expand Up @@ -416,11 +497,9 @@ AsmToken AsmLexer::LexDigit() {
// Either octal or hexadecimal.
APInt Value(128, 0, true);
unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
bool isHex = Radix == 16;
StringRef Result(TokStart, CurPtr - TokStart);
if (Result.getAsInteger(Radix, Value))
return ReturnError(TokStart, !isHex ? "invalid octal number" :
"invalid hexdecimal number");
return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");

// Consume the [hH].
if (Radix == 16)
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/MC/MCParser/COFFMasmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ class COFFMasmParser : public MCAsmParserExtension {
// option
// popcontext
// pushcontext
// .radix
// .safeseh

// Procedure directives
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/MC/MCParser/MasmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ class MasmParser : public MCAsmParser {
DK_SAVEREG,
DK_SAVEXMM128,
DK_SETFRAME,
DK_RADIX,
};

/// Maps directive name --> DirectiveKind enum, for directives parsed by this
Expand Down Expand Up @@ -964,6 +965,9 @@ class MasmParser : public MCAsmParser {
// ".erre" or ".errnz", depending on ExpectZero.
bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);

// ".radix"
bool parseDirectiveRadix(SMLoc DirectiveLoc);

// "echo"
bool parseDirectiveEcho();

Expand Down Expand Up @@ -2284,6 +2288,8 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveErrorIfe(IDLoc, true);
case DK_ERRNZ:
return parseDirectiveErrorIfe(IDLoc, false);
case DK_RADIX:
return parseDirectiveRadix(IDLoc);
case DK_ECHO:
return parseDirectiveEcho();
}
Expand Down Expand Up @@ -6343,6 +6349,7 @@ void MasmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".savereg"] = DK_SAVEREG;
DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
DirectiveKindMap[".setframe"] = DK_SETFRAME;
DirectiveKindMap[".radix"] = DK_RADIX;
// DirectiveKindMap[".altmacro"] = DK_ALTMACRO;
// DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO;
DirectiveKindMap["db"] = DK_DB;
Expand Down Expand Up @@ -6584,6 +6591,22 @@ bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
return false;
}

/// Parse the operand of a ".radix" directive and install it as the lexer's
/// MASM default radix.
///
/// The rest of the statement is taken as raw text, trimmed, and always parsed
/// as a decimal number. Values outside 2..16, or text that is not a decimal
/// number, produce an error at the operand's location.
///
/// \param DirectiveLoc location of the directive itself (currently unused).
/// \returns the result of Error() on failure, false on success.
bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
  // Capture the operand location before consuming it, for diagnostics.
  const SMLoc OperandLoc = getLexer().getLoc();
  const StringRef Operand = parseStringToEndOfStatement().trim();

  unsigned NewRadix;
  // getAsInteger() reports failure by returning true.
  const bool NotANumber = Operand.getAsInteger(10, NewRadix);
  if (NotANumber)
    return Error(OperandLoc,
                 "radix must be a decimal number in the range 2 to 16; was " +
                     Operand);

  const bool InRange = NewRadix >= 2 && NewRadix <= 16;
  if (!InRange)
    return Error(OperandLoc, "radix must be in the range 2 to 16; was " +
                                 std::to_string(NewRadix));

  getLexer().setMasmDefaultRadix(NewRadix);
  return false;
}

bool MasmParser::parseDirectiveEcho() {
StringRef Message = parseStringToEndOfStatement();
Lex(); // eat end of statement
Expand Down
28 changes: 18 additions & 10 deletions llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1662,6 +1662,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if ((Done = SM.isValidEndState()))
break;
return Error(Tok.getLoc(), "unknown token in expression");
case AsmToken::Error:
return Error(getLexer().getErrLoc(), getLexer().getErr());
break;
case AsmToken::EndOfStatement:
Done = true;
break;
Expand Down Expand Up @@ -2453,21 +2456,26 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
// Parse memory broadcasting ({1to<NUM>}).
if (getLexer().getTok().getIntVal() != 1)
return TokError("Expected 1to<NUM> at this point");
Parser.Lex(); // Eat "1" of 1to8
if (!getLexer().is(AsmToken::Identifier) ||
!getLexer().getTok().getIdentifier().startswith("to"))
StringRef Prefix = getLexer().getTok().getString();
Parser.Lex(); // Eat first token of 1to8
if (!getLexer().is(AsmToken::Identifier))
return TokError("Expected 1to<NUM> at this point");
// Recognize only reasonable suffixes.
SmallVector<char, 5> BroadcastVector;
StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
.toStringRef(BroadcastVector);
if (!BroadcastString.startswith("1to"))
return TokError("Expected 1to<NUM> at this point");
const char *BroadcastPrimitive =
StringSwitch<const char*>(getLexer().getTok().getIdentifier())
.Case("to2", "{1to2}")
.Case("to4", "{1to4}")
.Case("to8", "{1to8}")
.Case("to16", "{1to16}")
.Default(nullptr);
StringSwitch<const char *>(BroadcastString)
.Case("1to2", "{1to2}")
.Case("1to4", "{1to4}")
.Case("1to8", "{1to8}")
.Case("1to16", "{1to16}")
.Default(nullptr);
if (!BroadcastPrimitive)
return TokError("Invalid memory broadcast primitive.");
Parser.Lex(); // Eat "toN" of 1toN
Parser.Lex(); // Eat trailing token of 1toN
if (!getLexer().is(AsmToken::RCurly))
return TokError("Expected } at this point");
Parser.Lex(); // Eat "}"
Expand Down
Loading

0 comments on commit 6b70a83

Please sign in to comment.