88 changes: 37 additions & 51 deletions llvm/lib/MC/MCParser/AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,6 @@ class AsmParser : public MCAsmParser {
bool parseStatement(ParseStatementInfo &Info,
MCAsmParserSemaCallback *SI);
bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
void eatToEndOfLine();
bool parseCppHashLineFilenameComment(SMLoc L);

void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body,
Expand Down Expand Up @@ -628,14 +627,18 @@ const AsmToken &AsmParser::Lex() {
Error(Lexer.getErrLoc(), Lexer.getErr());

const AsmToken *tok = &Lexer.Lex();
// Drop comments here.
while (tok->is(AsmToken::Comment)) {
tok = &Lexer.Lex();
}

if (tok->is(AsmToken::Eof)) {
// If this is the end of an included file, pop the parent file off the
// include stack.
SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
if (ParentIncludeLoc != SMLoc()) {
jumpToLoc(ParentIncludeLoc);
tok = &Lexer.Lex();
return Lex();
}
}

Expand Down Expand Up @@ -720,8 +723,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// first referenced for a source location. We need to add something
// to track that. Currently, we just point to the end of the file.
HadError |=
Error(getLexer().getLoc(), "assembler local symbol '" +
Sym->getName() + "' not defined");
Error(getTok().getLoc(), "assembler local symbol '" +
Sym->getName() + "' not defined");
}
}

Expand Down Expand Up @@ -766,7 +769,7 @@ StringRef AsmParser::parseStringToEndOfStatement() {
const char *Start = getTok().getLoc().getPointer();

while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
Lex();
Lexer.Lex();

const char *End = getTok().getLoc().getPointer();
return StringRef(Start, End - Start);
Expand All @@ -777,7 +780,7 @@ StringRef AsmParser::parseStringToComma() {

while (Lexer.isNot(AsmToken::EndOfStatement) &&
Lexer.isNot(AsmToken::Comma) && Lexer.isNot(AsmToken::Eof))
Lex();
Lexer.Lex();

const char *End = getTok().getLoc().getPointer();
return StringRef(Start, End - Start);
Expand Down Expand Up @@ -859,7 +862,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
if (!MAI.useParensForSymbolVariant()) {
if (FirstTokenKind == AsmToken::String) {
if (Lexer.is(AsmToken::At)) {
Lexer.Lex(); // eat @
Lex(); // eat @
SMLoc AtLoc = getLexer().getLoc();
StringRef VName;
if (parseIdentifier(VName))
Expand All @@ -871,14 +874,14 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
Split = Identifier.split('@');
}
} else if (Lexer.is(AsmToken::LParen)) {
Lexer.Lex(); // eat (
Lex(); // eat '('.
StringRef VName;
parseIdentifier(VName);
if (Lexer.isNot(AsmToken::RParen)) {
return Error(Lexer.getTok().getLoc(),
"unexpected token in variant, expected ')'");
}
Lexer.Lex(); // eat )
Lex(); // eat ')'.
Split = std::make_pair(Identifier, VName);
}

Expand Down Expand Up @@ -1343,21 +1346,24 @@ bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
/// ::= Label* Identifier OperandList* EndOfStatement
bool AsmParser::parseStatement(ParseStatementInfo &Info,
MCAsmParserSemaCallback *SI) {
// Eat initial spaces and comments
while (Lexer.is(AsmToken::Space))
Lex();
if (Lexer.is(AsmToken::EndOfStatement)) {
Out.AddBlankLine();
// if this is a line comment we can drop it safely
if (getTok().getString().front() == '\r' ||
getTok().getString().front() == '\n')
Out.AddBlankLine();
Lex();
return false;
}

// Statements always start with an identifier or are a full line comment.
// Statements always start with an identifier.
AsmToken ID = getTok();
SMLoc IDLoc = ID.getLoc();
StringRef IDVal;
int64_t LocalLabelVal = -1;
// A full line comment is a '#' as the first token.
if (Lexer.is(AsmToken::Hash))
if (Lexer.is(AsmToken::HashDirective))
return parseCppHashLineFilenameComment(IDLoc);

// Allow an integer followed by a ':' as a directional local label.
if (Lexer.is(AsmToken::Integer)) {
LocalLabelVal = getTok().getIntVal();
Expand Down Expand Up @@ -1648,7 +1654,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveIncbin();
case DK_CODE16:
case DK_CODE16GCC:
return TokError(Twine(IDVal) + " not supported yet");
return TokError(Twine(IDVal) +
" not currently supported for this target");
case DK_REPT:
return parseDirectiveRept(IDLoc, IDVal);
case DK_IRP:
Expand Down Expand Up @@ -1868,37 +1875,20 @@ AsmParser::parseCurlyBlockScope(SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
return true;
}

/// Discard whatever remains of the current line. The raw lexer is used for
/// this because the leftover characters may not be tokenizable, in which case
/// ordinary lexing could not reach the end-of-line token.
void AsmParser::eatToEndOfLine() {
  // Not yet at the end of the statement: skip the remaining raw characters.
  if (Lexer.isNot(AsmToken::EndOfStatement))
    Lexer.LexUntilEndOfLine();

  // Consume the end-of-line token itself.
  Lex();
}

/// parseCppHashLineFilenameComment as this:
/// ::= # number "filename"
/// or just as a full line comment if it doesn't have a number and a string.
bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) {
Lex(); // Eat the hash token.

if (getLexer().isNot(AsmToken::Integer)) {
// Consume the line in case it is not a well-formed line directive,
// as if it were simply a full line comment.
eatToEndOfLine();
return false;
}

// The lexer only ever emits HashDirective when the directive is fully formed,
// having already done the checking, so a mismatch here is an internal error.
assert(getTok().is(AsmToken::Integer) &&
"Lexing Cpp line comment: Expected Integer");
int64_t LineNumber = getTok().getIntVal();
Lex();

if (getLexer().isNot(AsmToken::String)) {
eatToEndOfLine();
return false;
}

assert(getTok().is(AsmToken::String) &&
"Lexing Cpp line comment: Expected String");
StringRef Filename = getTok().getString();
Lex();
// Get rid of the enclosing quotes.
Filename = Filename.substr(1, Filename.size() - 2);

Expand All @@ -1907,9 +1897,6 @@ bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) {
CppHashInfo.Filename = Filename;
CppHashInfo.LineNumber = LineNumber;
CppHashInfo.Buf = CurBuffer;

// Ignore any trailing characters, they're just comment.
eatToEndOfLine();
return false;
}

Expand Down Expand Up @@ -2268,7 +2255,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
break;

if (FAI >= NParameters) {
assert(M && "expected macro to be defined");
assert(M && "expected macro to be defined");
Error(IDLoc,
"parameter named '" + FA.Name + "' does not exist for macro '" +
M->Name + "'");
Expand Down Expand Up @@ -2426,7 +2413,7 @@ bool AsmParser::parseIdentifier(StringRef &Res) {
// Construct the joined identifier and consume the token.
Res =
StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
Lexer.Lex(); // Lexer's Lex guarantees consecutive token
Lex(); // Parser Lex to maintain invariants.
return false;
}

Expand Down Expand Up @@ -2568,16 +2555,16 @@ bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) {

if (Lexer.isNot(AsmToken::Comma))
return TokError("expected comma");
Lexer.Lex();
Lex();

if (Lexer.isNot(AsmToken::Identifier))
return TokError("expected relocation name");
SMLoc NameLoc = Lexer.getTok().getLoc();
StringRef Name = Lexer.getTok().getIdentifier();
Lexer.Lex();
Lex();

if (Lexer.is(AsmToken::Comma)) {
Lexer.Lex();
Lex();
SMLoc ExprLoc = Lexer.getLoc();
if (parseExpression(Expr))
return true;
Expand Down Expand Up @@ -5250,10 +5237,9 @@ static bool isSymbolUsedInExpression(const MCSymbol *Sym, const MCExpr *Value) {
bool parseAssignmentExpression(StringRef Name, bool allow_redef,
MCAsmParser &Parser, MCSymbol *&Sym,
const MCExpr *&Value) {
MCAsmLexer &Lexer = Parser.getLexer();

// FIXME: Use better location, we should use proper tokens.
SMLoc EqualLoc = Lexer.getLoc();
SMLoc EqualLoc = Parser.getTok().getLoc();

if (Parser.parseExpression(Value)) {
Parser.TokError("missing expression");
Expand All @@ -5265,7 +5251,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
// a = b
// b = c

if (Lexer.isNot(AsmToken::EndOfStatement))
if (Parser.getTok().isNot(AsmToken::EndOfStatement))
return Parser.TokError("unexpected token in assignment");

// Eat the end of statement marker.
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10072,7 +10072,7 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {

StringRef Arch = Parser.getTok().getString();
SMLoc ArchLoc = Parser.getTok().getLoc();
getLexer().Lex();
Lex();

unsigned ID = ARM::parseArch(Arch);

Expand Down Expand Up @@ -10195,7 +10195,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {

StringRef Name = Parser.getTok().getString();
SMLoc ExtLoc = Parser.getTok().getLoc();
getLexer().Lex();
Lex();

bool EnableFeature = true;
if (Name.startswith_lower("no")) {
Expand Down
25 changes: 12 additions & 13 deletions llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -729,11 +729,10 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {

bool HexagonAsmParser::matchBundleOptions() {
MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
while (true) {
if (!Parser.getTok().is(AsmToken::Colon))
return false;
Lexer.Lex();
Lex();
StringRef Option = Parser.getTok().getString();
if (Option.compare_lower("endloop0") == 0)
HexagonMCInstrInfo::setInnerLoop(MCB);
Expand All @@ -745,7 +744,7 @@ bool HexagonAsmParser::matchBundleOptions() {
HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
else
return true;
Lexer.Lex();
Lex();
}
}

Expand Down Expand Up @@ -1105,7 +1104,7 @@ bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) {
AsmToken const &Token = getParser().getTok();
StringRef String = Token.getString();
SMLoc Loc = Token.getLoc();
getLexer().Lex();
Lex();
do {
std::pair<StringRef, StringRef> HeadTail = String.split('.');
if (!HeadTail.first.empty())
Expand Down Expand Up @@ -1297,7 +1296,7 @@ bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) {
static char const * Comma = ",";
do {
Tokens.emplace_back (Lexer.getTok());
Lexer.Lex();
Lex();
switch (Tokens.back().getKind())
{
case AsmToken::TokenKind::Hash:
Expand Down Expand Up @@ -1346,27 +1345,27 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
AsmToken const &Token = Parser.getTok();
switch (Token.getKind()) {
case AsmToken::EndOfStatement: {
Lexer.Lex();
Lex();
return false;
}
case AsmToken::LCurly: {
if (!Operands.empty())
return true;
Operands.push_back(
HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
Lexer.Lex();
Lex();
return false;
}
case AsmToken::RCurly: {
if (Operands.empty()) {
Operands.push_back(
HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
Lexer.Lex();
Lex();
}
return false;
}
case AsmToken::Comma: {
Lexer.Lex();
Lex();
continue;
}
case AsmToken::EqualEqual:
Expand All @@ -1379,7 +1378,7 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
Token.getString().substr(0, 1), Token.getLoc()));
Operands.push_back(HexagonOperand::CreateToken(
Token.getString().substr(1, 1), Token.getLoc()));
Lexer.Lex();
Lex();
continue;
}
case AsmToken::Hash: {
Expand All @@ -1389,12 +1388,12 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
if (!ImplicitExpression)
Operands.push_back(
HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
Lexer.Lex();
Lex();
bool MustExtend = false;
bool HiOnly = false;
bool LoOnly = false;
if (Lexer.is(AsmToken::Hash)) {
Lexer.Lex();
Lex();
MustExtend = true;
} else if (ImplicitExpression)
MustNotExtend = true;
Expand All @@ -1412,7 +1411,7 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
HiOnly = false;
LoOnly = false;
} else {
Lexer.Lex();
Lex();
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1122,7 +1122,7 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo &Info,
// Parse until end of statement, consuming commas between operands
while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.is(AsmToken::Comma)) {
// Consume comma token
Lexer.Lex();
Lex();

// Parse next operand
if (parseOperand(&Operands, Mnemonic) != MatchOperand_Success)
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1710,7 +1710,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
while (getLexer().isNot(AsmToken::EndOfStatement) &&
getLexer().is(AsmToken::Comma)) {
// Consume the comma token
getLexer().Lex();
Lex();

// Parse the next operand
if (ParseOperand(Operands))
Expand Down
23 changes: 23 additions & 0 deletions llvm/test/MC/AsmParser/hash-directive.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# RUN: not llvm-mc -triple i386-unknown-unknown %s 2>&1 | FileCheck %s
error
# CHECK: hash-directive.s:[[@LINE-1]]:1: error
# 3 "FILE1" 1 #<- This is a CPP Hash w/ comment
error
# CHECK: FILE1:3:1: error
# 0 "" 2 #<- This is too
error
# CHECK: hash-directive.s:[[@LINE-1]]:1: error
# 1 "FILE2" 2 #<- This is a comment
error
# CHECK: hash-directive.s:[[@LINE-1]]:1: error
nop; # 6 "FILE3" 2 #<- This is a still comment
error
# CHECK: hash-directive.s:[[@LINE-1]]:1: error
nop;# 6 "FILE4" 2
nop;
error
# CHECK: FILE4:7:1: error
# 0 "" 2
/*comment*/# 6 "FILE5" 2 #<- This is a comment
error
# CHECK: hash-directive.s:[[@LINE-1]]:1: error