Skip to content

Commit

Permalink
[MC] Add three-state parseDirective as a replacement for ParseDirective
Browse files Browse the repository at this point in the history
Conventionally, parsing methods return false on success and true on
error. However, directive parsing methods need a third state: the
directive is not target specific. AsmParser::parseStatement detected
this case by using a fragile heuristic: if the target parser did not
consume any tokens, the directive is assumed to be not target-specific.

Some targets fail to follow the convention: they return success after
emitting an error or do not consume the entire line and return failure
on successful parsing. This was partially worked around by checking for
pending errors in parseStatement.

This patch tries to improve the situation by introducing parseDirective
method that returns ParseStatus -- three-state class. The new method
should eventually replace the old one returning bool.

ParseStatus is intentionally implicitly constructible from bool to allow
uses like `return Error(Loc, "message")`. It also has a potential to
replace OperandMatchResulTy as it is more convenient to use due to the
implicit construction from bool and more type safe.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D154101
  • Loading branch information
s-barannikov committed Jul 1, 2023
1 parent 624813a commit af20c1c
Show file tree
Hide file tree
Showing 30 changed files with 402 additions and 135 deletions.
41 changes: 40 additions & 1 deletion llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
Expand Up @@ -122,6 +122,32 @@ struct ParseInstructionInfo {
: AsmRewrites(rewrites) {}
};

/// Ternary parse status returned by various parse* methods.
class ParseStatus {
enum class StatusTy { Success, Failure, NoMatch } Status;

public:
#if __cplusplus >= 202002L
using enum StatusTy;
#else
static constexpr StatusTy Success = StatusTy::Success;
static constexpr StatusTy Failure = StatusTy::Failure;
static constexpr StatusTy NoMatch = StatusTy::NoMatch;
#endif

constexpr ParseStatus() : Status(NoMatch) {}

constexpr ParseStatus(StatusTy Status) : Status(Status) {}

constexpr ParseStatus(bool Error) : Status(Error ? Failure : Success) {}

template <typename T> constexpr ParseStatus(T) = delete;

constexpr bool isSuccess() const { return Status == StatusTy::Success; }
constexpr bool isFailure() const { return Status == StatusTy::Failure; }
constexpr bool isNoMatch() const { return Status == StatusTy::NoMatch; }
};

enum OperandMatchResultTy {
MatchOperand_Success, // operand matched successfully
MatchOperand_NoMatch, // operand did not match
Expand Down Expand Up @@ -408,6 +434,7 @@ class MCTargetAsmParser : public MCAsmParserExtension {
}

/// ParseDirective - Parse a target specific assembler directive
/// This method is deprecated, use 'parseDirective' instead.
///
/// The parser is positioned following the directive name. The target
/// specific directive parser should parse the entire directive doing or
Expand All @@ -417,7 +444,19 @@ class MCTargetAsmParser : public MCAsmParserExtension {
/// end-of-statement token and false is returned.
///
/// \param DirectiveID - the identifier token of the directive.
virtual bool ParseDirective(AsmToken DirectiveID) = 0;
virtual bool ParseDirective(AsmToken DirectiveID) { return true; }

/// Parses a target-specific assembler directive.
///
/// The parser is positioned following the directive name. The target-specific
/// directive parser should parse the entire directive doing or recording any
/// target-specific work, or emit an error. On success, the entire line should
/// be parsed up to and including the end-of-statement token. On failure, the
/// parser is not required to read to the end of the line. If the directive is
/// not target-specific, no tokens should be consumed and NoMatch is returned.
///
/// \param DirectiveID - The token identifying the directive.
virtual ParseStatus parseDirective(AsmToken DirectiveID);

/// MatchAndEmitInstruction - Recognize a series of operands of a parsed
/// instruction as an actual MCInst and emit it to the specified MCStreamer.
Expand Down
18 changes: 5 additions & 13 deletions llvm/lib/MC/MCParser/AsmParser.cpp
Expand Up @@ -1999,20 +1999,12 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,

getTargetParser().flushPendingInstructions(getStreamer());

SMLoc StartTokLoc = getTok().getLoc();
bool TPDirectiveReturn = getTargetParser().ParseDirective(ID);

if (hasPendingError())
return true;
// Currently the return value should be true if we are
// uninterested but as this is at odds with the standard parsing
// convention (return true = error) we have instances of a parsed
// directive that fails returning true as an error. Catch these
// cases as best as possible errors here.
if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
"Should only return Failure iff there was an error");
if (TPDirectiveReturn.isFailure())
return true;
// Return if we did some parsing or believe we succeeded.
if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
if (TPDirectiveReturn.isSuccess())
return false;

// Next, check the extension directive map to see if any extension has
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
Expand Up @@ -27,3 +27,24 @@ MCSubtargetInfo &MCTargetAsmParser::copySTI() {
const MCSubtargetInfo &MCTargetAsmParser::getSTI() const {
return *STI;
}

ParseStatus MCTargetAsmParser::parseDirective(AsmToken DirectiveID) {
SMLoc StartTokLoc = getTok().getLoc();
// Delegate to ParseDirective by default for transition period. Once the
// transition is over, this method should just return NoMatch.
bool Res = ParseDirective(DirectiveID);

// Some targets erroneously report success after emitting an error.
if (getParser().hasPendingError())
return ParseStatus::Failure;

// ParseDirective returns true if there was an error or if the directive is
// not target-specific. Disambiguate the two cases by comparing position of
// the lexer before and after calling the method: if no tokens were consumed,
// there was no match, otherwise there was a failure.
if (!Res)
return ParseStatus::Success;
if (getTok().getLoc() != StartTokLoc)
return ParseStatus::Failure;
return ParseStatus::NoMatch;
}
20 changes: 7 additions & 13 deletions llvm/lib/MC/MCParser/MasmParser.cpp
Expand Up @@ -2304,21 +2304,15 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
return (*Handler.second)(Handler.first, IDVal, IDLoc);

// Next, let the target-specific assembly parser try.
SMLoc StartTokLoc = getTok().getLoc();
bool TPDirectiveReturn =
ID.is(AsmToken::Identifier) && getTargetParser().ParseDirective(ID);
if (ID.isNot(AsmToken::Identifier))
return false;

if (hasPendingError())
return true;
// Currently the return value should be true if we are
// uninterested but as this is at odds with the standard parsing
// convention (return true = error) we have instances of a parsed
// directive that fails returning true as an error. Catch these
// cases as best as possible errors here.
if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
"Should only return Failure iff there was an error");
if (TPDirectiveReturn.isFailure())
return true;
// Return if we did some parsing or believe we succeeded.
if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
if (TPDirectiveReturn.isSuccess())
return false;

// Finally, if no one else is interested in this directive, it must be
Expand Down
30 changes: 16 additions & 14 deletions llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
Expand Up @@ -64,7 +64,7 @@ class AVRAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;

bool ParseDirective(AsmToken DirectiveID) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;

OperandMatchResultTy parseMemriOperand(OperandVector &Operands);

Expand All @@ -90,7 +90,7 @@ class AVRAsmParser : public MCTargetAsmParser {
uint64_t const &ErrorInfo);
bool missingFeature(SMLoc const &Loc, uint64_t const &ErrorInfo);

bool parseLiteralValues(unsigned SizeInBytes, SMLoc L);
ParseStatus parseLiteralValues(unsigned SizeInBytes, SMLoc L);

public:
AVRAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
Expand Down Expand Up @@ -674,19 +674,18 @@ bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
}

bool AVRAsmParser::ParseDirective(llvm::AsmToken DirectiveID) {
ParseStatus AVRAsmParser::parseDirective(llvm::AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal.lower() == ".long") {
parseLiteralValues(SIZE_LONG, DirectiveID.getLoc());
} else if (IDVal.lower() == ".word" || IDVal.lower() == ".short") {
parseLiteralValues(SIZE_WORD, DirectiveID.getLoc());
} else if (IDVal.lower() == ".byte") {
parseLiteralValues(1, DirectiveID.getLoc());
}
return true;
if (IDVal.lower() == ".long")
return parseLiteralValues(SIZE_LONG, DirectiveID.getLoc());
if (IDVal.lower() == ".word" || IDVal.lower() == ".short")
return parseLiteralValues(SIZE_WORD, DirectiveID.getLoc());
if (IDVal.lower() == ".byte")
return parseLiteralValues(1, DirectiveID.getLoc());
return ParseStatus::NoMatch;
}

bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
ParseStatus AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
MCAsmParser &Parser = getParser();
AVRMCELFStreamer &AVRStreamer =
static_cast<AVRMCELFStreamer &>(Parser.getStreamer());
Expand All @@ -698,7 +697,7 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(".text");
AVRStreamer.emitValueForModiferKind(Symbol, SizeInBytes, L,
AVRMCExpr::VK_AVR_None);
return false;
return ParseStatus::NoMatch;
}

if (Parser.getTok().getKind() == AsmToken::Identifier &&
Expand All @@ -715,7 +714,10 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
MCSymbol *Symbol =
getContext().getOrCreateSymbol(Parser.getTok().getString());
AVRStreamer.emitValueForModiferKind(Symbol, SizeInBytes, L, ModifierKind);
return false;
Lex(); // Eat the symbol name.
if (parseToken(AsmToken::RParen))
return ParseStatus::Failure;
return parseEOL();
}

auto parseOne = [&]() -> bool {
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
Expand Up @@ -47,7 +47,7 @@ class BPFAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;

bool ParseDirective(AsmToken DirectiveID) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;

// "=" is used as assignment operator for assembly statment, so can't be used
// for symbol assignment.
Expand Down Expand Up @@ -516,7 +516,9 @@ bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return false;
}

bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
ParseStatus BPFAsmParser::parseDirective(AsmToken DirectiveID) {
return ParseStatus::NoMatch;
}

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
Expand Down
20 changes: 7 additions & 13 deletions llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
Expand Up @@ -77,7 +77,7 @@ class CSKYAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;

bool ParseDirective(AsmToken DirectiveID) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;

// Helper to actually emit an instruction to the MCStreamer. Also, when
// possible, compression of the instruction is performed.
Expand Down Expand Up @@ -1573,17 +1573,13 @@ OperandMatchResultTy CSKYAsmParser::tryParseRegister(MCRegister &RegNo,
return MatchOperand_Success;
}

bool CSKYAsmParser::ParseDirective(AsmToken DirectiveID) {
// This returns false if this function recognizes the directive
// regardless of whether it is successfully handles or reports an
// error. Otherwise it returns true to give the generic parser a
// chance at recognizing it.
ParseStatus CSKYAsmParser::parseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();

if (IDVal == ".csky_attribute")
return parseDirectiveAttribute();

return true;
return ParseStatus::NoMatch;
}

/// parseDirectiveAttribute
Expand All @@ -1597,10 +1593,8 @@ bool CSKYAsmParser::parseDirectiveAttribute() {
StringRef Name = Parser.getTok().getIdentifier();
std::optional<unsigned> Ret =
ELFAttrs::attrTypeFromString(Name, CSKYAttrs::getCSKYAttributeTags());
if (!Ret) {
Error(TagLoc, "attribute name not recognised: " + Name);
return false;
}
if (!Ret)
return Error(TagLoc, "attribute name not recognised: " + Name);
Tag = *Ret;
Parser.Lex();
} else {
Expand All @@ -1611,8 +1605,8 @@ bool CSKYAsmParser::parseDirectiveAttribute() {
return true;

const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(AttrExpr);
if (check(!CE, TagLoc, "expected numeric constant"))
return true;
if (!CE)
return Error(TagLoc, "expected numeric constant");

Tag = CE->getValue();
}
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
Expand Up @@ -62,7 +62,7 @@ class LanaiAsmParser : public MCTargetAsmParser {

bool parsePrePost(StringRef Type, int *OffsetValue);

bool ParseDirective(AsmToken DirectiveID) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;

bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
Expand Down Expand Up @@ -649,7 +649,9 @@ struct LanaiOperand : public MCParsedAsmOperand {

} // end anonymous namespace

bool LanaiAsmParser::ParseDirective(AsmToken /*DirectiveId*/) { return true; }
ParseStatus LanaiAsmParser::parseDirective(AsmToken DirectiveID) {
return ParseStatus::NoMatch;
}

bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
OperandVector &Operands,
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
Expand Up @@ -51,7 +51,9 @@ class LoongArchAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;

bool ParseDirective(AsmToken DirectiveID) override { return true; }
ParseStatus parseDirective(AsmToken DirectiveID) override {
return ParseStatus::NoMatch;
}

bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
Expand Up @@ -72,7 +72,7 @@ class M68kAsmParser : public MCTargetAsmParser {
SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
Expand Down Expand Up @@ -992,7 +992,9 @@ bool M68kAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return false;
}

bool M68kAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
ParseStatus M68kAsmParser::parseDirective(AsmToken DirectiveID) {
return ParseStatus::NoMatch;
}

bool M68kAsmParser::invalidOperand(SMLoc const &Loc,
OperandVector const &Operands,
Expand Down
33 changes: 16 additions & 17 deletions llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
Expand Up @@ -53,7 +53,7 @@ class MSP430AsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;

bool ParseDirective(AsmToken DirectiveID) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
bool ParseDirectiveRefSym(AsmToken DirectiveID);

unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
Expand Down Expand Up @@ -424,27 +424,26 @@ bool MSP430AsmParser::ParseInstruction(ParseInstructionInfo &Info,
}

bool MSP430AsmParser::ParseDirectiveRefSym(AsmToken DirectiveID) {
StringRef Name;
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
StringRef Name;
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");

MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().emitSymbolAttribute(Sym, MCSA_Global);
return false;
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().emitSymbolAttribute(Sym, MCSA_Global);
return parseEOL();
}

bool MSP430AsmParser::ParseDirective(AsmToken DirectiveID) {
ParseStatus MSP430AsmParser::parseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal.lower() == ".long") {
ParseLiteralValues(4, DirectiveID.getLoc());
} else if (IDVal.lower() == ".word" || IDVal.lower() == ".short") {
ParseLiteralValues(2, DirectiveID.getLoc());
} else if (IDVal.lower() == ".byte") {
ParseLiteralValues(1, DirectiveID.getLoc());
} else if (IDVal.lower() == ".refsym") {
if (IDVal.lower() == ".long")
return ParseLiteralValues(4, DirectiveID.getLoc());
if (IDVal.lower() == ".word" || IDVal.lower() == ".short")
return ParseLiteralValues(2, DirectiveID.getLoc());
if (IDVal.lower() == ".byte")
return ParseLiteralValues(1, DirectiveID.getLoc());
if (IDVal.lower() == ".refsym")
return ParseDirectiveRefSym(DirectiveID);
}
return true;
return ParseStatus::NoMatch;
}

bool MSP430AsmParser::ParseOperand(OperandVector &Operands) {
Expand Down

0 comments on commit af20c1c

Please sign in to comment.