Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Reapply: [llvm-rc] Add basic RC scripts parsing ability.
For now, the parser supports a limited set of statements and resources; this will be extended in the following patches. Thanks to Nico Weber (thakis) for his original work in this area. This patch was originally submitted as r311175 and was reverted in r311177 because of compilation problems under gcc. Differential Revision: https://reviews.llvm.org/D36340 llvm-svn: 311184
- Loading branch information
Showing
18 changed files
with
731 additions
and
1 deletion.
There are no files selected for viewing
15 changes: 15 additions & 0 deletions
15
llvm/test/tools/llvm-rc/Inputs/parser-correct-everything.rc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
meh IcOn "hello.bmp" | ||
Icon Icon "Icon" | ||
|
||
LANGUAGE 5, 12 | ||
|
||
STRINGTABLE | ||
LANGUAGE 1, 1 | ||
CHARACTERISTICS 500 | ||
LANGUAGE 3, 4 | ||
VERSION 14 | ||
{ | ||
1 "hello" | ||
2 "world" | ||
} | ||
STRINGTABLE BEGIN END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
LANGUAGE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
LANGUAGE 5 7 |
1 change: 1 addition & 0 deletions
1
llvm/test/tools/llvm-rc/Inputs/parser-language-too-many-commas.rc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
LANGUAGE 5,, 7 |
5 changes: 5 additions & 0 deletions
5
llvm/test/tools/llvm-rc/Inputs/parser-no-characteristics-arg.rc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
STRINGTABLE | ||
CHARACTERISTICS | ||
BEGIN | ||
100 "No integer after CHARACTERISTICS." | ||
END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
& ICON "WeirdResourceName.ico" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
HELLO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
HELLO WORLD |
6 changes: 6 additions & 0 deletions
6
llvm/test/tools/llvm-rc/Inputs/parser-stringtable-no-string.rc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
STRINGTABLE | ||
VERSION 8 | ||
{ | ||
1 "hello" | ||
2 | ||
} |
4 changes: 4 additions & 0 deletions
4
llvm/test/tools/llvm-rc/Inputs/parser-stringtable-weird-option.rc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
STRINGTABLE | ||
NONSENSETYPE 12 34 | ||
BEGIN | ||
END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
; RUN: llvm-rc /V %p/Inputs/parser-correct-everything.rc | FileCheck %s --check-prefix PGOOD | ||
|
||
; PGOOD: Icon (meh): "hello.bmp" | ||
; PGOOD-NEXT: Icon (Icon): "Icon" | ||
; PGOOD-NEXT: Language: 5, Sublanguage: 12 | ||
; PGOOD-NEXT: StringTable: | ||
; PGOOD-NEXT: Option: Language: 1, Sublanguage: 1 | ||
; PGOOD-NEXT: Option: Characteristics: 500 | ||
; PGOOD-NEXT: Option: Language: 3, Sublanguage: 4 | ||
; PGOOD-NEXT: Option: Version: 14 | ||
; PGOOD-NEXT: 1 => "hello" | ||
; PGOOD-NEXT: 2 => "world" | ||
; PGOOD-NEXT: StringTable: | ||
|
||
|
||
; RUN: not llvm-rc /V %p/Inputs/parser-stringtable-no-string.rc 2> %t2 | ||
; RUN: FileCheck %s --check-prefix PSTRINGTABLE1 --input-file %t2 | ||
|
||
; PSTRINGTABLE1: llvm-rc: Error parsing file: expected string, got } | ||
|
||
|
||
; RUN: not llvm-rc /V %p/Inputs/parser-stringtable-weird-option.rc 2> %t3 | ||
; RUN: FileCheck %s --check-prefix PSTRINGTABLE2 --input-file %t3 | ||
|
||
; PSTRINGTABLE2: llvm-rc: Error parsing file: expected optional statement type, BEGIN or '{', got NONSENSETYPE | ||
|
||
|
||
; RUN: not llvm-rc /V %p/Inputs/parser-eof.rc 2> %t4 | ||
; RUN: FileCheck %s --check-prefix PEOF --input-file %t4 | ||
|
||
; PEOF: llvm-rc: Error parsing file: expected integer, got <EOF> | ||
|
||
|
||
; RUN: not llvm-rc /V %p/Inputs/parser-no-characteristics-arg.rc 2> %t5 | ||
; RUN: FileCheck %s --check-prefix PCHARACTERISTICS1 --input-file %t5 | ||
|
||
; PCHARACTERISTICS1: llvm-rc: Error parsing file: expected integer, got BEGIN | ||
|
||
|
||
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-token.rc 2> %t6 | ||
; RUN: FileCheck %s --check-prefix PNONSENSE1 --input-file %t6 | ||
|
||
; PNONSENSE1: llvm-rc: Error parsing file: expected int or identifier, got & | ||
|
||
|
||
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-type.rc 2> %t7 | ||
; RUN: FileCheck %s --check-prefix PNONSENSE2 --input-file %t7 | ||
|
||
; PNONSENSE2: llvm-rc: Error parsing file: expected resource type, got WORLD | ||
|
||
|
||
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-type-eof.rc 2> %t8 | ||
; RUN: FileCheck %s --check-prefix PNONSENSE3 --input-file %t8 | ||
|
||
; PNONSENSE3: llvm-rc: Error parsing file: expected int or identifier, got <EOF> | ||
|
||
|
||
; RUN: not llvm-rc /V %p/Inputs/parser-language-no-comma.rc 2> %t9 | ||
; RUN: FileCheck %s --check-prefix PLANGUAGE1 --input-file %t9 | ||
|
||
; PLANGUAGE1: llvm-rc: Error parsing file: expected ',', got 7 | ||
|
||
|
||
; RUN: not llvm-rc /V %p/Inputs/parser-language-too-many-commas.rc 2> %t10 | ||
; RUN: FileCheck %s --check-prefix PLANGUAGE2 --input-file %t10 | ||
|
||
; PLANGUAGE2: llvm-rc: Error parsing file: expected integer, got , |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,267 @@ | ||
//===-- ResourceScriptParser.cpp --------------------------------*- C++-*-===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===---------------------------------------------------------------------===// | ||
// | ||
// This implements the parser defined in ResourceScriptParser.h. | ||
// | ||
//===---------------------------------------------------------------------===// | ||
|
||
#include "ResourceScriptParser.h" | ||
|
||
// Evaluate Expr (an expression yielding an llvm::Error-like value that tests
// true on failure) and, if it holds an error, return it from the enclosing
// function.
//
// The body is wrapped in do { } while (false) so that the macro expands to
// exactly one statement: it can be used as the unbraced branch of an if/else
// without capturing the following `else`, and it requires the usual trailing
// semicolon at the call site.
#define RETURN_IF_ERROR(Expr)                                                  \
  do {                                                                         \
    if (auto Err = (Expr))                                                     \
      return std::move(Err);                                                   \
  } while (false)
|
||
// Evaluate Expr (expected to yield an llvm::Expected<T>), store the result in | ||
// a new local variable named Var and, if Var tests false, return | ||
// Var.takeError() from the enclosing function. | ||
// | ||
// Note: this expands to a declaration followed by an `if`, so Var lives in | ||
// the caller's scope and the macro must not be used as the unbraced body of | ||
// an if/else or loop. | ||
#define ASSIGN_OR_RETURN(Var, Expr) \ | ||
auto Var = (Expr); \ | ||
if (!Var) \ | ||
return Var.takeError(); | ||
|
||
namespace llvm { | ||
namespace rc { | ||
|
||
// Record where parsing failed and build the user-facing message. The message
// names what the parser expected and what it found instead: the text of the
// token at CurLoc, or "<EOF>" when CurLoc is already at End.
RCParser::ParserError::ParserError(const Twine Expected, const LocIter CurLoc,
                                   const LocIter End)
    : ErrorLoc(CurLoc), FileEnd(End) {
  const StringRef Found =
      (CurLoc == End) ? StringRef("<EOF>") : StringRef(CurLoc->value());
  CurMessage =
      "Error parsing file: expected " + Expected.str() + ", got " + Found.str();
}
|
||
// Out-of-class definition of the static ParserError::ID member. | ||
// NOTE(review): presumably the class identifier used by LLVM's error RTTI | ||
// (llvm::ErrorInfo); confirm against ResourceScriptParser.h. | ||
char RCParser::ParserError::ID = 0; | ||
|
||
// Construct a parser from an existing token list. Tokens is initialised from | ||
// TokenList; CurLoc starts at the first token of Tokens and End marks one | ||
// past its last token. | ||
RCParser::RCParser(const std::vector<RCToken> &TokenList) | ||
: Tokens(TokenList), CurLoc(Tokens.begin()), End(Tokens.end()) {} | ||
|
||
// Construct a parser by moving from TokenList. As in the overload taking a | ||
// const reference, CurLoc and End are taken from the parser's own Tokens | ||
// member, not from the argument. | ||
RCParser::RCParser(std::vector<RCToken> &&TokenList) | ||
: Tokens(std::move(TokenList)), CurLoc(Tokens.begin()), End(Tokens.end()) {} | ||
|
||
// True once the cursor has moved past the last token.
bool RCParser::isEof() const {
  return End == CurLoc;
}
|
||
// Parse one top-level resource definition starting at the cursor.
//
// Most resources have the form `<name> <type> ...`, but LANGUAGE and
// STRINGTABLE have no name: for those the very first token is the type.
// Returns the parsed resource, or the error produced while reading it.
RCParser::ParseType RCParser::parseSingleResource() {
  ASSIGN_OR_RETURN(FirstToken, readTypeOrName());

  // Unnamed resources are recognised by their leading keyword.
  if (FirstToken->equalsLower("LANGUAGE"))
    return parseLanguageResource();
  if (FirstToken->equalsLower("STRINGTABLE"))
    return parseStringTableResource();

  // Otherwise the first token was the resource's name and the type follows.
  ASSIGN_OR_RETURN(KindToken, readTypeOrName());

  // ICON is the only named resource type handled here. The offending type
  // token has already been consumed, hence IsAlreadyRead.
  if (!KindToken->equalsLower("ICON"))
    return getExpectedError("resource type", /* IsAlreadyRead = */ true);

  ParseType Parsed = parseIconResource();

  // Attach the name only when the body was parsed successfully.
  if (Parsed)
    (*Parsed)->setName(*FirstToken);

  return Parsed;
}
|
||
// True if there is a next token and it has the given kind. Never reads past
// the end: at EOF the answer is simply false.
bool RCParser::isNextTokenKind(Kind TokenKind) const {
  if (isEof())
    return false;
  return look().kind() == TokenKind;
}
|
||
// Return the token under the cursor without advancing. The caller must ensure
// the parser is not at EOF (asserted).
const RCToken &RCParser::look() const {
  assert(!isEof());
  const RCToken &Upcoming = *CurLoc;
  return Upcoming;
}
|
||
// Return the token under the cursor and advance past it. The caller must
// ensure the parser is not at EOF (asserted).
const RCToken &RCParser::read() {
  assert(!isEof());
  const RCToken &Taken = *CurLoc;
  ++CurLoc;
  return Taken;
}
|
||
// Skip the token under the cursor. The caller must ensure the parser is not
// at EOF (asserted).
void RCParser::consume() {
  assert(!isEof());
  ++CurLoc;
}
|
||
// Read an integer token and return its value; if the next token is not an
// integer (or there is none), return an "expected integer" error and leave
// the cursor where it was.
Expected<uint32_t> RCParser::readInt() {
  if (isNextTokenKind(Kind::Int))
    return read().intValue();
  return getExpectedError("integer");
}
|
||
// Read a string token and return its text; if the next token is not a string
// (or there is none), return an "expected string" error and leave the cursor
// where it was.
Expected<StringRef> RCParser::readString() {
  if (isNextTokenKind(Kind::String))
    return read().value();
  return getExpectedError("string");
}
|
||
// Read an identifier token and return its text; if the next token is not an
// identifier (or there is none), return an "expected identifier" error and
// leave the cursor where it was.
Expected<StringRef> RCParser::readIdentifier() {
  if (isNextTokenKind(Kind::Identifier))
    return read().value();
  return getExpectedError("identifier");
}
|
||
// Read a token that may serve as a resource name or a resource type.
//
// Only identifiers and integers are accepted here; the original RC tool is
// considerably more permissive about what it treats as a name or type.
Expected<IntOrString> RCParser::readTypeOrName() {
  const bool IsAcceptable =
      isNextTokenKind(Kind::Identifier) || isNextTokenKind(Kind::Int);
  if (!IsAcceptable)
    return getExpectedError("int or identifier");

  // Wrap the token as either its numeric value or its text.
  const RCToken &Token = read();
  return Token.kind() == Kind::Int ? IntOrString(Token.intValue())
                                   : IntOrString(Token.value());
}
|
||
// Require the next token to be of kind TokenKind and skip it. On mismatch (or
// EOF) nothing is consumed and an "expected ..." error is returned.
Error RCParser::consumeType(Kind TokenKind) {
  if (!isNextTokenKind(TokenKind)) {
    // Derive the "expected" text from the token list: the stringified token
    // name for TOKEN entries and the stringified character for SHORT_TOKEN
    // entries.
    switch (TokenKind) {
#define TOKEN(TokenName)                                                       \
  case Kind::TokenName:                                                        \
    return getExpectedError(#TokenName);
#define SHORT_TOKEN(TokenName, TokenCh)                                        \
  case Kind::TokenName:                                                        \
    return getExpectedError(#TokenCh);
#include "ResourceScriptTokenList.h"
#undef SHORT_TOKEN
#undef TOKEN
    }

    llvm_unreachable("All case options exhausted.");
  }

  consume();
  return Error::success();
}
|
||
// Skip the next token if (and only if) it is of kind TokenKind. Returns
// whether a token was skipped.
bool RCParser::consumeOptionalType(Kind TokenKind) {
  const bool Matches = isNextTokenKind(TokenKind);
  if (Matches)
    consume();
  return Matches;
}
|
||
// Read a comma-separated list of integers containing at least MinCount and at
// most MaxCount elements.
//
// Reading stops at the first token that does not fit the list. If at least
// MinCount integers were collected by then, the failure is discarded and the
// integers read so far are returned; otherwise the failure is reported.
Expected<SmallVector<uint32_t, 8>>
RCParser::readIntsWithCommas(size_t MinCount, size_t MaxCount) {
  assert(MinCount <= MaxCount);

  using IntList = SmallVector<uint32_t, 8>;
  IntList Values;

  // Decide whether a read failure is fatal or merely marks the end of the
  // list.
  auto StopOnError = [&](llvm::Error Failure) -> Expected<IntList> {
    if (Values.size() >= MinCount) {
      consumeError(std::move(Failure));
      return Values;
    }
    return std::move(Failure);
  };

  for (size_t Index = 0; Index != MaxCount; ++Index) {
    // Every element but the first must be preceded by a comma. The RC tool
    // is inconsistent about requiring commas; this parser always does.
    if (Index != 0)
      if (auto CommaError = consumeType(Kind::Comma))
        return StopOnError(std::move(CommaError));

    auto Number = readInt();
    if (!Number)
      return StopOnError(Number.takeError());
    Values.push_back(*Number);
  }

  // Explicit move: the local is converted into the Expected<> return type.
  return std::move(Values);
}
|
||
// Parse the run of optional statements that precedes a resource's block.
// Statements are collected until the next token is a block opener; the opener
// itself is left unconsumed. IsExtended is only forwarded to the
// single-statement parser.
Expected<OptionalStmtList> RCParser::parseOptionalStatements(bool IsExtended) {
  OptionalStmtList Statements;

  while (true) {
    // The block opener terminates the optional-statement list.
    if (isNextTokenKind(Kind::BlockBegin))
      break;

    ASSIGN_OR_RETURN(Statement, parseSingleOptionalStatement(IsExtended));
    Statements.addStmt(std::move(*Statement));
  }

  // Explicit move: the local is converted into the Expected<> return type.
  return std::move(Statements);
}
|
||
// Parse one optional statement: a keyword identifier (CHARACTERISTICS,
// LANGUAGE or VERSION, matched with equals_lower) followed by that
// statement's arguments. The bool parameter is currently unused.
Expected<std::unique_ptr<OptionalStmt>>
RCParser::parseSingleOptionalStatement(bool) {
  ASSIGN_OR_RETURN(Keyword, readIdentifier());

  if (Keyword->equals_lower("CHARACTERISTICS"))
    return parseCharacteristicsStmt();
  if (Keyword->equals_lower("LANGUAGE"))
    return parseLanguageStmt();
  if (Keyword->equals_lower("VERSION"))
    return parseVersionStmt();

  // Unknown keyword. It has already been consumed, so the error must point at
  // the previous token.
  return getExpectedError("optional statement type, BEGIN or '{'",
                          /* IsAlreadyRead = */ true);
}
|
||
// Parse a top-level LANGUAGE resource; the LANGUAGE keyword itself has | ||
// already been consumed by the caller. | ||
RCParser::ParseType RCParser::parseLanguageResource() { | ||
// A LANGUAGE resource has the same syntax as the LANGUAGE optional | ||
// statement, so parse it as one; on success the result is converted | ||
// (upcast) to the RCResource-based ParseType on return. | ||
return parseLanguageStmt(); | ||
} | ||
|
||
// Parse the body of an ICON resource: a single string argument. Returns the
// new IconResource, or the error from reading the string.
RCParser::ParseType RCParser::parseIconResource() {
  auto Location = readString();
  if (!Location)
    return Location.takeError();
  return make_unique<IconResource>(*Location);
}
|
||
// Parse the remainder of a STRINGTABLE resource: optional statements, a block
// opener, zero or more `<id> <string>` entries, and a block closer.
RCParser::ParseType RCParser::parseStringTableResource() {
  ASSIGN_OR_RETURN(Options, parseOptionalStatements());
  RETURN_IF_ERROR(consumeType(Kind::BlockBegin));

  auto Result = make_unique<StringTableResource>(std::move(*Options));

  while (true) {
    // The block closer ends the table (and is consumed here).
    if (consumeOptionalType(Kind::BlockEnd))
      break;

    // An entry is an integer ID immediately followed by its string. Some
    // documentation examples place a comma between the two; that form is
    // deliberately not accepted.
    ASSIGN_OR_RETURN(EntryId, readInt());
    ASSIGN_OR_RETURN(EntryText, readString());
    Result->addString(*EntryId, *EntryText);
  }

  // Explicit move: the unique_ptr is converted into the ParseType return.
  return std::move(Result);
}
|
||
// Parse the arguments of a LANGUAGE statement: exactly two comma-separated
// integers, passed in order to the LanguageResource constructor.
RCParser::ParseOptionType RCParser::parseLanguageStmt() {
  auto LangIds = readIntsWithCommas(/* min = */ 2, /* max = */ 2);
  if (!LangIds)
    return LangIds.takeError();

  uint32_t Lang = (*LangIds)[0];
  uint32_t SubLang = (*LangIds)[1];
  return make_unique<LanguageResource>(Lang, SubLang);
}
|
||
// Parse the argument of a CHARACTERISTICS statement: a single integer.
RCParser::ParseOptionType RCParser::parseCharacteristicsStmt() {
  auto Value = readInt();
  if (!Value)
    return Value.takeError();
  return make_unique<CharacteristicsStmt>(*Value);
}
|
||
// Parse the argument of a VERSION statement: a single integer.
RCParser::ParseOptionType RCParser::parseVersionStmt() {
  auto Value = readInt();
  if (!Value)
    return Value.takeError();
  return make_unique<VersionStmt>(*Value);
}
|
||
Error RCParser::getExpectedError(const Twine Message, bool IsAlreadyRead) { | ||
return make_error<ParserError>( | ||
Message, IsAlreadyRead ? std::prev(CurLoc) : CurLoc, End); | ||
} | ||
|
||
} // namespace rc | ||
} // namespace llvm |
Oops, something went wrong.