Skip to content

Commit

Permalink
Reapply: [llvm-rc] Add basic RC scripts parsing ability.
Browse files Browse the repository at this point in the history
For now, the parser supports a limited set of statements and
resources. This will be extended in the following patches.

Thanks to Nico Weber (thakis) for his original work in this area.

This patch was originally submitted as r311175 and got reverted
in r311177 because of the problems with compilation under gcc.

Differential Revision: https://reviews.llvm.org/D36340

llvm-svn: 311184
  • Loading branch information
mnbvmar committed Aug 18, 2017
1 parent 291d658 commit 5cd3d5c
Show file tree
Hide file tree
Showing 18 changed files with 731 additions and 1 deletion.
15 changes: 15 additions & 0 deletions llvm/test/tools/llvm-rc/Inputs/parser-correct-everything.rc
@@ -0,0 +1,15 @@
meh IcOn "hello.bmp"
Icon Icon "Icon"

LANGUAGE 5, 12

STRINGTABLE
LANGUAGE 1, 1
CHARACTERISTICS 500
LANGUAGE 3, 4
VERSION 14
{
1 "hello"
2 "world"
}
STRINGTABLE BEGIN END
1 change: 1 addition & 0 deletions llvm/test/tools/llvm-rc/Inputs/parser-eof.rc
@@ -0,0 +1 @@
LANGUAGE
1 change: 1 addition & 0 deletions llvm/test/tools/llvm-rc/Inputs/parser-language-no-comma.rc
@@ -0,0 +1 @@
LANGUAGE 5 7
@@ -0,0 +1 @@
LANGUAGE 5,, 7
@@ -0,0 +1,5 @@
STRINGTABLE
CHARACTERISTICS
BEGIN
100 "No integer after CHARACTERISTICS."
END
1 change: 1 addition & 0 deletions llvm/test/tools/llvm-rc/Inputs/parser-nonsense-token.rc
@@ -0,0 +1 @@
& ICON "WeirdResourceName.ico"
1 change: 1 addition & 0 deletions llvm/test/tools/llvm-rc/Inputs/parser-nonsense-type-eof.rc
@@ -0,0 +1 @@
HELLO
1 change: 1 addition & 0 deletions llvm/test/tools/llvm-rc/Inputs/parser-nonsense-type.rc
@@ -0,0 +1 @@
HELLO WORLD
@@ -0,0 +1,6 @@
STRINGTABLE
VERSION 8
{
1 "hello"
2
}
@@ -0,0 +1,4 @@
STRINGTABLE
NONSENSETYPE 12 34
BEGIN
END
67 changes: 67 additions & 0 deletions llvm/test/tools/llvm-rc/parser.test
@@ -0,0 +1,67 @@
; RUN: llvm-rc /V %p/Inputs/parser-correct-everything.rc | FileCheck %s --check-prefix PGOOD

; PGOOD: Icon (meh): "hello.bmp"
; PGOOD-NEXT: Icon (Icon): "Icon"
; PGOOD-NEXT: Language: 5, Sublanguage: 12
; PGOOD-NEXT: StringTable:
; PGOOD-NEXT: Option: Language: 1, Sublanguage: 1
; PGOOD-NEXT: Option: Characteristics: 500
; PGOOD-NEXT: Option: Language: 3, Sublanguage: 4
; PGOOD-NEXT: Option: Version: 14
; PGOOD-NEXT: 1 => "hello"
; PGOOD-NEXT: 2 => "world"
; PGOOD-NEXT: StringTable:


; RUN: not llvm-rc /V %p/Inputs/parser-stringtable-no-string.rc 2> %t2
; RUN: FileCheck %s --check-prefix PSTRINGTABLE1 --input-file %t2

; PSTRINGTABLE1: llvm-rc: Error parsing file: expected string, got }


; RUN: not llvm-rc /V %p/Inputs/parser-stringtable-weird-option.rc 2> %t3
; RUN: FileCheck %s --check-prefix PSTRINGTABLE2 --input-file %t3

; PSTRINGTABLE2: llvm-rc: Error parsing file: expected optional statement type, BEGIN or '{', got NONSENSETYPE


; RUN: not llvm-rc /V %p/Inputs/parser-eof.rc 2> %t4
; RUN: FileCheck %s --check-prefix PEOF --input-file %t4

; PEOF: llvm-rc: Error parsing file: expected integer, got <EOF>


; RUN: not llvm-rc /V %p/Inputs/parser-no-characteristics-arg.rc 2> %t5
; RUN: FileCheck %s --check-prefix PCHARACTERISTICS1 --input-file %t5

; PCHARACTERISTICS1: llvm-rc: Error parsing file: expected integer, got BEGIN


; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-token.rc 2> %t6
; RUN: FileCheck %s --check-prefix PNONSENSE1 --input-file %t6

; PNONSENSE1: llvm-rc: Error parsing file: expected int or identifier, got &


; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-type.rc 2> %t7
; RUN: FileCheck %s --check-prefix PNONSENSE2 --input-file %t7

; PNONSENSE2: llvm-rc: Error parsing file: expected resource type, got WORLD


; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-type-eof.rc 2> %t8
; RUN: FileCheck %s --check-prefix PNONSENSE3 --input-file %t8

; PNONSENSE3: llvm-rc: Error parsing file: expected int or identifier, got <EOF>


; RUN: not llvm-rc /V %p/Inputs/parser-language-no-comma.rc 2> %t9
; RUN: FileCheck %s --check-prefix PLANGUAGE1 --input-file %t9

; PLANGUAGE1: llvm-rc: Error parsing file: expected ',', got 7


; RUN: not llvm-rc /V %p/Inputs/parser-language-too-many-commas.rc 2> %t10
; RUN: FileCheck %s --check-prefix PLANGUAGE2 --input-file %t10

; PLANGUAGE2: llvm-rc: Error parsing file: expected integer, got ,
4 changes: 3 additions & 1 deletion llvm/test/tools/llvm-rc/tokenizer.test
@@ -1,4 +1,6 @@
; RUN: llvm-rc /V %p/Inputs/tokens.rc | FileCheck %s
; RUN: not llvm-rc /V %p/Inputs/tokens.rc | FileCheck %s
; llvm-rc fails now on this sample because it is an invalid resource file
; script. We silence the error message and just analyze the output.

; CHECK: Int: 1; int value = 1
; CHECK-NEXT: Plus: +
Expand Down
2 changes: 2 additions & 0 deletions llvm/tools/llvm-rc/CMakeLists.txt
Expand Up @@ -10,5 +10,7 @@ add_public_tablegen_target(RcTableGen)

add_llvm_tool(llvm-rc
llvm-rc.cpp
ResourceScriptParser.cpp
ResourceScriptStmt.cpp
ResourceScriptToken.cpp
)
267 changes: 267 additions & 0 deletions llvm/tools/llvm-rc/ResourceScriptParser.cpp
@@ -0,0 +1,267 @@
//===-- ResourceScriptParser.cpp --------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This implements the parser defined in ResourceScriptParser.h.
//
//===---------------------------------------------------------------------===//

#include "ResourceScriptParser.h"

// Evaluate an expression returning llvm::Error and, if it holds a failure,
// forward that error out of the enclosing function.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to
// exactly one statement: it pairs with the caller's trailing semicolon and can
// safely be used as the unbraced body of an if/else.
#define RETURN_IF_ERROR(Expr)                                                  \
  do {                                                                         \
    if (auto Err = (Expr))                                                     \
      return std::move(Err);                                                   \
  } while (0)

// Take an expression returning llvm::Expected<T> and assign it to Var or
// forward the error out of the function.
//
// Var is declared (with auto) in the scope where the macro is used, so it
// remains an Expected<T>; callers dereference it (*Var / Var->) to reach the
// value. Because the expansion is a declaration followed by an if statement,
// it is two statements and must not be used as the unbraced body of an
// if/else/loop.
#define ASSIGN_OR_RETURN(Var, Expr) \
auto Var = (Expr); \
if (!Var) \
return Var.takeError();

namespace llvm {
namespace rc {

// Build a parse error located at CurLoc. The message states what the parser
// expected and what it found instead: the text of the token at CurLoc, or
// "<EOF>" when CurLoc is already at End.
RCParser::ParserError::ParserError(const Twine Expected, const LocIter CurLoc,
                                   const LocIter End)
    : ErrorLoc(CurLoc), FileEnd(End) {
  // Only dereference the iterator when it actually points at a token.
  const StringRef Found =
      (CurLoc == End) ? StringRef("<EOF>") : CurLoc->value();

  CurMessage = "Error parsing file: expected " + Expected.str() + ", got " +
               Found.str();
}

// Out-of-line definition of ParserError's static ID member.
// NOTE(review): presumably its address is the type tag used by LLVM's error
// RTTI (ErrorInfo) — confirm against ResourceScriptParser.h.
char RCParser::ParserError::ID = 0;

// Construct a parser whose Tokens member is initialized from TokenList.
// CurLoc and End are taken from the Tokens member (not from the argument), so
// the cursor starts at the first token.
// NOTE(review): relies on Tokens being declared before CurLoc and End in the
// class so that it is initialized first — confirm in the header.
RCParser::RCParser(const std::vector<RCToken> &TokenList)
: Tokens(TokenList), CurLoc(Tokens.begin()), End(Tokens.end()) {}

// Construct a parser whose Tokens member is move-initialized from TokenList.
// CurLoc and End are taken from the Tokens member (not from the moved-from
// argument), so the cursor starts at the first token.
// NOTE(review): relies on Tokens being declared before CurLoc and End in the
// class so that it is initialized first — confirm in the header.
RCParser::RCParser(std::vector<RCToken> &&TokenList)
: Tokens(std::move(TokenList)), CurLoc(Tokens.begin()), End(Tokens.end()) {}

bool RCParser::isEof() const { return CurLoc == End; }

// Parse one top-level resource definition starting at the cursor.
// Returns the parsed resource, or the error produced while reading it.
RCParser::ParseType RCParser::parseSingleResource() {
  // A resource normally begins with its name. LANGUAGE and STRINGTABLE are
  // unnamed, so for them the leading token is already the type keyword.
  ASSIGN_OR_RETURN(NameToken, readTypeOrName());

  if (NameToken->equalsLower("LANGUAGE"))
    return parseLanguageResource();
  if (NameToken->equalsLower("STRINGTABLE"))
    return parseStringTableResource();

  // Not an unnamed resource: the token above was the name, the type follows.
  ASSIGN_OR_RETURN(TypeToken, readTypeOrName());

  // ICON is the only named resource type handled here. The type token has
  // already been consumed, so the error must point one token back.
  if (!TypeToken->equalsLower("ICON"))
    return getExpectedError("resource type", /* IsAlreadyRead = */ true);

  ParseType Result = parseIconResource();

  // Attach the name only when the resource body parsed successfully.
  if (Result)
    (*Result)->setName(*NameToken);

  return Result;
}

// Check whether there is a next token and it is of kind TokenKind.
// Returns false at end of input.
bool RCParser::isNextTokenKind(Kind TokenKind) const {
  if (isEof())
    return false;
  return look().kind() == TokenKind;
}

// Return the token under the cursor without advancing.
// Must not be called at end of input.
const RCToken &RCParser::look() const {
  assert(!isEof());
  const RCToken &Current = *CurLoc;
  return Current;
}

// Return the token under the cursor and advance past it.
// Must not be called at end of input.
const RCToken &RCParser::read() {
  assert(!isEof());
  const RCToken &Current = *CurLoc;
  ++CurLoc;
  return Current;
}

// Advance the cursor past the current token, discarding it.
// Must not be called at end of input.
void RCParser::consume() {
  assert(!isEof());
  ++CurLoc;
}

// Consume an integer token and return its value, or return an
// "expected integer" error (leaving the cursor untouched) if the next token
// is anything else.
Expected<uint32_t> RCParser::readInt() {
  if (isNextTokenKind(Kind::Int))
    return read().intValue();
  return getExpectedError("integer");
}

// Consume a string token and return its text, or return an
// "expected string" error (leaving the cursor untouched) if the next token is
// anything else.
Expected<StringRef> RCParser::readString() {
  if (isNextTokenKind(Kind::String))
    return read().value();
  return getExpectedError("string");
}

// Consume an identifier token and return its text, or return an
// "expected identifier" error (leaving the cursor untouched) if the next
// token is anything else.
Expected<StringRef> RCParser::readIdentifier() {
  if (isNextTokenKind(Kind::Identifier))
    return read().value();
  return getExpectedError("identifier");
}

// Consume a token usable as a resource name or type and return it as an
// IntOrString. Only identifiers and integers are accepted; anything else
// (including end of input) yields an "expected int or identifier" error.
Expected<IntOrString> RCParser::readTypeOrName() {
  // We deliberately restrict names and types to identifiers and integers;
  // the original RC tool is much more liberal.
  const bool IsNameLike =
      isNextTokenKind(Kind::Identifier) || isNextTokenKind(Kind::Int);
  if (!IsNameLike)
    return getExpectedError("int or identifier");

  const RCToken &Tok = read();
  return Tok.kind() == Kind::Int ? IntOrString(Tok.intValue())
                                 : IntOrString(Tok.value());
}

// Consume the next token if it is of kind TokenKind and return success;
// otherwise leave the cursor untouched and return an "expected ..." error
// naming the requested kind.
Error RCParser::consumeType(Kind TokenKind) {
if (isNextTokenKind(TokenKind)) {
consume();
return Error::success();
}

// Mismatch (or end of input): pick the error text for the requested kind.
// ResourceScriptTokenList.h is included with TOKEN/SHORT_TOKEN defined so
// that each of its entries expands to one case label. TOKEN kinds are
// reported by their stringified name, SHORT_TOKEN kinds by their stringified
// second macro argument (e.g. the message reads: expected ',').
switch (TokenKind) {
#define TOKEN(TokenName) \
case Kind::TokenName: \
return getExpectedError(#TokenName);
#define SHORT_TOKEN(TokenName, TokenCh) \
case Kind::TokenName: \
return getExpectedError(#TokenCh);
#include "ResourceScriptTokenList.h"
#undef SHORT_TOKEN
#undef TOKEN
}

// NOTE(review): assumes the token list covers every Kind enumerator, so each
// case above returns — confirm against ResourceScriptTokenList.h.
llvm_unreachable("All case options exhausted.");
}

// Consume the next token only if it is of kind TokenKind.
// Returns true when a token was consumed, false otherwise.
bool RCParser::consumeOptionalType(Kind TokenKind) {
  const bool Matches = isNextTokenKind(TokenKind);
  if (Matches)
    consume();
  return Matches;
}

// Read a comma-separated list of integers containing at least MinCount and at
// most MaxCount elements. Reading stops at the first token that does not fit;
// that is an error only if fewer than MinCount integers were read so far.
Expected<SmallVector<uint32_t, 8>>
RCParser::readIntsWithCommas(size_t MinCount, size_t MaxCount) {
  assert(MinCount <= MaxCount);

  using IntList = SmallVector<uint32_t, 8>;
  IntList Values;

  // Decide what a read failure means: with enough integers already collected
  // the failure just ends the list (the error is discarded); otherwise it is
  // propagated to the caller.
  auto StopReading = [&](llvm::Error Err) -> Expected<IntList> {
    if (Values.size() >= MinCount) {
      consumeError(std::move(Err));
      return Values;
    }
    return std::move(Err);
  };

  for (size_t Index = 0; Index != MaxCount; ++Index) {
    // Every integer but the first must be preceded by a comma. The RC tool
    // only sometimes requires the separator; we always require it.
    if (Index != 0) {
      if (auto CommaError = consumeType(Kind::Comma))
        return StopReading(std::move(CommaError));
    }

    auto IntResult = readInt();
    if (!IntResult)
      return StopReading(IntResult.takeError());
    Values.push_back(*IntResult);
  }

  return std::move(Values);
}

// Parse the run of optional statements that precedes a block, stopping (without
// consuming it) at the block-begin token. IsExtended is only forwarded to the
// single-statement parser; the extended statement set is not handled yet.
Expected<OptionalStmtList> RCParser::parseOptionalStatements(bool IsExtended) {
  OptionalStmtList Statements;

  for (;;) {
    // The last optional statement is always followed by the start of the
    // block, so that token terminates the list.
    if (isNextTokenKind(Kind::BlockBegin))
      break;

    ASSIGN_OR_RETURN(Stmt, parseSingleOptionalStatement(IsExtended));
    Statements.addStmt(std::move(*Stmt));
  }

  return std::move(Statements);
}

// Parse one optional statement: an identifier keyword (matched
// case-insensitively) followed by that statement's arguments. The bool
// parameter is currently unused.
Expected<std::unique_ptr<OptionalStmt>>
RCParser::parseSingleOptionalStatement(bool) {
  ASSIGN_OR_RETURN(Keyword, readIdentifier());

  if (Keyword->equals_lower("CHARACTERISTICS"))
    return parseCharacteristicsStmt();

  if (Keyword->equals_lower("LANGUAGE"))
    return parseLanguageStmt();

  if (Keyword->equals_lower("VERSION"))
    return parseVersionStmt();

  // Unknown keyword. It has already been consumed, so report it by pointing
  // one token back.
  return getExpectedError("optional statement type, BEGIN or '{'",
                          /* IsAlreadyRead = */ true);
}

// Parse the arguments of a top-level LANGUAGE resource. The LANGUAGE keyword
// itself has already been consumed by parseSingleResource.
RCParser::ParseType RCParser::parseLanguageResource() {
// Read LANGUAGE as an optional statement. If it's read correctly, we can
// upcast it to RCResource.
// The ParseOptionType result is converted to ParseType implicitly by the
// return statement; returning the call directly keeps it a temporary.
return parseLanguageStmt();
}

// Parse the body of an ICON resource: a single string argument.
RCParser::ParseType RCParser::parseIconResource() {
  ASSIGN_OR_RETURN(IconArg, readString());

  auto Icon = make_unique<IconResource>(*IconArg);
  // Explicit move: the pointer is converted to the ParseType return value.
  return std::move(Icon);
}

// Parse a STRINGTABLE resource (keyword already consumed): optional
// statements, a block-begin token, then (integer ID, string) pairs up to the
// block-end token.
RCParser::ParseType RCParser::parseStringTableResource() {
  ASSIGN_OR_RETURN(OptStatements, parseOptionalStatements());
  RETURN_IF_ERROR(consumeType(Kind::BlockBegin));

  auto Table = make_unique<StringTableResource>(std::move(*OptStatements));

  // Keep reading entries until the block is closed.
  bool BlockClosed = consumeOptionalType(Kind::BlockEnd);
  while (!BlockClosed) {
    // Each entry is a string ID (an integer) followed by the string itself.
    // Some documentation examples put a comma in between; we strictly
    // follow the single-statement definition and do not accept one.
    ASSIGN_OR_RETURN(EntryId, readInt());
    ASSIGN_OR_RETURN(EntryText, readString());
    Table->addString(*EntryId, *EntryText);

    BlockClosed = consumeOptionalType(Kind::BlockEnd);
  }

  return std::move(Table);
}

// Parse the arguments of a LANGUAGE statement: exactly two comma-separated
// integers, passed to LanguageResource in the order they were read.
RCParser::ParseOptionType RCParser::parseLanguageStmt() {
  ASSIGN_OR_RETURN(LangArgs, readIntsWithCommas(/* min = */ 2, /* max = */ 2));

  auto &First = (*LangArgs)[0];
  auto &Second = (*LangArgs)[1];
  return make_unique<LanguageResource>(First, Second);
}

// Parse the argument of a CHARACTERISTICS statement: a single integer.
RCParser::ParseOptionType RCParser::parseCharacteristicsStmt() {
  ASSIGN_OR_RETURN(Value, readInt());

  return make_unique<CharacteristicsStmt>(*Value);
}

// Parse the argument of a VERSION statement: a single integer.
RCParser::ParseOptionType RCParser::parseVersionStmt() {
  ASSIGN_OR_RETURN(Value, readInt());

  return make_unique<VersionStmt>(*Value);
}

// Create a ParserError saying that Message was expected. The error is located
// at the cursor, or at the token just before it when IsAlreadyRead is true
// (i.e. the offending token has already been consumed by the caller).
Error RCParser::getExpectedError(const Twine Message, bool IsAlreadyRead) {
  const auto Where = IsAlreadyRead ? std::prev(CurLoc) : CurLoc;
  return make_error<ParserError>(Message, Where, End);
}

} // namespace rc
} // namespace llvm

0 comments on commit 5cd3d5c

Please sign in to comment.