Skip to content

Commit 5cd3d5c

Browse files
committed
Reapply: [llvm-rc] Add basic RC scripts parsing ability.
For now, the parser supports only a limited set of statements and resources. This will be extended in the following patches. Thanks to Nico Weber (thakis) for his original work in this area. This patch was originally submitted as r311175 and was reverted in r311177 because of compilation problems under gcc. Differential Revision: https://reviews.llvm.org/D36340 llvm-svn: 311184
1 parent 291d658 commit 5cd3d5c

18 files changed

+731
-1
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
meh IcOn "hello.bmp"
2+
Icon Icon "Icon"
3+
4+
LANGUAGE 5, 12
5+
6+
STRINGTABLE
7+
LANGUAGE 1, 1
8+
CHARACTERISTICS 500
9+
LANGUAGE 3, 4
10+
VERSION 14
11+
{
12+
1 "hello"
13+
2 "world"
14+
}
15+
STRINGTABLE BEGIN END
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
LANGUAGE
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
LANGUAGE 5 7
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
LANGUAGE 5,, 7
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
STRINGTABLE
2+
CHARACTERISTICS
3+
BEGIN
4+
100 "No integer after CHARACTERISTICS."
5+
END
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
& ICON "WeirdResourceName.ico"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
HELLO
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
HELLO WORLD
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
STRINGTABLE
2+
VERSION 8
3+
{
4+
1 "hello"
5+
2
6+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
STRINGTABLE
2+
NONSENSETYPE 12 34
3+
BEGIN
4+
END

llvm/test/tools/llvm-rc/parser.test

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; RUN: llvm-rc /V %p/Inputs/parser-correct-everything.rc | FileCheck %s --check-prefix PGOOD
2+
3+
; PGOOD: Icon (meh): "hello.bmp"
4+
; PGOOD-NEXT: Icon (Icon): "Icon"
5+
; PGOOD-NEXT: Language: 5, Sublanguage: 12
6+
; PGOOD-NEXT: StringTable:
7+
; PGOOD-NEXT: Option: Language: 1, Sublanguage: 1
8+
; PGOOD-NEXT: Option: Characteristics: 500
9+
; PGOOD-NEXT: Option: Language: 3, Sublanguage: 4
10+
; PGOOD-NEXT: Option: Version: 14
11+
; PGOOD-NEXT: 1 => "hello"
12+
; PGOOD-NEXT: 2 => "world"
13+
; PGOOD-NEXT: StringTable:
14+
15+
16+
; RUN: not llvm-rc /V %p/Inputs/parser-stringtable-no-string.rc 2> %t2
17+
; RUN: FileCheck %s --check-prefix PSTRINGTABLE1 --input-file %t2
18+
19+
; PSTRINGTABLE1: llvm-rc: Error parsing file: expected string, got }
20+
21+
22+
; RUN: not llvm-rc /V %p/Inputs/parser-stringtable-weird-option.rc 2> %t3
23+
; RUN: FileCheck %s --check-prefix PSTRINGTABLE2 --input-file %t3
24+
25+
; PSTRINGTABLE2: llvm-rc: Error parsing file: expected optional statement type, BEGIN or '{', got NONSENSETYPE
26+
27+
28+
; RUN: not llvm-rc /V %p/Inputs/parser-eof.rc 2> %t4
29+
; RUN: FileCheck %s --check-prefix PEOF --input-file %t4
30+
31+
; PEOF: llvm-rc: Error parsing file: expected integer, got <EOF>
32+
33+
34+
; RUN: not llvm-rc /V %p/Inputs/parser-no-characteristics-arg.rc 2> %t5
35+
; RUN: FileCheck %s --check-prefix PCHARACTERISTICS1 --input-file %t5
36+
37+
; PCHARACTERISTICS1: llvm-rc: Error parsing file: expected integer, got BEGIN
38+
39+
40+
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-token.rc 2> %t6
41+
; RUN: FileCheck %s --check-prefix PNONSENSE1 --input-file %t6
42+
43+
; PNONSENSE1: llvm-rc: Error parsing file: expected int or identifier, got &
44+
45+
46+
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-type.rc 2> %t7
47+
; RUN: FileCheck %s --check-prefix PNONSENSE2 --input-file %t7
48+
49+
; PNONSENSE2: llvm-rc: Error parsing file: expected resource type, got WORLD
50+
51+
52+
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-type-eof.rc 2> %t8
53+
; RUN: FileCheck %s --check-prefix PNONSENSE3 --input-file %t8
54+
55+
; PNONSENSE3: llvm-rc: Error parsing file: expected int or identifier, got <EOF>
56+
57+
58+
; RUN: not llvm-rc /V %p/Inputs/parser-language-no-comma.rc 2> %t9
59+
; RUN: FileCheck %s --check-prefix PLANGUAGE1 --input-file %t9
60+
61+
; PLANGUAGE1: llvm-rc: Error parsing file: expected ',', got 7
62+
63+
64+
; RUN: not llvm-rc /V %p/Inputs/parser-language-too-many-commas.rc 2> %t10
65+
; RUN: FileCheck %s --check-prefix PLANGUAGE2 --input-file %t10
66+
67+
; PLANGUAGE2: llvm-rc: Error parsing file: expected integer, got ,

llvm/test/tools/llvm-rc/tokenizer.test

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
; RUN: llvm-rc /V %p/Inputs/tokens.rc | FileCheck %s
1+
; RUN: not llvm-rc /V %p/Inputs/tokens.rc | FileCheck %s
2+
; llvm-rc fails now on this sample because it is an invalid resource file
3+
; script. We silence the error message and just analyze the output.
24

35
; CHECK: Int: 1; int value = 1
46
; CHECK-NEXT: Plus: +

llvm/tools/llvm-rc/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,7 @@ add_public_tablegen_target(RcTableGen)
1010

1111
add_llvm_tool(llvm-rc
1212
llvm-rc.cpp
13+
ResourceScriptParser.cpp
14+
ResourceScriptStmt.cpp
1315
ResourceScriptToken.cpp
1416
)
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
//===-- ResourceScriptParser.cpp --------------------------------*- C++-*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===---------------------------------------------------------------------===//
9+
//
10+
// This implements the parser defined in ResourceScriptParser.h.
11+
//
12+
//===---------------------------------------------------------------------===//
13+
14+
#include "ResourceScriptParser.h"
15+
16+
// Take an expression returning llvm::Error and forward the error out of the
// enclosing function if it exists.
//
// The expansion is wrapped in `do { ... } while (false)` so that the macro
// behaves like a single statement: it must be followed by a semicolon at the
// call site and can safely be used as the body of an unbraced if/else.
#define RETURN_IF_ERROR(Expr)                                                  \
  do {                                                                         \
    if (auto Err = (Expr))                                                     \
      return std::move(Err);                                                   \
  } while (false)

// Take an expression returning llvm::Expected<T> and assign it to a new local
// variable named Var, or forward the error out of the enclosing function.
//
// NOTE: this expands to a declaration followed by an `if` statement, so it
// cannot be wrapped in a single-statement form. Do not use it as the sole
// body of an unbraced if/else/loop.
#define ASSIGN_OR_RETURN(Var, Expr)                                            \
  auto Var = (Expr);                                                           \
  if (!Var)                                                                    \
    return Var.takeError();
27+
28+
namespace llvm {
29+
namespace rc {
30+
31+
RCParser::ParserError::ParserError(const Twine Expected, const LocIter CurLoc,
32+
const LocIter End)
33+
: ErrorLoc(CurLoc), FileEnd(End) {
34+
CurMessage = "Error parsing file: expected " + Expected.str() + ", got " +
35+
(CurLoc == End ? "<EOF>" : CurLoc->value()).str();
36+
}
37+
38+
char RCParser::ParserError::ID = 0;
39+
40+
// Constructs a parser over a copy of TokenList. The cursor starts at the first
// token of the parser's own copy and End marks one past its last token.
RCParser::RCParser(const std::vector<RCToken> &TokenList)
    : Tokens(TokenList),
      CurLoc(Tokens.begin()),
      End(Tokens.end()) {}

// Constructs a parser that takes over TokenList by moving from it. The cursor
// and End are taken from the parser's own Tokens member, as above.
RCParser::RCParser(std::vector<RCToken> &&TokenList)
    : Tokens(std::move(TokenList)),
      CurLoc(Tokens.begin()),
      End(Tokens.end()) {}
45+
46+
// Returns true when the cursor has reached the end of the token list.
bool RCParser::isEof() const {
  const bool NoTokensLeft = (CurLoc == End);
  return NoTokensLeft;
}
47+
48+
// Parses one top-level resource definition starting at the cursor.
//
// Returns the parsed resource, or an error describing the first token that did
// not fit the grammar.
RCParser::ParseType RCParser::parseSingleResource() {
  // Usually the first token is the resource's name. LANGUAGE and STRINGTABLE
  // are the exception: they are unnamed, so the first token is already the
  // type keyword.
  auto NameToken = readTypeOrName();
  if (!NameToken)
    return NameToken.takeError();

  if (NameToken->equalsLower("LANGUAGE"))
    return parseLanguageResource();
  if (NameToken->equalsLower("STRINGTABLE"))
    return parseStringTableResource();

  // A named resource: the token we just read was the name, so the next one
  // must be the resource type.
  auto TypeToken = readTypeOrName();
  if (!TypeToken)
    return TypeToken.takeError();

  // ICON is the only named resource type handled here; anything else is
  // reported against the (already consumed) type token.
  if (!TypeToken->equalsLower("ICON"))
    return getExpectedError("resource type", /* IsAlreadyRead = */ true);

  ParseType Result = parseIconResource();

  // Attach the name only when the body was parsed successfully.
  if (Result)
    (*Result)->setName(*NameToken);

  return Result;
}
76+
77+
// Returns true iff there is a next token and it is of kind TokenKind.
// Never consumes anything; at end of input the answer is always false.
bool RCParser::isNextTokenKind(Kind TokenKind) const {
  if (isEof())
    return false;
  return look().kind() == TokenKind;
}
80+
81+
// Returns the token under the cursor without advancing.
// Precondition (asserted): the cursor is not at end of input.
const RCToken &RCParser::look() const {
  assert(!isEof());
  const RCToken &Next = *CurLoc;
  return Next;
}
85+
86+
// Returns the token under the cursor and advances the cursor past it.
// Precondition (asserted): the cursor is not at end of input.
const RCToken &RCParser::read() {
  assert(!isEof());
  const RCToken &Current = *CurLoc;
  ++CurLoc;
  return Current;
}
90+
91+
// Advances the cursor by one token, discarding that token.
// Precondition (asserted): the cursor is not at end of input.
void RCParser::consume() {
  assert(!isEof());
  ++CurLoc;
}
95+
96+
// Reads an integer token and returns its value.
// If the next token is not an integer (or the input has ended), nothing is
// consumed and an "expected integer" error is returned.
Expected<uint32_t> RCParser::readInt() {
  if (isNextTokenKind(Kind::Int))
    return read().intValue();
  return getExpectedError("integer");
}
101+
102+
// Reads a string token and returns its text as stored in the token.
// If the next token is not a string (or the input has ended), nothing is
// consumed and an "expected string" error is returned.
Expected<StringRef> RCParser::readString() {
  if (isNextTokenKind(Kind::String))
    return read().value();
  return getExpectedError("string");
}
107+
108+
// Reads an identifier token and returns its text.
// If the next token is not an identifier (or the input has ended), nothing is
// consumed and an "expected identifier" error is returned.
Expected<StringRef> RCParser::readIdentifier() {
  if (isNextTokenKind(Kind::Identifier))
    return read().value();
  return getExpectedError("identifier");
}
113+
114+
// Reads a resource name or resource type.
//
// We accept only an integer or an identifier here; the original RC tool is
// much more liberal about what may appear in this position. Any other token
// (or end of input) yields an "expected int or identifier" error and leaves
// the cursor untouched.
Expected<IntOrString> RCParser::readTypeOrName() {
  if (isNextTokenKind(Kind::Int))
    return IntOrString(read().intValue());

  if (isNextTokenKind(Kind::Identifier))
    return IntOrString(read().value());

  return getExpectedError("int or identifier");
}
126+
127+
// Requires the next token to be of kind TokenKind.
//
// On a match the token is consumed and success is returned. Otherwise nothing
// is consumed and the returned error names the expected token: the token's
// name for regular tokens, or its character literal for short tokens, both
// generated from the list in ResourceScriptTokenList.h.
Error RCParser::consumeType(Kind TokenKind) {
  if (consumeOptionalType(TokenKind))
    return Error::success();

  // Mismatch: pick the description that corresponds to the requested kind.
  switch (TokenKind) {
#define TOKEN(TokenName)                                                       \
  case Kind::TokenName:                                                        \
    return getExpectedError(#TokenName);
#define SHORT_TOKEN(TokenName, TokenCh)                                        \
  case Kind::TokenName:                                                        \
    return getExpectedError(#TokenCh);
#include "ResourceScriptTokenList.h"
#undef SHORT_TOKEN
#undef TOKEN
  }

  llvm_unreachable("All case options exhausted.");
}
147+
148+
// Consumes the next token only if it is of kind TokenKind.
// Returns true when a token was consumed, false when the cursor was left
// where it was (wrong kind or end of input).
bool RCParser::consumeOptionalType(Kind TokenKind) {
  const bool Matches = isNextTokenKind(TokenKind);
  if (Matches)
    consume();
  return Matches;
}
156+
157+
// Reads a comma-separated list of integers: between MinCount and MaxCount of
// them (MinCount <= MaxCount is asserted).
//
// Reading stops at the first token that does not continue the list. If at
// least MinCount integers were collected by then, the partial list is returned
// and the failure is discarded; otherwise the failure is propagated.
Expected<SmallVector<uint32_t, 8>>
RCParser::readIntsWithCommas(size_t MinCount, size_t MaxCount) {
  assert(MinCount <= MaxCount);

  SmallVector<uint32_t, 8> Ints;

  // Decides what to do when the list cannot be continued: a hard error if we
  // are still short of MinCount, otherwise swallow the error and return what
  // has been read so far.
  auto StopReading =
      [&](llvm::Error Err) -> Expected<SmallVector<uint32_t, 8>> {
    if (Ints.size() < MinCount)
      return std::move(Err);
    consumeError(std::move(Err));
    return Ints;
  };

  for (size_t Idx = 0; Idx < MaxCount; ++Idx) {
    // Every integer but the first must be preceded by a comma. The RC tool
    // itself is inconsistent about requiring them; we always do.
    if (Idx != 0) {
      auto SeparatorErr = consumeType(Kind::Comma);
      if (SeparatorErr)
        return StopReading(std::move(SeparatorErr));
    }

    auto Value = readInt();
    if (!Value)
      return StopReading(Value.takeError());
    Ints.push_back(*Value);
  }

  // The explicit move is kept from the original code; presumably it is needed
  // for the conversion to Expected<> on older compilers - verify before
  // removing.
  return std::move(Ints);
}
188+
189+
// As for now, we ignore the extended set of statements.
190+
Expected<OptionalStmtList> RCParser::parseOptionalStatements(bool IsExtended) {
191+
OptionalStmtList Result;
192+
193+
// The last statement is always followed by the start of the block.
194+
while (!isNextTokenKind(Kind::BlockBegin)) {
195+
ASSIGN_OR_RETURN(SingleParse, parseSingleOptionalStatement(IsExtended));
196+
Result.addStmt(std::move(*SingleParse));
197+
}
198+
199+
return std::move(Result);
200+
}
201+
202+
// Parses a single optional statement: CHARACTERISTICS, LANGUAGE or VERSION
// (matched case-insensitively), followed by its arguments.
//
// The bool parameter (the "extended" flag) is currently unused. An identifier
// that is none of the above is reported against the already-consumed token.
Expected<std::unique_ptr<OptionalStmt>>
RCParser::parseSingleOptionalStatement(bool) {
  auto Keyword = readIdentifier();
  if (!Keyword)
    return Keyword.takeError();

  if (Keyword->equals_lower("CHARACTERISTICS"))
    return parseCharacteristicsStmt();
  if (Keyword->equals_lower("LANGUAGE"))
    return parseLanguageStmt();
  if (Keyword->equals_lower("VERSION"))
    return parseVersionStmt();

  return getExpectedError("optional statement type, BEGIN or '{'",
                          /* IsAlreadyRead = */ true);
}
215+
216+
// Parses the arguments of a top-level LANGUAGE resource.
//
// A top-level LANGUAGE has the same syntax as the LANGUAGE optional statement,
// so the statement parser is reused; its result is converted to the resource
// return type on the way out.
RCParser::ParseType RCParser::parseLanguageResource() {
  return parseLanguageStmt();
}
221+
222+
// Parses the body of an ICON resource: a single string argument.
// Returns the new IconResource, or the error from reading the string.
RCParser::ParseType RCParser::parseIconResource() {
  auto Location = readString();
  if (!Location)
    return Location.takeError();

  return make_unique<IconResource>(*Location);
}
226+
227+
// Parses a STRINGTABLE resource (the keyword itself has already been read):
// optional statements, a block-begin token, any number of `<int> <string>`
// entries, and a block-end token.
RCParser::ParseType RCParser::parseStringTableResource() {
  auto Options = parseOptionalStatements();
  if (!Options)
    return Options.takeError();

  if (auto BeginErr = consumeType(Kind::BlockBegin))
    return std::move(BeginErr);

  auto Table = make_unique<StringTableResource>(std::move(*Options));

  // Keep reading entries until the block is closed.
  for (;;) {
    if (consumeOptionalType(Kind::BlockEnd))
      break;

    // Each entry is the string's ID (an integer) followed by the string.
    // Some examples in the documentation suggest that a comma may separate
    // the two; we strictly follow the single-statement definition instead.
    auto StringID = readInt();
    if (!StringID)
      return StringID.takeError();

    auto Text = readString();
    if (!Text)
      return Text.takeError();

    Table->addString(*StringID, *Text);
  }

  // Explicit move: Table is a pointer to the derived resource type and has to
  // be converted to the return type.
  return std::move(Table);
}
245+
246+
// Parses the arguments of LANGUAGE: exactly two comma-separated integers,
// passed to LanguageResource in the order they appear.
RCParser::ParseOptionType RCParser::parseLanguageStmt() {
  auto LangArgs = readIntsWithCommas(/* min = */ 2, /* max = */ 2);
  if (!LangArgs)
    return LangArgs.takeError();

  return make_unique<LanguageResource>((*LangArgs)[0], (*LangArgs)[1]);
}
250+
251+
// Parses the argument of CHARACTERISTICS: a single integer.
RCParser::ParseOptionType RCParser::parseCharacteristicsStmt() {
  auto Value = readInt();
  if (!Value)
    return Value.takeError();

  return make_unique<CharacteristicsStmt>(*Value);
}
255+
256+
// Parses the argument of VERSION: a single integer.
RCParser::ParseOptionType RCParser::parseVersionStmt() {
  auto Value = readInt();
  if (!Value)
    return Value.takeError();

  return make_unique<VersionStmt>(*Value);
}
260+
261+
// Creates a ParserError saying that Message was expected at the current
// position.
//
// Message       - description of what the parser expected to find.
// IsAlreadyRead - when true, the offending token has already been consumed,
//                 so the error is reported against the token just before the
//                 cursor instead of the one under it.
Error RCParser::getExpectedError(const Twine Message, bool IsAlreadyRead) {
  // Stepping back from the very first position would be undefined behavior;
  // callers may only pass IsAlreadyRead after actually reading a token.
  assert((!IsAlreadyRead || CurLoc != Tokens.begin()) &&
         "IsAlreadyRead requires a previously read token.");

  const LocIter ErrorPos = IsAlreadyRead ? std::prev(CurLoc) : CurLoc;
  return make_error<ParserError>(Message, ErrorPos, End);
}
265+
266+
} // namespace rc
267+
} // namespace llvm

0 commit comments

Comments
 (0)