-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[flang] Tokenize all -D macro bodies, and do it better #168116
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-flang-parser Author: Peter Klausler (klausler) ChangesThe compiler presently tokenizes the bodies of only function-like macro definitions from the command line, and does so crudely. Tokenize keyword-like macros too, get character literals right, and handle numeric constants correctly. (Also delete two needless functions noticed in characters.h.) Fixes #168077. Full diff: https://github.com/llvm/llvm-project/pull/168116.diff 4 Files Affected:
diff --git a/flang/include/flang/Parser/characters.h b/flang/include/flang/Parser/characters.h
index dbdc058c44995..3761700ad348c 100644
--- a/flang/include/flang/Parser/characters.h
+++ b/flang/include/flang/Parser/characters.h
@@ -69,10 +69,6 @@ inline constexpr char ToLowerCaseLetter(char ch) {
return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
}
-inline constexpr char ToLowerCaseLetter(char &&ch) {
- return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
-}
-
inline std::string ToLowerCaseLetters(std::string_view str) {
std::string lowered{str};
for (char &ch : lowered) {
@@ -85,10 +81,6 @@ inline constexpr char ToUpperCaseLetter(char ch) {
return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch;
}
-inline constexpr char ToUpperCaseLetter(char &&ch) {
- return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch;
-}
-
inline std::string ToUpperCaseLetters(std::string_view str) {
std::string raised{str};
for (char &ch : raised) {
diff --git a/flang/include/flang/Parser/preprocessor.h b/flang/include/flang/Parser/preprocessor.h
index bb13b4463fa80..0405d42e64f7b 100644
--- a/flang/include/flang/Parser/preprocessor.h
+++ b/flang/include/flang/Parser/preprocessor.h
@@ -38,6 +38,7 @@ class Definition {
Definition(const std::vector<std::string> &argNames, const TokenSequence &,
std::size_t firstToken, std::size_t tokens, bool isVariadic = false);
Definition(const std::string &predefined, AllSources &);
+ Definition(const TokenSequence &predefined);
bool isFunctionLike() const { return isFunctionLike_; }
std::size_t argumentCount() const { return argNames_.size(); }
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 9176b4db3408a..2ba419f055cc4 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -43,6 +43,9 @@ Definition::Definition(const std::string &predefined, AllSources &sources)
replacement_{
predefined, sources.AddCompilerInsertion(predefined).start()} {}
+Definition::Definition(const TokenSequence &repl)
+ : isPredefined_{true}, replacement_{repl} {}
+
bool Definition::set_isDisabled(bool disable) {
bool was{isDisabled_};
isDisabled_ = disable;
@@ -371,40 +374,66 @@ TokenSequence Preprocessor::TokenizeMacroBody(const std::string &str) {
Provenance provenance{allSources_.AddCompilerInsertion(str).start()};
auto end{str.size()};
for (std::string::size_type at{0}; at < end;) {
- // Alternate between tokens that are identifiers (and therefore subject
- // to argument replacement) and those that are not.
- auto start{str.find_first_of(idChars, at)};
- if (start == str.npos) {
- tokens.Put(str.substr(at), provenance + at);
- break;
- } else if (start > at) {
- tokens.Put(str.substr(at, start - at), provenance + at);
+ char ch{str.at(at)};
+ if (IsWhiteSpace(ch)) {
+ ++at;
+ continue;
}
- at = str.find_first_not_of(idChars, start + 1);
- if (at == str.npos) {
+ auto start{at};
+ if (IsLegalIdentifierStart(ch)) {
+ for (++at; at < end && IsLegalInIdentifier(str.at(at)); ++at) {
+ }
+ } else if (IsDecimalDigit(ch) || ch == '.') {
+ for (++at; at < end; ++at) {
+ ch = str.at(at);
+ if (!IsDecimalDigit(ch) && ch != '.') {
+ break;
+ }
+ }
+ if (at < end) {
+ ch = ToUpperCaseLetter(str.at(at));
+ if (ch == 'E' || ch == 'D' || ch == 'Q') {
+ if (++at < end) {
+ ch = str.at(at);
+ if (ch == '+' || ch == '-') {
+ ++at;
+ }
+ for (; at < end && IsDecimalDigit(str.at(at)); ++at) {
+ }
+ }
+ }
+ }
+ } else if (ch == '\'' || ch == '"') {
+ for (++at; at < end && str.at(at) != ch; ++at) {
+ }
+ if (at < end) {
+ ++at;
+ }
+ } else {
+ ++at; // single-character token
+ }
+ if (at >= end || at == str.npos) {
tokens.Put(str.substr(start), provenance + start);
break;
- } else {
- tokens.Put(str.substr(start, at - start), provenance + start);
}
+ tokens.Put(str.substr(start, at - start), provenance + start);
}
return tokens;
}
void Preprocessor::Define(const std::string ¯o, const std::string &value) {
+ auto rhs{TokenizeMacroBody(value)};
if (auto lhs{TokenizeMacroNameAndArgs(macro)}) {
// function-like macro
CharBlock macroName{SaveTokenAsName(lhs->front())};
auto iter{lhs->begin()};
++iter;
std::vector<std::string> argNames{iter, lhs->end()};
- auto rhs{TokenizeMacroBody(value)};
definitions_.emplace(std::make_pair(macroName,
Definition{
argNames, rhs, 0, rhs.SizeInTokens(), /*isVariadic=*/false}));
} else { // keyword macro
- definitions_.emplace(
- SaveTokenAsName(macro), Definition{value, allSources_});
+ definitions_.emplace(SaveTokenAsName(macro), Definition{rhs});
}
}
diff --git a/flang/test/Preprocessing/bug168077.F90 b/flang/test/Preprocessing/bug168077.F90
new file mode 100644
index 0000000000000..d6c1bb0fa0010
--- /dev/null
+++ b/flang/test/Preprocessing/bug168077.F90
@@ -0,0 +1,6 @@
+!RUN: %flang -E -DNVAR=2+1+0+0 %s 2>&1 | FileCheck %s
+!CHECK: pass
+#if NVAR > 2
+call pass
+#endif
+end
|
|
LGTM, Thanks ! |
akuhlens
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
flang/lib/Parser/preprocessor.cpp
Outdated
| } | ||
|
|
||
| void Preprocessor::Define(const std::string ¯o, const std::string &value) { | ||
| auto rhs{TokenizeMacroBody(value)}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should you put an explicit type here since it isn't one of the forms that that explicitly mentions the type? Looking at the rest of the code here, it heavily uses auto without type, so I could see the argument for consistency with existing code.
flang/lib/Parser/preprocessor.cpp
Outdated
| } | ||
| at = str.find_first_not_of(idChars, start + 1); | ||
| if (at == str.npos) { | ||
| auto start{at}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should you use an explicit type here?
The compiler presently tokenizes the bodies of only function-like macro definitions from the command line, and does so crudely. Tokenize keyword-like macros too, get character literals right, and handle numeric constants correctly. (Also delete two needless functions noticed in characters.h.) Fixes llvm#168077.
🐧 Linux x64 Test Results
|
The compiler presently tokenizes the bodies of only function-like macro definitions from the command line, and does so crudely. Tokenize keyword-like macros too, get character literals right, and handle numeric constants correctly. (Also delete two needless functions noticed in characters.h.)
Fixes #168077.