-
Notifications
You must be signed in to change notification settings - Fork 11.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang] Accept directive sentinels in macro-replaced source better #70699
Conversation
@llvm/pr-subscribers-flang-parser Author: Peter Klausler (klausler) Changes: At present, the prescanner emits an error if a source line or compiler directive, after macro replacement or not, contains a token with a non-Fortran character. In the particular case of the '!' character, the code that checks for bad characters will accept the '!' if it appears after a ';', since the '!' might begin a compiler directive. The current implementation fails when a compiler directive appears after some other character that might (by means of further source processing not visible to the prescanner) be replaced with a ';' or newline. Extend the bad character check for '!' to actually check for a compiler directive sentinel instead. Full diff: https://github.com/llvm/llvm-project/pull/70699.diff 5 Files Affected:
diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index 2f25b02bf7a323d..c9da6534ed88ba8 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -205,7 +205,7 @@ void Prescanner::Statement() {
Say(preprocessed->GetProvenanceRange(),
"Preprocessed line resembles a preprocessor directive"_warn_en_US);
preprocessed->ToLowerCase()
- .CheckBadFortranCharacters(messages_)
+ .CheckBadFortranCharacters(messages_, *this)
.CheckBadParentheses(messages_)
.Emit(cooked_);
break;
@@ -217,7 +217,7 @@ void Prescanner::Statement() {
preprocessed->ToLowerCase();
SourceFormChange(preprocessed->ToString());
preprocessed->ClipComment(*this, true /* skip first ! */)
- .CheckBadFortranCharacters(messages_)
+ .CheckBadFortranCharacters(messages_, *this)
.CheckBadParentheses(messages_)
.Emit(cooked_);
break;
@@ -233,7 +233,7 @@ void Prescanner::Statement() {
}
preprocessed->ToLowerCase()
.ClipComment(*this)
- .CheckBadFortranCharacters(messages_)
+ .CheckBadFortranCharacters(messages_, *this)
.CheckBadParentheses(messages_)
.Emit(cooked_);
break;
@@ -246,7 +246,7 @@ void Prescanner::Statement() {
if (inFixedForm_ && line.kind == LineClassification::Kind::Source) {
EnforceStupidEndStatementRules(tokens);
}
- tokens.CheckBadFortranCharacters(messages_)
+ tokens.CheckBadFortranCharacters(messages_, *this)
.CheckBadParentheses(messages_)
.Emit(cooked_);
}
@@ -1266,6 +1266,21 @@ const char *Prescanner::IsCompilerDirectiveSentinel(
return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str();
}
+const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const {
+ const char *p{token.begin()};
+ const char *end{p + token.size()};
+ while (p < end && (*p == ' ' || *p == '\n')) {
+ ++p;
+ }
+ if (p < end && *p == '!') {
+ ++p;
+ }
+ while (end > p && (end[-1] == ' ' || end[-1] == '\t')) {
+ --end;
+ }
+ return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr;
+}
+
constexpr bool IsDirective(const char *match, const char *dir) {
for (; *match; ++match) {
if (*match != ToLowerCaseLetter(*dir++)) {
diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h
index 021632657a98c13..276fa19a4b1c64c 100644
--- a/flang/lib/Parser/prescan.h
+++ b/flang/lib/Parser/prescan.h
@@ -68,7 +68,9 @@ class Prescanner {
bool IsNextLinePreprocessorDirective() const;
TokenSequence TokenizePreprocessorDirective();
Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
+
const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const;
+ const char *IsCompilerDirectiveSentinel(CharBlock) const;
template <typename... A> Message &Say(A &&...a) {
return messages_.Say(std::forward<A>(a)...);
diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp
index 139d2e1ba811d63..c5a630c471d16ea 100644
--- a/flang/lib/Parser/token-sequence.cpp
+++ b/flang/lib/Parser/token-sequence.cpp
@@ -343,16 +343,23 @@ ProvenanceRange TokenSequence::GetProvenanceRange() const {
}
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
- Messages &messages) const {
+ Messages &messages, const Prescanner &prescanner) const {
std::size_t tokens{SizeInTokens()};
- bool isBangOk{true};
for (std::size_t j{0}; j < tokens; ++j) {
CharBlock token{TokenAt(j)};
char ch{token.FirstNonBlank()};
if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
- if (ch == '!' && isBangOk) {
- // allow in !dir$
- } else if (ch < ' ' || ch >= '\x7f') {
+ if (ch == '!') {
+ if (prescanner.IsCompilerDirectiveSentinel(token)) {
+ continue;
+ } else if (j + 1 < tokens &&
+ prescanner.IsCompilerDirectiveSentinel(
+ TokenAt(j + 1))) { // !dir$, &c.
+ ++j;
+ continue;
+ }
+ }
+ if (ch < ' ' || ch >= '\x7f') {
messages.Say(GetTokenProvenanceRange(j),
"bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
} else {
@@ -360,11 +367,6 @@ const TokenSequence &TokenSequence::CheckBadFortranCharacters(
"bad character ('%c') in Fortran token"_err_en_US, ch);
}
}
- if (ch == ';') {
- isBangOk = true;
- } else if (ch != ' ') {
- isBangOk = false;
- }
}
return *this;
}
diff --git a/flang/lib/Parser/token-sequence.h b/flang/lib/Parser/token-sequence.h
index 6b9e1f87ee01609..3df403d41e636f9 100644
--- a/flang/lib/Parser/token-sequence.h
+++ b/flang/lib/Parser/token-sequence.h
@@ -123,7 +123,8 @@ class TokenSequence {
TokenSequence &RemoveBlanks(std::size_t firstChar = 0);
TokenSequence &RemoveRedundantBlanks(std::size_t firstChar = 0);
TokenSequence &ClipComment(const Prescanner &, bool skipFirst = false);
- const TokenSequence &CheckBadFortranCharacters(Messages &) const;
+ const TokenSequence &CheckBadFortranCharacters(
+ Messages &, const Prescanner &) const;
const TokenSequence &CheckBadParentheses(Messages &) const;
void Emit(CookedSource &) const;
llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
diff --git a/flang/test/Preprocessing/preprocessed-dirs.F90 b/flang/test/Preprocessing/preprocessed-dirs.F90
new file mode 100644
index 000000000000000..8ac769fdfb61da9
--- /dev/null
+++ b/flang/test/Preprocessing/preprocessed-dirs.F90
@@ -0,0 +1,8 @@
+! RUN: %flang -fc1 -E -fopenacc %s 2>&1 | FileCheck %s
+!CHECK: subroutine r4(x) Z real :: x Z !$acc routine Z print *, x Z end
+#define SUB(s, t) subroutine s(x) Z\
+ t :: x Z\
+ !$acc routine Z\
+ print *, x Z\
+ end subroutine s
+SUB(r4, real)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
At present, the prescanner emits an error if a source line or compiler directive, after macro replacement or not, contains a token with a non-Fortran character. In the particular case of the '!' character, the code that checks for bad characters will accept the '!' if it appears after a ';', since the '!' might begin a compiler directive. The current implementation fails when a compiler directive appears after some other character that might (by means of further source processing not visible to the prescanner) be replaced with a ';' or newline. Extend the bad character check for '!' to actually check for a compiler directive sentinel instead.
At present, the prescanner emits an error if a source line or compiler directive, after macro replacement or not, contains a token with a non-Fortran character. In the particular case of the '!' character, the code that checks for bad characters will accept the '!' if it appears after a ';', since the '!' might begin a compiler directive.
The current implementation fails when a compiler directive appears after some other character that might (by means of further source processing not visible to the prescanner) be replaced with a ';' or newline.
Extend the bad character check for '!' to actually check for a compiler directive sentinel instead.