-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang-format] Add an fnmatch-like function for .clang-format-ignore #76021
Conversation
@llvm/pr-subscribers-clang-format Author: Owen Pan (owenca) Changes: This is needed because Windows doesn't have anything equivalent to the POSIX fnmatch() function. Full diff: https://github.com/llvm/llvm-project/pull/76021.diff 5 Files Affected:
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 015ec7c0cc84e3..84a3c136f650a8 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_library(clangFormat
IntegerLiteralSeparatorFixer.cpp
MacroCallReconstructor.cpp
MacroExpander.cpp
+ MatchFilePath.cpp
NamespaceEndCommentsFixer.cpp
ObjCPropertyAttributeOrderFixer.cpp
QualifierAlignmentFixer.cpp
diff --git a/clang/lib/Format/MatchFilePath.cpp b/clang/lib/Format/MatchFilePath.cpp
new file mode 100644
index 00000000000000..203a900e3d3bdd
--- /dev/null
+++ b/clang/lib/Format/MatchFilePath.cpp
@@ -0,0 +1,112 @@
+//===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the functionality of matching a file path name to
+/// a pattern, similar to the POSIX fnmatch() function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MatchFilePath.h"
+
+using namespace llvm;
+
+namespace clang {
+namespace format {
+
+// Check whether `FilePath` matches `Pattern` based on POSIX Section 2.13.
+//
+// Supported notation:
+//   `\c`     matches the character `c` literally; a trailing lone backslash
+//            never matches.
+//   `?`      matches any single character except `/`.
+//   `*`      matches any (possibly empty) sequence of characters other than
+//            `/`.
+//   `[...]`  matches one character from the set (ranges such as `a-z` are
+//            allowed); `[!...]` matches one character not in the set. Neither
+//            form matches `/`. An unpaired `[`, `[]`, `[!]`, and brackets
+//            containing `/` are matched literally.
+// Every other character matches itself. A leading period gets no special
+// treatment. Both arguments must be non-empty. Returns true iff the whole of
+// `FilePath` is matched by the whole of `Pattern`.
+bool matchFilePath(StringRef Pattern, StringRef FilePath) {
+  assert(!Pattern.empty());
+  assert(!FilePath.empty());
+
+  constexpr auto Separator = '/';
+  const auto EOP = Pattern.size();  // End of `Pattern`.
+  const auto End = FilePath.size(); // End of `FilePath`.
+  unsigned I = 0;                   // Index to `Pattern`.
+
+  // No match if `Pattern` ends with a non-meta character not equal to the last
+  // character of `FilePath`. This cheap early exit also prunes most of the
+  // failing recursive calls made for `*` below.
+  if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePath.back())
+    return false;
+
+  // Walk `FilePath` by index (rather than with a range-based for) so that the
+  // `*` case below operates on the very position the loop is at.
+  for (unsigned J = 0; J < End; ++J) {
+    if (I == EOP) // `Pattern` is exhausted but `FilePath` is not.
+      return false;
+
+    const auto F = FilePath[J];
+    switch (Pattern[I]) {
+    case '\\':
+      // An escaped character must match literally; a trailing lone backslash
+      // matches nothing.
+      if (++I == EOP || F != Pattern[I])
+        return false;
+      ++I;
+      break;
+    case '?':
+      if (F == Separator)
+        return false;
+      ++I;
+      break;
+    case '*': {
+      // Skip consecutive stars.
+      while (++I < EOP && Pattern[I] == '*') {
+      }
+      // `Pattern` ends with a star: it matches the rest of `FilePath` iff no
+      // separator remains, the current character included.
+      if (I == EOP)
+        return FilePath.find(Separator, J) == StringRef::npos;
+      // Let the star consume 0, 1, 2, ... characters of the current path
+      // component and try to match the rest of `Pattern` against what is left.
+      // The position of the next separator is tried as well (the star then
+      // covers the whole remainder of the component), but the star never
+      // consumes the separator itself.
+      for (const auto Rest = Pattern.substr(I); J < End; ++J) {
+        if (matchFilePath(Rest, FilePath.substr(J)))
+          return true;
+        if (FilePath[J] == Separator)
+          return false;
+      }
+      // The star consumed the rest of `FilePath`, but `Rest` starts with a
+      // non-star and therefore needs at least one more character.
+      return false;
+    }
+    case '[':
+      // Skip e.g. `[!]`.
+      if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
+        // Skip unpaired `[`, brackets containing slashes, and `[]`.
+        // `K` is the index of the closing `]`.
+        if (const auto K = Pattern.find_first_of("]/", I + 1);
+            K != StringRef::npos && Pattern[K] == ']' && K > I + 1) {
+          if (F == Separator) // Bracket expressions never match `/`.
+            return false;
+          ++I; // After the `[`.
+          bool Negated = false;
+          if (Pattern[I] == '!') {
+            Negated = true;
+            ++I; // After the `!`.
+          }
+          // Test `F` against each range or single character in the set until
+          // one matches or the set is exhausted.
+          bool Match = false;
+          do {
+            if (I + 2 < K && Pattern[I + 1] == '-') {
+              Match = Pattern[I] <= F && F <= Pattern[I + 2];
+              I += 3; // After the range, e.g. `A-Z`.
+            } else {
+              Match = F == Pattern[I++];
+            }
+          } while (!Match && I < K);
+          if (Negated ? Match : !Match)
+            return false;
+          I = K + 1; // After the `]`.
+          break;
+        }
+      }
+      [[fallthrough]]; // Match `[` literally.
+    default:
+      if (F != Pattern[I])
+        return false;
+      ++I;
+    }
+  }
+
+  // Match trailing stars with null strings.
+  while (I < EOP && Pattern[I] == '*')
+    ++I;
+
+  return I == EOP;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/clang/lib/Format/MatchFilePath.h b/clang/lib/Format/MatchFilePath.h
new file mode 100644
index 00000000000000..482dab7c748e51
--- /dev/null
+++ b/clang/lib/Format/MatchFilePath.h
@@ -0,0 +1,22 @@
+//===--- MatchFilePath.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
+#define LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace format {
+
+bool matchFilePath(llvm::StringRef Pattern, llvm::StringRef FilePath);
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index 53136328928f5c..71f5886d946c80 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -27,6 +27,7 @@ add_clang_unittest(FormatTests
IntegerLiteralSeparatorTest.cpp
MacroCallReconstructorTest.cpp
MacroExpanderTest.cpp
+ MatchFilePathTest.cpp
NamespaceEndCommentsFixerTest.cpp
ObjCPropertyAttributeOrderFixerTest.cpp
QualifierFixerTest.cpp
diff --git a/clang/unittests/Format/MatchFilePathTest.cpp b/clang/unittests/Format/MatchFilePathTest.cpp
new file mode 100644
index 00000000000000..f236987cfa744f
--- /dev/null
+++ b/clang/unittests/Format/MatchFilePathTest.cpp
@@ -0,0 +1,156 @@
+//===- unittest/Format/MatchFilePathTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../lib/Format/MatchFilePath.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+namespace {
+
+// Fixture for the matchFilePath() tests.
+class MatchFilePathTest : public ::testing::Test {
+protected:
+  // Forwards to matchFilePath(). Note the swapped parameter order: the file
+  // path comes first and the pattern second, which is the order used by the
+  // test cases below.
+  bool match(llvm::StringRef FilePath, llvm::StringRef Pattern) {
+    return matchFilePath(Pattern, FilePath);
+  }
+};
+
+// Almost all of the test cases below are from:
+// https://github.com/python/cpython/blob/main/Lib/test/test_fnmatch.py
+
+// `?`, `*` and simple bracket expressions.
+TEST_F(MatchFilePathTest, Wildcard) {
+  EXPECT_TRUE(match("abc", "?*?"));
+  EXPECT_TRUE(match("abc", "???*"));
+  EXPECT_TRUE(match("abc", "*???"));
+  EXPECT_TRUE(match("abc", "???"));
+  EXPECT_TRUE(match("abc", "*"));
+  EXPECT_TRUE(match("abc", "ab[cd]"));
+  EXPECT_TRUE(match("abc", "ab[!de]"));
+  EXPECT_FALSE(match("abc", "ab[de]"));
+  EXPECT_FALSE(match("a", "??"));
+  EXPECT_FALSE(match("a", "b"));
+}
+
+// A backslash escapes the next pattern character outside of brackets and is
+// an ordinary set member inside brackets.
+TEST_F(MatchFilePathTest, Backslash) {
+  EXPECT_TRUE(match("a?", R"(a\?)"));
+  EXPECT_FALSE(match("a\\", R"(a\)"));
+  EXPECT_TRUE(match("\\", R"([\])"));
+  EXPECT_TRUE(match("a", R"([!\])"));
+  EXPECT_FALSE(match("\\", R"([!\])"));
+}
+
+// Newlines are matched like any other character.
+TEST_F(MatchFilePathTest, Newline) {
+  EXPECT_TRUE(match("foo\nbar", "foo*"));
+  EXPECT_TRUE(match("foo\nbar\n", "foo*"));
+  EXPECT_FALSE(match("\nfoo", "foo*"));
+  EXPECT_TRUE(match("\n", "*"));
+}
+
+// Many stars against a long input.
+TEST_F(MatchFilePathTest, Star) {
+  EXPECT_TRUE(match(std::string(50, 'a').c_str(), "*a*a*a*a*a*a*a*a*a*a"));
+  EXPECT_FALSE(
+      match((std::string(50, 'a') + 'b').c_str(), "*a*a*a*a*a*a*a*a*a*a"));
+}
+
+TEST_F(MatchFilePathTest, CaseSensitive) {
+  EXPECT_TRUE(match("abc", "abc"));
+  EXPECT_FALSE(match("AbC", "abc"));
+  EXPECT_FALSE(match("abc", "AbC"));
+  EXPECT_TRUE(match("AbC", "AbC"));
+}
+
+TEST_F(MatchFilePathTest, PathSeparators) {
+  EXPECT_TRUE(match("usr/bin", "usr/bin"));
+  EXPECT_TRUE(match("usr\\bin", R"(usr\\bin)"));
+}
+
+TEST_F(MatchFilePathTest, NumericEscapeSequence) {
+  EXPECT_TRUE(match("test", "te*"));
+  EXPECT_TRUE(match("test\xff", "te*\xff"));
+  EXPECT_TRUE(match("foo\nbar", "foo*"));
+}
+
+TEST_F(MatchFilePathTest, ValidBrackets) {
+  EXPECT_TRUE(match("z", "[az]"));
+  EXPECT_FALSE(match("z", "[!az]"));
+  EXPECT_TRUE(match("a", "[aa]"));
+  EXPECT_TRUE(match("^", "[^az]"));
+  EXPECT_TRUE(match("[", "[[az]"));
+  EXPECT_FALSE(match("]", "[!]]"));
+}
+
+// Malformed bracket expressions are matched literally.
+TEST_F(MatchFilePathTest, InvalidBrackets) {
+  EXPECT_TRUE(match("[", "["));
+  EXPECT_TRUE(match("[]", "[]"));
+  EXPECT_TRUE(match("[!", "[!"));
+  EXPECT_TRUE(match("[!]", "[!]"));
+}
+
+TEST_F(MatchFilePathTest, Range) {
+  EXPECT_TRUE(match("c", "[b-d]"));
+  EXPECT_FALSE(match("c", "[!b-d]"));
+  EXPECT_TRUE(match("y", "[b-dx-z]"));
+  EXPECT_FALSE(match("y", "[!b-dx-z]"));
+}
+
+// Hyphens at the edges of a set are literal; elsewhere they form ranges.
+TEST_F(MatchFilePathTest, Hyphen) {
+  EXPECT_FALSE(match("#", "[!-#]"));
+  EXPECT_FALSE(match("-", "[!--.]"));
+  EXPECT_TRUE(match("_", "[^-`]"));
+  EXPECT_TRUE(match("]", "[[-^]"));
+  EXPECT_TRUE(match("]", R"([\-^])"));
+  EXPECT_TRUE(match("-", "[b-]"));
+  EXPECT_FALSE(match("-", "[!b-]"));
+  EXPECT_TRUE(match("-", "[-b]"));
+  EXPECT_FALSE(match("-", "[!-b]"));
+  EXPECT_TRUE(match("-", "[-]"));
+  EXPECT_FALSE(match("-", "[!-]"));
+}
+
+// Ranges whose upper bound is less than or equal to the lower bound.
+TEST_F(MatchFilePathTest, UpperLELower) {
+  EXPECT_FALSE(match("c", "[d-b]"));
+  EXPECT_TRUE(match("c", "[!d-b]"));
+  EXPECT_TRUE(match("y", "[d-bx-z]"));
+  EXPECT_FALSE(match("y", "[!d-bx-z]"));
+  EXPECT_TRUE(match("_", "[d-b^-`]"));
+  EXPECT_TRUE(match("]", "[d-b[-^]"));
+  EXPECT_TRUE(match("b", "[b-b]"));
+}
+
+// Brackets containing a slash are matched literally; backslashes inside
+// brackets are ordinary set members.
+TEST_F(MatchFilePathTest, SlashAndBackslashInBrackets) {
+  EXPECT_FALSE(match("/", "[/]"));
+  EXPECT_TRUE(match("\\", R"([\])"));
+  EXPECT_TRUE(match("[/]", "[/]"));
+  EXPECT_TRUE(match("\\", R"([\t])"));
+  EXPECT_TRUE(match("t", R"([\t])"));
+  EXPECT_FALSE(match("\t", R"([\t])"));
+}
+
+TEST_F(MatchFilePathTest, SlashAndBackslashInRange) {
+  EXPECT_FALSE(match("a/b", "a[.-0]b"));
+  EXPECT_TRUE(match("a\\b", "a[Z-^]b"));
+  EXPECT_FALSE(match("a/b", "a[/-0]b"));
+  EXPECT_TRUE(match("a[/-0]b", "a[/-0]b"));
+  EXPECT_FALSE(match("a/b", "a[.-/]b"));
+  EXPECT_TRUE(match("a[.-/]b", "a[.-/]b"));
+  EXPECT_TRUE(match("a\\b", R"(a[\-^]b)"));
+  EXPECT_TRUE(match("a\\b", R"(a[Z-\]b)"));
+}
+
+TEST_F(MatchFilePathTest, Brackets) {
+  EXPECT_TRUE(match("[", "[[]"));
+  EXPECT_TRUE(match("&", "[a&&b]"));
+  EXPECT_TRUE(match("|", "[a||b]"));
+  EXPECT_TRUE(match("~", "[a~~b]"));
+  EXPECT_TRUE(match(",", "[a-z+--A-Z]"));
+  EXPECT_FALSE(match(".", "[a-z--/A-Z]"));
+}
+
+} // namespace
+} // namespace format
+} // namespace clang
|
This is needed because Windows doesn't have anything equivalent to the POSIX fnmatch() function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we going to use this specific implementation on all platforms for parity?
The logic on its own looks good as far as I can tell, but, for example, POSIX Section 2.13 mentions that a leading period (hidden files) isn't matched by `*`.
This sort of divergence from the strict POSIX spec is fine in my opinion, just confirming we won't cause parity issues.
EXPECT_TRUE(match("abc", "*")); | ||
EXPECT_TRUE(match("abc", "ab[cd]")); | ||
EXPECT_TRUE(match("abc", "ab[!de]")); | ||
EXPECT_TRUE(!match("abc", "ab[de]")); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggested change: replace `EXPECT_TRUE(!match("abc", "ab[de]"));` with `EXPECT_FALSE(match("abc", "ab[de]"));`
Way better to read.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I used `!` instead of `EXPECT_FALSE` on purpose because it's easier for me to spot the `!`, which has a different color in my editor, but I'll oblige. :)
Yes, similar to this.
|
This is needed because Windows doesn't have anything equivalent to the POSIX fnmatch() function.