Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions include/glob_match.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once
#include <string>
#include "common.h"
#include "regex_utils.h"

// A glob pattern that is converted to a regex once, at construction time,
// and can then be tested against any number of paths.
class GlobPattern {
 public:
    // Builds a matcher from glob_pattern.
    // With match_with_parent set, Match() also accepts paths located below
    // a path that matches the pattern (the pattern may match a parent path).
    explicit GlobPattern(const std::string& glob_pattern, bool match_with_parent = false);

    // Reports whether path is matched by this glob pattern.
    [[nodiscard]] bool Match(const std::string& path) const;

 private:
    // Compiled regex produced from the glob pattern.
    regex_code m_re_pattern;
};
3 changes: 2 additions & 1 deletion include/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <string>
#include <vector>
#include "cpplint_state.h"
#include "glob_match.h"

namespace fs = std::filesystem;

Expand Down Expand Up @@ -81,7 +82,7 @@ class Options {
// Filters out files listed in the --exclude command line switch. File paths
// in the switch are evaluated relative to the current working directory
std::vector<fs::path> FilterExcludedFiles(std::vector<fs::path> filenames,
const std::vector<fs::path>& excludes);
const std::vector<GlobPattern>& excludes);

public:
Options() :
Expand Down
1 change: 1 addition & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ cpplint_sources = [
'src/cleanse.cpp',
'src/states.cpp',
'src/nest_info.cpp',
'src/glob_match.cpp',
]

# main binary
Expand Down
155 changes: 155 additions & 0 deletions src/glob_match.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#include "glob_match.h"
#include <map>
#include <string>
#include <string_view>
#include <vector>
#include "regex_utils.h"

// We use a modified version of glob.cpp to convert glob patterns to regex patterns.
// https://github.com/p-ranav/glob/blob/master/source/glob.cpp

// Characters that get a backslash prefix when they appear literally in a glob.
static constexpr std::string_view SPECIAL_CHARACTERS{"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"};
// Regex and replacement used to escape set-operation characters (&, ~, |)
// inside a character class.
// NOTE(review): the raw-string delimiters consume the outer parentheses, so the
// compiled pattern is `[&~|]` without a capture group, while the replacement
// string refers to group 1 -- confirm RegexReplace behaves as intended here.
static const auto ESCAPE_SET_OPER = RegexCompile(R"([&~|])");
static const std::string ESCAPE_REPL_STR = R"(\\\1)";

// Replaces every non-overlapping occurrence of `from` in `str` with `to`,
// scanning left to right. Text inserted by a replacement is never rescanned,
// so `to` may safely contain `from` (e.g. "\" -> "\\").
// Returns true if at least one replacement was made. An empty `from` never
// matches and leaves `str` untouched.
static bool string_replace(std::string &str, std::string_view from, std::string_view to) {
    if (from.empty())
        return false;

    bool replaced = false;
    std::size_t pos = 0;
    while ((pos = str.find(from, pos)) != std::string::npos) {
        str.replace(pos, from.length(), to);
        // Continue after the inserted text to avoid an endless loop when
        // the replacement itself contains the needle.
        pos += to.length();
        replaced = true;
    }
    return replaced;
}

// Convert a glob pattern to a regex pattern.
//
// Translation rules:
//   - "**" forming a whole path component matches across directories
//   - "*" matches any run of characters except path separators
//   - "?" matches one character that is not a path separator
//   - "[...]" is a character class; a leading '!' negates it and additionally
//     excludes path separators; an unterminated '[' is treated literally
//   - '/' and '\' both match either path separator
//   - any other character in SPECIAL_CHARACTERS is backslash-escaped
//
// When match_with_parent is true, the returned regex also matches paths that
// have a matching path as a parent: unless the pattern ends with a path
// separator, an optional "separator + anything" tail is appended before the
// end anchor. When it is false, only the end anchor '$' is appended.
static std::string translate(std::string_view pattern, bool match_with_parent) {
    const auto is_separator = [](char ch) { return ch == '/' || ch == '\\'; };
    const std::size_t n = pattern.size();
    std::size_t i = 0;
    std::string result_string;

    while (i < n) {
        const char c = pattern[i];
        i += 1;
        if (c == '*') {
            // "**" is recursive only when it is a complete path component,
            // i.e. bounded by separators or by the ends of the pattern.
            const bool starts_component = i <= 1 || is_separator(pattern[i - 2]);
            const bool is_double_star = i < n && pattern[i] == '*';
            if (starts_component && is_double_star &&
                (i + 1 == n || is_separator(pattern[i + 1]))) {
                if (i + 1 == n) {
                    // Trailing "**": everything that follows is accepted.
                    result_string += ".*";
                    break;
                }
                result_string += R"((.*[\\/])?)";
                i += 2;  // skip the second '*' and the separator after it
            } else {
                result_string += R"([^\\/]*)";
            }
        } else if (c == '?') {
            result_string += R"([^\\/])";
        } else if (c == '[') {
            // Locate the closing ']'. A ']' directly after '[' or "[!" is
            // a literal member of the class, not its terminator.
            std::size_t j = i;
            if (j < n && pattern[j] == '!') {
                j += 1;
            }
            if (j < n && pattern[j] == ']') {
                j += 1;
            }
            while (j < n && pattern[j] != ']') {
                j += 1;
            }
            if (j >= n) {
                // No closing bracket: treat '[' as a literal character.
                result_string += "\\[";
            } else {
                std::string stuff(pattern.substr(i, j - i));
                if (stuff.find("--") == std::string::npos) {
                    string_replace(stuff, std::string_view{"\\"}, std::string_view{R"(\\)"});
                } else {
                    // Split the class body at the hyphens that form ranges so
                    // the remaining hyphens can be escaped individually.
                    std::vector<std::string> chunks;
                    std::size_t chunk_begin = i;
                    std::size_t k = (pattern[i] == '!') ? i + 2 : i + 1;
                    while (true) {
                        // Search only inside the class body [k, j).
                        // string_view::find(s, pos, count) takes a character
                        // count as its third argument, not an end position.
                        k = pattern.find('-', k);
                        if (k == std::string_view::npos || k >= j) {
                            break;
                        }
                        chunks.emplace_back(pattern.substr(chunk_begin, k - chunk_begin));
                        chunk_begin = k + 1;
                        k = k + 3;
                    }
                    chunks.emplace_back(pattern.substr(chunk_begin, j - chunk_begin));

                    // Escape backslashes and hyphens for set difference (--).
                    // Hyphens that create ranges shouldn't be escaped.
                    // The class body is rebuilt from the escaped chunks.
                    stuff.clear();
                    for (std::size_t idx = 0; idx < chunks.size(); ++idx) {
                        std::string &chunk = chunks[idx];
                        string_replace(chunk, std::string_view{"\\"}, std::string_view{R"(\\)"});
                        string_replace(chunk, std::string_view{"-"}, std::string_view{R"(\-)"});
                        if (idx != 0) {
                            stuff += '-';
                        }
                        stuff += chunk;
                    }
                }

                // Escape set operations (&&, ~~ and ||).
                RegexReplace(ESCAPE_SET_OPER, ESCAPE_REPL_STR, stuff);
                i = j + 1;
                if (stuff[0] == '!') {
                    // Negated class; it must never match a path separator.
                    stuff = R"(^\\/)" + stuff.substr(1);
                } else if (stuff[0] == '^' || stuff[0] == '[') {
                    // A single backslash makes the leading '^' or '[' literal.
                    stuff = "\\" + stuff;
                }
                result_string += "[" + stuff + "]";
            }
        } else if (is_separator(c)) {
            // Path separator
            result_string += R"([\\/])";
        } else {
            // SPECIAL_CHARS
            // closing ')', '}' and ']'
            // '-' (a range in character set)
            // '&', '~', (extended character set operations)
            // '#' (comment) and WHITESPACE (ignored) in verbose mode
            if (SPECIAL_CHARACTERS.find(c) != std::string_view::npos) {
                result_string += '\\';
            }
            result_string += c;
        }
    }

    if (match_with_parent) {
        // GlobPattern::Match() should check parent paths as well.
        // Nothing is appended when the pattern already ends with a separator.
        if (pattern.empty() || !is_separator(pattern.back())) {
            result_string += R"(([\\/].*)?$)";
        }
    } else {
        result_string.push_back('$');
    }
    return result_string;
}

// Translates the glob into regex source text and compiles it once, so that
// Match() only has to run the compiled regex.
GlobPattern::GlobPattern(const std::string& glob_pattern, bool match_with_parent) {
    m_re_pattern = RegexCompile(translate(glob_pattern, match_with_parent));
}

bool GlobPattern::Match(const std::string& path) const {
regex_match re_result = RegexCreateMatchData(m_re_pattern);
return RegexMatch(m_re_pattern, path, re_result);
}
27 changes: 11 additions & 16 deletions src/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <vector>
#include "cpplint_state.h"
#include "error_suppressions.h"
#include "glob_match.h"
#include "regex_utils.h"
#include "string_utils.h"
#include "version.h"
Expand Down Expand Up @@ -353,7 +354,7 @@ std::vector<fs::path> Options::ParseArguments(int argc, char** argv,
bool quiet = cpplint_state->Quiet();
std::string counting_style = "";
bool recursive = false;
std::vector<fs::path> excludes = {};
std::vector<GlobPattern> excludes = {};
int num_threads = -1;
m_filters = DEFAULT_FILTERS;

Expand Down Expand Up @@ -416,7 +417,8 @@ std::vector<fs::path> Options::ParseArguments(int argc, char** argv,
std::string val = ArgToValue(opt);
if (val != "") {
excludes.emplace_back(
fs::weakly_canonical(fs::absolute(val)).make_preferred());
fs::weakly_canonical(fs::absolute(val)).make_preferred().string(),
true);
}
} else if (opt.starts_with("--extensions=")) {
ProcessExtensionsOption(ArgToValue(opt));
Expand Down Expand Up @@ -501,28 +503,21 @@ void Options::ProcessIncludeOrderOption(const std::string& val) {
}

// Returns true when filename is matched by at least one exclude pattern.
// Each pattern decides on its own whether parent paths are taken into
// account (see the match_with_parent argument of GlobPattern).
static bool ShouldBeExcluded(const fs::path& filename,
                             const std::vector<GlobPattern>& excludes) {
    // Convert once; every pattern is matched against the same string.
    const std::string file_str = filename.string();
    for (const GlobPattern& exc : excludes) {
        // Check if file is the same as (or a child of) a glob pattern
        if (exc.Match(file_str))
            return true;
    }
    return false;
}

std::vector<fs::path> Options::FilterExcludedFiles(std::vector<fs::path> filenames,
const std::vector<fs::path>& excludes) {
const std::vector<GlobPattern>& excludes) {
// remove matching exclude patterns from m_filenames
auto new_end = std::remove_if(filenames.begin(), filenames.end(),
[excludes](const fs::path& f)->bool {
[&excludes](const fs::path& f)->bool {
return ShouldBeExcluded(f, excludes);
});
filenames.erase(new_end, filenames.end());
Expand Down
88 changes: 88 additions & 0 deletions tests/glob_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#include <gtest/gtest.h>
#include <string>
#include "glob_match.h"

// One row of the parameterized glob test table.
struct GlobCase {
// glob pattern under test
const char* pattern;
// path that is matched against the pattern
const std::string str;
// expected value when parent matching is disabled (the default).
bool expected;
// expected value when parent matching is enabled.
bool expected_parent;
};

// Parameterized fixture; each test receives one GlobCase via GetParam().
class GlobMatchTest : public ::testing::TestWithParam<GlobCase> {
};

// Test table. Columns: pattern, path, expected result without parent matching,
// expected result with parent matching.
const GlobCase glob_cases[] = {
// literal
{ "/foo/bar.h", "/foo/bar.h", true, true },
{ "/foo/bar.h", "/foo/bar-h", false, false },
// any characters ('*' stays within a single path component)
{ "/foo/*h", "/foo/bar-h", true, true },
{ "/foo/bar/*h", "/foo/bar-h", false, false },
{ "/foo/bar/*h", "/foo/bar/test.h", true, true },
{ "/*/test.h", "/foo/test.h", true, true },
{ "/*/test.h", "/foo/bar/test.h", false, false },
{ "foo/*", "foo/test.h", true, true },
{ "foo/*", "foo/bar/test.h", false, true },
// recursive ('**' must be a whole path component to cross directories)
{ "/**/test.h", "/foo/test.h", true, true },
{ "/**/test.h", "/foo/bar/test.h", true, true },
{ "/**bar/test.h", "/foo/bar/test.h", false, false },
{ "**/test.h", "/foo/test.h", true, true },
{ "**/test.h", "/foo/bar/test.h", true, true },
{ "**/test.h", "test.h", true, true },
{ "**bar/test.h", "/foo/bar/test.h", false, false },
{ "foo/**", "foo/test.h", true, true },
{ "foo/**", "foo/bar/test.h", true, true },
// any single character ('?' never matches a path separator)
{ "/foo/bar?h", "/foo/bar.h", true, true },
{ "/foo/bar?h", "/foo/bar..h", false, false },
{ "/foo/bar?h", "/foo/bar/h", false, false },
// list
{ "/foo/[abc].h", "/foo/b.h", true, true },
{ "/foo/[abc].h", "/foo/d.h", false, false },
// negative list (never matches a path separator)
{ "/foo/[!abc].h", "/foo/b.h", false, false },
{ "/foo/[!abc].h", "/foo/d.h", true, true },
{ "/foo/[!abc].h", "/foo//.h", false, false },
// range
{ "/foo/[a-c].h", "/foo/b.h", true, true },
{ "/foo/[a-c].h", "/foo/d.h", false, false },
// negative range (never matches a path separator)
{ "/foo/[!a-c].h", "/foo/b.h", false, false },
{ "/foo/[!a-c].h", "/foo/d.h", true, true },
{ "/foo/[!a-c].h", "/foo//.h", false, false },
// compare with parent matching (results differ between the two modes)
{ "/foo/bar", "/foo/bar/baz", false, true },
{ "/foo/*", "/foo/bar/baz", false, true },
{ "/foo/*/", "/foo/bar/baz", false, true },
{ "/foo/**/test", "/foo/bar/baz/test/a.cpp", false, true },
// windows paths ('/' and '\\' are interchangeable in pattern and path)
{ "C:/foo/bar.h", "C:\\foo\\bar.h", true, true },
{ "C:/foo/bar", "C:\\foo\\bar\\baz", false, true },
{ "C:\\foo\\bar", "C:/foo/bar/baz", false, true },
};

// Instantiate every TEST_P of GlobMatchTest once per row of glob_cases.
INSTANTIATE_TEST_SUITE_P(GlobMatchTestInstantiation,
GlobMatchTest,
::testing::ValuesIn(glob_cases));

// Checks the `expected` column: matching with parent matching disabled.
TEST_P(GlobMatchTest, GlobMatch) {
    const GlobCase& param = GetParam();
    const GlobPattern pattern(param.pattern);
    const bool actual = pattern.Match(param.str);
    EXPECT_EQ(param.expected, actual) <<
        " pattern: " << param.pattern << "\n" <<
        " str: " << param.str;
}

// Checks the `expected_parent` column: matching with parent matching enabled.
TEST_P(GlobMatchTest, GlobMatchParentMatching) {
    const GlobCase& param = GetParam();
    // Second constructor argument turns parent matching on.
    const GlobPattern pattern(param.pattern, true);
    const bool actual = pattern.Match(param.str);
    EXPECT_EQ(param.expected_parent, actual) <<
        " pattern: " << param.pattern << "\n" <<
        " str: " << param.str;
}
1 change: 1 addition & 0 deletions tests/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ test_sources = [
'string_test.cpp',
'lines_test.cpp',
'file_test.cpp',
'glob_test.cpp',
]

# build tests
Expand Down