Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cxx_extractor): add ability to canonicalize VName paths per-file #5909

Merged
merged 2 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions kythe/cxx/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,16 @@ cc_library(
visibility = [PUBLIC_VISIBILITY],
deps = [
":common_status",
":regex",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/log",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/synchronization",
"@com_google_absl//absl/types:span",
],
)

Expand Down
95 changes: 91 additions & 4 deletions kythe/cxx/common/path_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <utility>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/log/log.h"
#include "absl/status/statusor.h"
#include "absl/strings/match.h"
Expand All @@ -37,6 +38,8 @@
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "absl/synchronization/mutex.h"
#include "absl/types/span.h"
#include "kythe/cxx/common/regex.h"
#include "kythe/cxx/common/status.h"

namespace kythe {
Expand Down Expand Up @@ -123,6 +126,15 @@ PathParts SplitPath(absl::string_view path) {
return {path.substr(0, pos), absl::ClippedSubstr(path, pos + 1)};
}

constexpr struct VisitPattern {
absl::string_view operator()(absl::string_view pattern) const {
return pattern;
}
absl::string_view operator()(const Regex& pattern) const {
return pattern.pattern();
}
} kVisitPattern;

} // namespace

absl::StatusOr<PathCleaner> PathCleaner::Create(absl::string_view root) {
Expand Down Expand Up @@ -185,7 +197,8 @@ absl::StatusOr<std::string> PathRealizer::Relativize(
}

absl::StatusOr<PathCanonicalizer> PathCanonicalizer::Create(
absl::string_view root, Policy policy) {
absl::string_view root, Policy policy,
absl::Span<const PathEntry> path_map) {
absl::StatusOr<PathCleaner> cleaner = PathCleaner::Create(root);
if (!cleaner.ok()) {
return cleaner.status();
Expand All @@ -195,12 +208,34 @@ absl::StatusOr<PathCanonicalizer> PathCanonicalizer::Create(
if (!realizer.ok()) {
return realizer.status();
}
return PathCanonicalizer(policy, *std::move(cleaner), *std::move(realizer));

std::vector<Policy> override_policies;
std::vector<absl::string_view> override_paths;
for (const auto& [path, policy] : path_map) {
if (!realizer->has_value()) {
realizer = MaybeMakeRealizer(policy, root);
if (!realizer.ok()) {
return realizer.status();
}
}
override_policies.push_back(policy);
override_paths.push_back(std::visit(kVisitPattern, path));
}
absl::StatusOr<RegexSet> override_set = RegexSet::Build(override_paths);
if (!override_set.ok()) {
return override_set.status();
}
return PathCanonicalizer(policy, *std::move(cleaner), *std::move(realizer),
*std::move(override_set),
std::move(override_policies));
}

absl::StatusOr<std::string> PathCanonicalizer::Relativize(
absl::string_view path) const {
switch (policy_) {
absl::StatusOr<Policy> policy = PolicyFor(path);
if (!policy.ok()) return policy.status();

switch (*policy) {
case Policy::kPreferRelative:
if (auto resolved = MaybeRealPath(realizer_, path)) {
if (!IsAbsolutePath(*resolved)) {
Expand All @@ -216,10 +251,22 @@ absl::StatusOr<std::string> PathCanonicalizer::Relativize(
case Policy::kCleanOnly:
return cleaner_.Relativize(path);
}
LOG(FATAL) << "Unknown policy: " << static_cast<int>(policy_);
LOG(FATAL) << "Unknown policy: " << static_cast<int>(*policy);
return std::string(path);
}

absl::StatusOr<PathCanonicalizer::Policy> PathCanonicalizer::PolicyFor(
absl::string_view path) const {
absl::StatusOr<std::vector<int>> match = override_set_.ExplainMatch(path);
if (!match.ok()) {
return match.status();
}
if (match->empty()) {
return policy_;
}
return override_policy_[*absl::c_min_element(*match)];
}

std::optional<PathCanonicalizer::Policy> ParseCanonicalizationPolicy(
absl::string_view policy) {
using Policy = PathCanonicalizer::Policy;
Expand Down Expand Up @@ -264,6 +311,46 @@ std::string JoinPath(absl::string_view a, absl::string_view b) {
absl::StripPrefix(b, "/"));
}

bool AbslParseFlag(absl::string_view text, PathCanonicalizer::PathEntry* entry,
std::string* error) {
size_t pos = text.find('@');
if (pos == text.npos) {
*error = "missing @ delimiter between path and policy";
return false;
}
absl::StatusOr<Regex> path = Regex::Compile(text.substr(0, pos));
if (!path.ok()) {
*error = path.status().message();
return false;
}
entry->path = *std::move(path);
return AbslParseFlag(text.substr(pos + 1), &entry->policy, error);
}

std::string AbslUnparseFlag(const PathCanonicalizer::PathEntry& entry) {
return absl::StrCat(std::visit(kVisitPattern, entry.path), "@",
AbslUnparseFlag(entry.policy));
}

bool AbslParseFlag(absl::string_view text,
std::vector<PathCanonicalizer::PathEntry>* entries,
std::string* error) {
for (const auto& entry : absl::StrSplit(text, ' ')) {
if (!AbslParseFlag(entry, &entries->emplace_back(), error)) {
entries->pop_back();
return false;
}
}
return true;
}

std::string AbslUnparseFlag(
const std::vector<PathCanonicalizer::PathEntry>& entries) {
return absl::StrJoin(entries, " ", [](std::string* out, const auto& entry) {
absl::StrAppend(out, AbslUnparseFlag(entry));
});
}

std::string CleanPath(absl::string_view input) {
const bool is_absolute_path = absl::StartsWith(input, "/");
std::vector<absl::string_view> parts;
Expand Down
50 changes: 47 additions & 3 deletions kythe/cxx/common/path_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@
#include <optional>
#include <string>
#include <utility>
#include <variant>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "absl/synchronization/mutex.h"
#include "absl/types/span.h"
#include "kythe/cxx/common/regex.h"

namespace kythe {

Expand Down Expand Up @@ -97,22 +101,44 @@ class PathCanonicalizer {
kPreferReal = 2, ///< Use real path, except if errors.
};

/// \brief An entry to use when overriding the default policy.
struct PathEntry {
/// \brief The path pattern to override.
std::variant<Regex, std::string> path;

/// \brief The policy to apply to matching paths.
Policy policy = Policy::kCleanOnly;
};

/// \brief Creates a new PathCanonicalizer with the given root and policy.
/// \param root The root directory to use when relativizing paths.
/// \param policy The default policy to apply.
static absl::StatusOr<PathCanonicalizer> Create(
absl::string_view root, Policy policy = Policy::kCleanOnly);
absl::string_view root, Policy policy = Policy::kCleanOnly,
absl::Span<const PathEntry> path_map = {});

/// \brief Transforms the provided path into a relative path depending on
/// configured policy.
absl::StatusOr<std::string> Relativize(absl::string_view path) const;

private:
explicit PathCanonicalizer(Policy policy, PathCleaner cleaner,
std::optional<PathRealizer> realizer)
: policy_(policy), cleaner_(cleaner), realizer_(realizer) {}
std::optional<PathRealizer> realizer,
RegexSet override_set,
std::vector<Policy> override_policy)
: policy_(policy),
cleaner_(std::move(cleaner)),
realizer_(std::move(realizer)),
override_set_(std::move(override_set)),
override_policy_(std::move(override_policy)) {}

absl::StatusOr<Policy> PolicyFor(absl::string_view path) const;

Policy policy_;
PathCleaner cleaner_;
std::optional<PathRealizer> realizer_;
RegexSet override_set_;
std::vector<Policy> override_policy_;
};

/// \brief Parses a flag string as a PathCanonicalizer::Policy.
Expand All @@ -135,6 +161,24 @@ std::string AbslUnparseFlag(PathCanonicalizer::Policy policy);
std::optional<PathCanonicalizer::Policy> ParseCanonicalizationPolicy(
absl::string_view policy);

/// \brief Parses a single PathEntry flag value as `<pattern>@<policy>`
bool AbslParseFlag(absl::string_view text, PathCanonicalizer::PathEntry* entry,
std::string* error);

/// \brief Returns the flag string representation of
/// PathCanonicalizer::PathEntry.
std::string AbslUnparseFlag(const PathCanonicalizer::PathEntry& entry);

/// \brief Parses a space separated list of PathEntry flag values.
bool AbslParseFlag(absl::string_view text,
std::vector<PathCanonicalizer::PathEntry>* entries,
std::string* error);

/// \brief Returns the flag string representation of
/// PathCanonicalizer::PathEntry.
std::string AbslUnparseFlag(
const std::vector<PathCanonicalizer::PathEntry>& entry);

/// \brief Append path `b` to path `a`, cleaning and returning the result.
std::string JoinPath(absl::string_view a, absl::string_view b);

Expand Down
62 changes: 62 additions & 0 deletions kythe/cxx/common/path_utils_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,15 @@
#include "absl/log/check.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

namespace kythe {
namespace {
using ::testing::ElementsAre;
using ::testing::FieldsAre;
using ::testing::Property;
using ::testing::VariantWith;

std::error_code Symlink(const std::string& target,
const std::string& linkpath) {
Expand Down Expand Up @@ -152,6 +157,27 @@ TEST(PathUtilsTest, CanonicalizerPolicyParseTest) {
EXPECT_NE("", error);
}

TEST(PathUtilsTest, CanonicalizerPathEntryParseTest) {
std::string error;
std::vector<PathCanonicalizer::PathEntry> entries;
ASSERT_TRUE(
AbslParseFlag("a@clean-only b@prefer-relative", &entries, &error));
EXPECT_THAT(
entries,
ElementsAre(FieldsAre(VariantWith<Regex>(Property(&Regex::pattern, "a")),
PathCanonicalizer::Policy::kCleanOnly),
FieldsAre(VariantWith<Regex>(Property(&Regex::pattern, "b")),
PathCanonicalizer::Policy::kPreferRelative)));
EXPECT_EQ(AbslUnparseFlag(entries), "a@clean-only b@prefer-relative");
}

TEST(PathUtilsTest, CanonicalizerPathEntryParseFailuresTest) {
std::string error;
std::vector<PathCanonicalizer::PathEntry> entries;
ASSERT_FALSE(AbslParseFlag("clean-only b@prefer-relative", &entries, &error));
EXPECT_THAT(error, "missing @ delimiter between path and policy");
}

class CanonicalizerTest : public ::testing::Test {
public:
void SetUp() override {
Expand Down Expand Up @@ -256,5 +282,41 @@ TEST_F(CanonicalizerTest, CanonicalizerPreferReal) {
.value_or(""));
}

TEST_F(CanonicalizerTest, CanonicalizerRealOverrideClean) {
AddDirectory("concrete");
AddDirectory("concrete/subdir");
AddDirectory("concrete/file");
AddSymlink("concrete", "link");

AddDirectory("base");
AddDirectory("elsewhere");
AddDirectory("elsewhere/subdir");
AddDirectory("elsewhere/file");
AddSymlink("../elsewhere", "base/link");

const std::string base = JoinPath(root(), "base");
PathCanonicalizer canonicalizer =
PathCanonicalizer::Create(
root(), PathCanonicalizer::Policy::kCleanOnly,
{{.path = ".*/base/link",
.policy = PathCanonicalizer::Policy::kPreferReal}})
.value();

// Use the default clean-only policy for links/paths which don't match.
EXPECT_EQ("link/file",
canonicalizer.Relativize(JoinPath(root(), "link/subdir/../file"))
.value_or(""));
EXPECT_EQ("link/file",
canonicalizer.Relativize(JoinPath(root(), "./link/subdir/../file"))
.value_or(""));
// And the prefer-real policy for those which do.
EXPECT_EQ("elsewhere/file",
canonicalizer.Relativize(JoinPath(base, "link/subdir/../file"))
.value_or(""));
EXPECT_EQ("base/bad/file",
canonicalizer.Relativize(JoinPath(base, "bad/subdir/../file"))
.value_or(""));
}

} // namespace
} // namespace kythe
10 changes: 6 additions & 4 deletions kythe/cxx/common/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef KYTHE_CXX_COMMON_REGEX_H_
#define KYTHE_CXX_COMMON_REGEX_H_

#include <memory>

Expand All @@ -22,9 +24,6 @@
#include "re2/re2.h"
#include "re2/set.h"

#ifndef KYTHE_CXX_COMMON_REGEX_H_
#define KYTHE_CXX_COMMON_REGEX_H_

namespace kythe {

/// \brief Regex is a Regular value type implemented on top of RE2.
Expand Down Expand Up @@ -53,7 +52,10 @@ class Regex {

/// \brief Retrieves the underlying RE2 object to be compatible with the RE2
/// non-member functions.
operator const RE2&() const { return *re_; }
operator const RE2&() const { return *re_; } // NOLINT

/// \brief Returns the string specification of the regular expression.
absl::string_view pattern() const { return re_->pattern(); }

private:
explicit Regex(std::shared_ptr<const RE2> re) : re_(std::move(re)) {}
Expand Down
3 changes: 2 additions & 1 deletion kythe/cxx/extractor/cxx_extractor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1140,7 +1140,8 @@ CompilationWriter::RootPath CompilationWriter::RootRelativePath(

if (!canonicalizer_.has_value()) {
if (absl::StatusOr<PathCanonicalizer> canonicalizer =
PathCanonicalizer::Create(root_directory_, path_policy_);
PathCanonicalizer::Create(root_directory_, path_policy_,
path_policy_overrides_);
canonicalizer.ok()) {
canonicalizer_ = *std::move(canonicalizer);
} else {
Expand Down