From 2b14f986dd8c56a0e3bb7df5fbaec3c86bdca239 Mon Sep 17 00:00:00 2001 From: Shahms King Date: Tue, 23 May 2023 07:25:58 -0700 Subject: [PATCH] feat(cxx_common): use a more compact serialization and in-memory format for selection (#5652) --- external.bzl | 8 +- kythe/cxx/extractor/BUILD | 9 + kythe/cxx/extractor/bazel_artifact.h | 32 + .../cxx/extractor/bazel_artifact_selector.cc | 611 +++++++++++++++--- kythe/cxx/extractor/bazel_artifact_selector.h | 111 +++- .../extractor/bazel_artifact_selector_test.cc | 349 ++++++++++ kythe/proto/BUILD | 12 + kythe/proto/bazel_artifact_selector_v2.proto | 62 ++ 8 files changed, 1080 insertions(+), 114 deletions(-) create mode 100644 kythe/proto/bazel_artifact_selector_v2.proto diff --git a/external.bzl b/external.bzl index 577d101207..30cf5c8b11 100644 --- a/external.bzl +++ b/external.bzl @@ -197,11 +197,11 @@ def _cc_dependencies(): maybe( http_archive, name = "com_google_googletest", - sha256 = "81964fe578e9bd7c94dfdb09c8e4d6e6759e19967e397dbea48d1c10e45d0df2", - strip_prefix = "googletest-release-1.12.1", + sha256 = "ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363", + strip_prefix = "googletest-1.13.0", urls = [ - "https://mirror.bazel.build/github.com/google/googletest/archive/refs/tags/release-1.12.1.tar.gz", - "https://github.com/google/googletest/archive/refs/tags/release-1.12.1.tar.gz", + "https://mirror.bazel.build/github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz", + "https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz", ], ) diff --git a/kythe/cxx/extractor/BUILD b/kythe/cxx/extractor/BUILD index dda896e076..46b01394ac 100644 --- a/kythe/cxx/extractor/BUILD +++ b/kythe/cxx/extractor/BUILD @@ -332,6 +332,9 @@ cc_test( cc_library( name = "bazel_artifact", hdrs = ["bazel_artifact.h"], + deps = [ + "@com_google_absl//absl/strings:str_format", + ], ) cc_library( @@ -345,10 +348,13 @@ cc_library( ":bazel_artifact", "//kythe/cxx/common:regex", 
"//kythe/proto:bazel_artifact_selector_cc_proto", + "//kythe/proto:bazel_artifact_selector_v2_cc_proto", "@build_event_stream_proto//:build_event_stream_cc_proto", "@com_github_google_glog//:glog", + "@com_google_absl//absl/base", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/container:node_hash_map", "@com_google_absl//absl/hash", "@com_google_absl//absl/status", @@ -373,6 +379,9 @@ cc_test( "//third_party:gtest_main", "@build_event_stream_proto//:build_event_stream_cc_proto", "@com_github_google_glog//:glog", + "@com_github_inazarenko_protobuf_matchers//protobuf-matchers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", diff --git a/kythe/cxx/extractor/bazel_artifact.h b/kythe/cxx/extractor/bazel_artifact.h index f17d86db1e..a55c46def0 100644 --- a/kythe/cxx/extractor/bazel_artifact.h +++ b/kythe/cxx/extractor/bazel_artifact.h @@ -17,10 +17,15 @@ #ifndef KYTHE_CXX_EXTRACTOR_BAZEL_ARTIFACT_H_ #define KYTHE_CXX_EXTRACTOR_BAZEL_ARTIFACT_H_ +#include +#include #include #include #include +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" + namespace kythe { /// \brief A pair of local path and canonical URI for a given Bazel output file. 
@@ -43,6 +48,17 @@ struct BazelArtifactFile { friend H AbslHashValue(H h, const BazelArtifactFile& file) { return H::combine(std::move(h), file.local_path, file.uri); } + + template + friend void AbslStringify(Sink& sink, const BazelArtifactFile& file) { + absl::Format(&sink, "BazelArtifactFile{.local_path = %v, .uri = %v}", + absl::FormatStreamed(std::quoted(file.local_path)), + absl::FormatStreamed(std::quoted(file.uri))); + } + friend std::ostream& operator<<(std::ostream& out, + const BazelArtifactFile& file) { + return (out << absl::StreamFormat("%v", file)); + } }; /// \brief A list of extracted compilation units and the target which owns them. @@ -58,6 +74,22 @@ struct BazelArtifact { bool operator!=(const BazelArtifact& other) const { return !(*this == other); } + + template + friend H AbslHashValue(H h, const BazelArtifact& artifact) { + return H::combine(std::move(h), artifact.label, artifact.files); + } + + template + friend void AbslStringify(Sink& sink, const BazelArtifact& artifact) { + absl::Format(&sink, "BazelArtifact{.label = %v, .files = { %s }}", + absl::FormatStreamed(std::quoted(artifact.label)), + absl::StrJoin(artifact.files, ", ")); + } + friend std::ostream& operator<<(std::ostream& out, + const BazelArtifact& artifact) { + return (out << absl::StreamFormat("%v", artifact)); + } }; } // namespace kythe diff --git a/kythe/cxx/extractor/bazel_artifact_selector.cc b/kythe/cxx/extractor/bazel_artifact_selector.cc index b5853c789a..5e68545c69 100644 --- a/kythe/cxx/extractor/bazel_artifact_selector.cc +++ b/kythe/cxx/extractor/bazel_artifact_selector.cc @@ -15,13 +15,26 @@ */ #include "kythe/cxx/extractor/bazel_artifact_selector.h" +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/status/status.h" +#include "absl/strings/ascii.h" #include "absl/strings/escaping.h" +#include "absl/strings/numbers.h" #include
"absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "glog/logging.h" #include "google/protobuf/any.pb.h" +#include "kythe/cxx/extractor/bazel_artifact.h" #include "kythe/proto/bazel_artifact_selector.pb.h" +#include "kythe/proto/bazel_artifact_selector_v2.pb.h" #include "re2/re2.h" namespace kythe { @@ -57,6 +70,11 @@ BazelArtifactFile ToBazelArtifactFile(const build_event_stream::File& file) { }; } +template +T& GetOrConstruct(std::optional& value) { + return value.has_value() ? *value : value.emplace(); +} + template struct FromRange { template @@ -103,6 +121,22 @@ absl::Status DeserializeInternal(T& selector, const U& container) { : absl::NotFoundError( absl::StrCat("No state found: ", error.ToString())); } +bool StrictAtoI(absl::string_view value, int64_t* out) { + if (value == "0") { + return absl::SimpleAtoi(value, out); + } + if (value.empty() || value.front() == '0') { + // We need to ignore leading zeros as they don't contribute to the integral + // value. + return false; + } + for (char ch : value) { + if (!absl::ascii_isdigit(ch)) { + return false; + } + } + return absl::SimpleAtoi(value, out); +} } // namespace absl::Status BazelArtifactSelector::Deserialize( @@ -131,56 +165,323 @@ absl::optional AspectArtifactSelector::Select( return result; } -bool AspectArtifactSelector::SerializeInto(google::protobuf::Any& state) const { - kythe::proto::BazelAspectArtifactSelectorState raw; - *raw.mutable_disposed() = FromRange{state_.disposed}; - for (const auto& [key, fileset] : state_.filesets) { - auto& entry = (*raw.mutable_filesets())[key]; - for (const auto& file : fileset.files) { - auto* file_entry = entry.add_files(); - file_entry->set_name(file->local_path); - file_entry->set_uri(file->uri); +class AspectArtifactSelectorSerializationHelper { + public: + using FileId = AspectArtifactSelector::FileId; + using ProtoFile = ::kythe::proto::BazelAspectArtifactSelectorStateV2::File; + using FileSet = AspectArtifactSelector::FileSet; + using ProtoFileSet = +
::kythe::proto::BazelAspectArtifactSelectorStateV2::FileSet; + using FileSetId = AspectArtifactSelector::FileSetId; + using State = AspectArtifactSelector::State; + + static bool SerializeInto( + const State& state, + kythe::proto::BazelAspectArtifactSelectorStateV2& result) { + return Serializer(&state, result).Serialize(); + } + + static absl::Status DeserializeFrom( + const kythe::proto::BazelAspectArtifactSelectorStateV2& state, + State& result) { + return Deserializer(&state, result).Deserialize(); + } + + private: + class Serializer { + public: + explicit Serializer(const State* state ABSL_ATTRIBUTE_LIFETIME_BOUND, + kythe::proto::BazelAspectArtifactSelectorStateV2& result + ABSL_ATTRIBUTE_LIFETIME_BOUND) + : state_(*CHECK_NOTNULL(state)), result_(result) {} + + bool Serialize() { + for (const auto& [id, file_set] : state_.file_sets.file_sets()) { + SerializeFileSet(id, file_set); + } + for (FileSetId id : state_.file_sets.disposed()) { + SerializeDisposed(id); + } + for (const auto& [id, target] : state_.pending) { + SerializePending(id, target); + } + return true; } - for (const auto& id : fileset.children) { - entry.add_file_sets()->set_id(id); + + private: + static int64_t ToSerializationId(FileSetId id, size_t other) { + if (const auto [unpacked] = id; unpacked >= 0) { + return unpacked; + } + // 0 is reserved for the integral ids, so start at -1. 
+ return -1 - static_cast(other); } - } - *raw.mutable_pending() = FromRange{state_.pending}; - state.PackFrom(std::move(raw)); - return true; -} + int64_t SerializeFileSetId(FileSetId id) { + auto [iter, inserted] = set_id_map_.try_emplace( + id, ToSerializationId(id, result_.file_set_ids().size())); + if (inserted && iter->second < 0) { + result_.add_file_set_ids(state_.file_sets.ToString(id)); + } + return iter->second; + } -AspectArtifactSelector::AspectArtifactSelector( - const AspectArtifactSelector& other) { - *this = other; -} + void SerializeFileSet(FileSetId id, const FileSet& file_set) { + auto& entry = (*result_.mutable_file_sets())[SerializeFileSetId(id)]; + for (FileId file_id : file_set.files) { + if (std::optional index = SerializeFile(file_id)) { + entry.add_files(*index); + } + } + for (FileSetId child_id : file_set.file_sets) { + entry.add_file_sets(SerializeFileSetId(child_id)); + } + } -AspectArtifactSelector& AspectArtifactSelector::operator=( - const AspectArtifactSelector& other) { - // Not particular efficient, but avoids false-sharing with the internal - // shared_ptr. - google::protobuf::Any state; - (void)other.SerializeInto(state); - (void)DeserializeFrom(state); - return *this; + std::optional SerializeFile(FileId id) { + const BazelArtifactFile* file = state_.files.Find(id); + if (file == nullptr) { + LOG(INFO) << "Omitting extracted FileId from serialization: " + << std::get<0>(id); + // FileSets may still reference files which have already been selected. + // If so, don't keep them when serializing. 
+ return std::nullopt; + } + auto [iter, inserted] = + file_id_map_.try_emplace(id, result_.files().size()); + if (!inserted) { + return iter->second; + } + + auto* entry = result_.add_files(); + entry->set_local_path(file->local_path); + entry->set_uri(file->uri); + return iter->second; + } + + void SerializeDisposed(FileSetId id) { + result_.add_disposed(SerializeFileSetId(id)); + } + + void SerializePending(FileSetId id, absl::string_view target) { + (*result_.mutable_pending())[SerializeFileSetId(id)] = target; + } + + const State& state_; + kythe::proto::BazelAspectArtifactSelectorStateV2& result_; + + absl::flat_hash_map file_id_map_; + absl::flat_hash_map set_id_map_; + }; + + class Deserializer { + public: + explicit Deserializer( + const kythe::proto::BazelAspectArtifactSelectorStateV2* state + ABSL_ATTRIBUTE_LIFETIME_BOUND, + State& result ABSL_ATTRIBUTE_LIFETIME_BOUND) + : state_(*CHECK_NOTNULL(state)), result_(result) {} + + absl::Status Deserialize() { + // First, deserialize all of the disposed sets to help check consistency + // during the rest of deserialization. + for (int64_t id : state_.disposed()) { + absl::StatusOr real_id = DeserializeFileSetId(id); + if (!real_id.ok()) return real_id.status(); + result_.file_sets.Dispose(*real_id); + } + { + // Then check the file_set_ids list for uniqueness: + absl::flat_hash_set non_integer_ids( + state_.file_set_ids().begin(), state_.file_set_ids().end()); + if (non_integer_ids.size() != state_.file_set_ids().size()) { + return absl::InvalidArgumentError("Inconsistent file_set_ids map"); + } + } + + for (const auto& [id, file_set] : state_.file_sets()) { + // Ensure pending and live file sets are distinct. 
+ if (state_.pending().contains(id)) { + return absl::InvalidArgumentError( + absl::StrCat("FileSet ", id, " is both pending and live")); + } + absl::Status status = DeserializeFileSet(id, file_set); + if (!status.ok()) return status; + } + for (const auto& [id, target] : state_.pending()) { + absl::Status status = DeserializePending(id, target); + if (!status.ok()) return status; + } + return absl::OkStatus(); + } + + private: + static constexpr FileSetId kDummy{0}; + + static absl::StatusOr ToDeserializationId( + const kythe::proto::BazelAspectArtifactSelectorStateV2& state, + int64_t id) { + if (id < 0) { + // Normalize the -1 based index. + size_t index = -(id + 1); + if (index >= state.file_set_ids().size()) { + return absl::InvalidArgumentError(absl::StrCat( + "Non-integral FileSetId index out of range: ", index)); + } + return state.file_set_ids(index); + } + return absl::StrCat(id); + } + + absl::StatusOr DeserializeFileSetId(int64_t id) { + auto [iter, inserted] = set_id_map_.try_emplace(id, kDummy); + if (inserted) { + absl::StatusOr string_id = ToDeserializationId(state_, id); + if (!string_id.ok()) return string_id.status(); + + std::optional file_set_id = + result_.file_sets.InternUnlessDisposed(*string_id); + if (!file_set_id.has_value()) { + return absl::InvalidArgumentError( + "Encountered disposed FileSetId during deserialization"); + } + iter->second = *file_set_id; + } + return iter->second; + } + + absl::Status DeserializeFileSet(int64_t id, const ProtoFileSet& file_set) { + absl::StatusOr file_set_id = DeserializeFileSetId(id); + if (!file_set_id.ok()) return file_set_id.status(); + + FileSet result_set; + for (uint64_t file_id : file_set.files()) { + absl::StatusOr real_id = DeserializeFile(file_id); + if (!real_id.ok()) return real_id.status(); + + result_set.files.push_back(*real_id); + } + for (int64_t child_id : file_set.file_sets()) { + if (!(state_.file_sets().contains(child_id) || + state_.pending().contains(child_id))) { + // Ensure
internal consistency. + return absl::InvalidArgumentError(absl::StrCat( + "Child FileSetId is neither live nor pending: ", child_id)); + } + + absl::StatusOr real_id = DeserializeFileSetId(child_id); + if (!real_id.ok()) return real_id.status(); + + result_set.file_sets.push_back(*real_id); + } + if (!result_.file_sets.InsertUnlessDisposed(*file_set_id, + std::move(result_set))) { + return absl::InvalidArgumentError( + absl::StrCat("FileSetId both disposed and live: ", id)); + } + return absl::OkStatus(); + } + + absl::StatusOr DeserializeFile(uint64_t id) { + if (id >= state_.files_size()) { + return absl::InvalidArgumentError( + absl::StrCat("File index out of range: ", id)); + } + return result_.files.Insert(BazelArtifactFile{ + .local_path = state_.files(id).local_path(), + .uri = state_.files(id).uri(), + }); + } + + absl::Status DeserializePending(int64_t id, absl::string_view target) { + absl::StatusOr real_id = DeserializeFileSetId(id); + if (!real_id.ok()) return real_id.status(); + + result_.pending.try_emplace(*real_id, target); + return absl::OkStatus(); + } + + const kythe::proto::BazelAspectArtifactSelectorStateV2& state_; + State& result_; + + absl::flat_hash_map set_id_map_; + }; +}; + +bool AspectArtifactSelector::SerializeInto(google::protobuf::Any& state) const { + switch (options_.serialization_format) { + case AspectArtifactSelectorSerializationFormat::kV2: { + kythe::proto::BazelAspectArtifactSelectorStateV2 raw; + if (!AspectArtifactSelectorSerializationHelper::SerializeInto(state_, + raw)) { + return false; + } + state.PackFrom(std::move(raw)); + return true; + } + case AspectArtifactSelectorSerializationFormat::kV1: { + kythe::proto::BazelAspectArtifactSelectorState raw; + for (FileSetId id : state_.file_sets.disposed()) { + raw.add_disposed(state_.file_sets.ToString(id)); + } + for (const auto& [id, target] : state_.pending) { + (*raw.mutable_pending())[state_.file_sets.ToString(id)] = target; + } + for (const auto& [id, file_set] :
state_.file_sets.file_sets()) { + auto& entry = (*raw.mutable_filesets())[state_.file_sets.ToString(id)]; + for (FileSetId child_id : file_set.file_sets) { + entry.add_file_sets()->set_id(state_.file_sets.ToString(child_id)); + } + for (FileId file_id : file_set.files) { + const BazelArtifactFile* file = state_.files.Find(file_id); + if (file == nullptr) continue; + + auto* file_entry = entry.add_files(); + file_entry->set_name(file->local_path); + file_entry->set_uri(file->uri); + } + } + state.PackFrom(std::move(raw)); + return true; + } + } + return false; } absl::Status AspectArtifactSelector::DeserializeFrom( const google::protobuf::Any& state) { - kythe::proto::BazelAspectArtifactSelectorState raw; - if (state.UnpackTo(&raw)) { - state_ = { - .disposed = FromRange{raw.disposed()}, - .filesets = {}, // Set below. - .pending = FromRange{raw.pending()}, - }; - for (auto& [key, fileset] : *raw.mutable_filesets()) { - InsertFileSet(key, fileset); + if (auto raw = kythe::proto::BazelAspectArtifactSelectorStateV2(); + state.UnpackTo(&raw)) { + state_ = {}; + return AspectArtifactSelectorSerializationHelper::DeserializeFrom(raw, + state_); + } else if (state.Is()) { + return absl::InvalidArgumentError( + "Malformed kythe.proto.BazelAspectArtifactSelectorStateV2"); + } + if (auto raw = kythe::proto::BazelAspectArtifactSelectorState(); + state.UnpackTo(&raw)) { + state_ = {}; + for (const auto& id : raw.disposed()) { + if (std::optional file_set_id = + state_.file_sets.InternUnlessDisposed(id)) { + state_.file_sets.Dispose(*file_set_id); + } + } + for (const auto& [id, target] : raw.pending()) { + if (std::optional file_set_id = + state_.file_sets.InternUnlessDisposed(id)) { + state_.pending.try_emplace(*file_set_id, target); + } + } + for (const auto& [id, file_set] : raw.filesets()) { + if (std::optional file_set_id = + state_.file_sets.InternUnlessDisposed(id)) { + InsertFileSet(*file_set_id, file_set); + } } return absl::OkStatus(); - } - if (state.Is()) { + } else 
if (state.Is()) { return absl::InvalidArgumentError( "Malformed kythe.proto.BazelAspectArtifactSelectorState"); } @@ -188,24 +489,150 @@ absl::Status AspectArtifactSelector::DeserializeFrom( "State not of type kythe.proto.BazelAspectArtifactSelectorState"); } +AspectArtifactSelector::FileTable::FileTable(const FileTable& other) + : next_id_(other.next_id_), + file_map_(other.file_map_), + id_map_(file_map_.size()) { + for (const auto& [file, id] : file_map_) { + id_map_.insert_or_assign(id, &file); + } +} + +AspectArtifactSelector::FileTable& AspectArtifactSelector::FileTable::operator=( + const FileTable& other) { + next_id_ = other.next_id_; + file_map_ = other.file_map_; + id_map_.clear(); + for (const auto& [file, id] : file_map_) { + id_map_.insert_or_assign(id, &file); + } + return *this; +} + +AspectArtifactSelector::FileId AspectArtifactSelector::FileTable::Insert( + BazelArtifactFile file) { + auto [iter, inserted] = + file_map_.emplace(std::move(file), std::make_tuple(next_id_)); + if (inserted) { + next_id_++; + id_map_[iter->second] = &iter->first; + } + return iter->second; +} + +std::optional AspectArtifactSelector::FileTable::Extract( + FileId id) { + if (auto id_node = id_map_.extract(id); !id_node.empty()) { + auto file_node = file_map_.extract(*id_node.mapped()); + // file_map_ owns the memory underlying the pointer we dereferenced here. + // If it's missing from the map, we're well into UB trouble. 
+ CHECK(!file_node.empty()); + return std::move(file_node.key()); + } + return std::nullopt; +} + +BazelArtifactFile AspectArtifactSelector::FileTable::ExtractFile( + BazelArtifactFile file) { + if (auto file_node = file_map_.extract(file); !file_node.empty()) { + id_map_.erase(file_node.mapped()); + } + return file; +} + +const BazelArtifactFile* AspectArtifactSelector::FileTable::Find( + FileId id) const { + auto iter = id_map_.find(id); + if (iter == id_map_.end()) { + return nullptr; + } + return iter->second; +} + +std::optional +AspectArtifactSelector::FileSetTable::InternUnlessDisposed( + absl::string_view id) { + auto [result, inserted] = InternOrCreate(id); + if (!inserted && disposed_.contains(result)) { + return std::nullopt; + } + return result; +} + +std::pair +AspectArtifactSelector::FileSetTable::InternOrCreate(absl::string_view id) { + int64_t token; + if (StrictAtoI(id, &token)) { + return {{token}, false}; + } + auto [iter, inserted] = id_map_.try_emplace(id, std::make_tuple(next_id_)); + if (inserted) { + next_id_--; // Non-integral ids are mapped to negative values. + inverse_id_map_.try_emplace(iter->second, iter->first); + } + return {{iter->second}, inserted}; +} + +bool AspectArtifactSelector::FileSetTable::InsertUnlessDisposed( + FileSetId id, FileSet file_set) { + if (disposed_.contains(id)) { + return false; + } + file_sets_.insert_or_assign(id, std::move(file_set)); + return true; // A false return indicates the set has already been disposed. 
+} + +std::optional +AspectArtifactSelector::FileSetTable::ExtractAndDispose(FileSetId id) { + if (auto node = file_sets_.extract(id); !node.empty()) { + disposed_.insert(id); + return std::move(node.mapped()); + } + return std::nullopt; +} + +void AspectArtifactSelector::FileSetTable::Dispose(FileSetId id) { + disposed_.insert(id); + file_sets_.erase(id); +} + +bool AspectArtifactSelector::FileSetTable::Disposed(FileSetId id) { + return disposed_.contains(id); +} + +std::string AspectArtifactSelector::FileSetTable::ToString(FileSetId id) const { + if (const auto [unpacked] = id; unpacked >= 0) { + return absl::StrCat(unpacked); + } + return inverse_id_map_.at(id); +} + absl::optional AspectArtifactSelector::SelectFileSet( absl::string_view id, const build_event_stream::NamedSetOfFiles& fileset) { - bool kept = InsertFileSet(id, fileset); - - // TODO(shahms): check pending *before* the insertion. - if (auto node = state_.pending.extract(id); !node.empty()) { - BazelArtifact result = {.label = std::string(node.mapped())}; - ReadFilesInto(id, result.label, result.files); - if (result.files.empty()) { - return absl::nullopt; + std::optional file_set_id = InternUnlessDisposed(id); + if (!file_set_id.has_value()) { + // Already disposed, skip. + return std::nullopt; + } + // This was a pending file set, select it directly. + if (auto node = state_.pending.extract(*file_set_id); !node.empty()) { + state_.file_sets.Dispose(*file_set_id); + BazelArtifact result = {.label = node.mapped()}; + for (const auto& file : fileset.files()) { + if (options_.file_name_allowlist.Match(file.name())) { + result.files.push_back( + state_.files.ExtractFile(ToBazelArtifactFile(file))); + } + } + for (const auto& child : fileset.file_sets()) { + if (std::optional child_id = + InternUnlessDisposed(child.id())) { + ExtractFilesInto(*child_id, result.label, result.files); + } } return result; } - - if (!kept) { - // There were no files, no children and no previous references, skip it. 
- state_.disposed.insert(std::string(id)); - } + InsertFileSet(*file_set_id, fileset); return absl::nullopt; } @@ -217,9 +644,13 @@ absl::optional AspectArtifactSelector::SelectTargetCompleted( .label = id.label(), }; for (const auto& output_group : payload.output_group()) { + // TODO(shahms): optionally prune *all* output groups, matching first. if (options_.output_group_allowlist.Match(output_group.name())) { - for (const auto& filesets : output_group.file_sets()) { - ReadFilesInto(filesets.id(), id.label(), result.files); + for (const auto& fileset : output_group.file_sets()) { + if (std::optional file_set_id = + InternUnlessDisposed(fileset.id())) { + ExtractFilesInto(*file_set_id, result.label, result.files); + } } } } @@ -230,63 +661,53 @@ absl::optional AspectArtifactSelector::SelectTargetCompleted( return absl::nullopt; } -void AspectArtifactSelector::ReadFilesInto( - absl::string_view id, absl::string_view target, +void AspectArtifactSelector::ExtractFilesInto( + FileSetId id, absl::string_view target, std::vector& files) { - if (state_.disposed.contains(id)) { + if (state_.file_sets.Disposed(id)) { return; } - if (auto node = state_.filesets.extract(id); !node.empty()) { - state_.disposed.insert(std::string(id)); - const FileSet& fileset = node.mapped(); - files.reserve(files.size() + fileset.files.size()); - for (const auto* file : fileset.files) { - auto iter = state_.files.find(*file); - CHECK(iter != state_.files.end()) << "Attempt to remove a missing file!"; - if (--iter->second == 0) { - files.push_back(std::move(state_.files.extract(iter).key())); - } else { - files.push_back(iter->first); - } - } - - for (const auto& child : fileset.children) { - ReadFilesInto(child, target, files); - } - + std::optional file_set = state_.file_sets.ExtractAndDispose(id); + if (!file_set.has_value()) { + // Files where requested, but we haven't disposed that filesets id yet. + // Record this for future processing. 
+ LOG(INFO) << "NamedSetOfFiles " << state_.file_sets.ToString(id) + << " requested by " << target << " but not yet disposed."; + state_.pending.emplace(id, target); return; } - // Files where requested, but we haven't disposed that filesets id yet. Record - // this for future processing. - LOG(INFO) << "NamedSetOfFiles " << id << " requested by " << target - << " but not yet disposed."; - state_.pending.emplace(id, target); + for (FileId file_id : file_set->files) { + if (std::optional file = state_.files.Extract(file_id)) { + files.push_back(*std::move(file)); + } + } + for (FileSetId child_id : file_set->file_sets) { + ExtractFilesInto(child_id, target, files); + } } -bool AspectArtifactSelector::InsertFileSet( - absl::string_view id, const build_event_stream::NamedSetOfFiles& fileset) { - if (state_.disposed.contains(id)) { - return false; - } - bool kept = false; +void AspectArtifactSelector::InsertFileSet( + FileSetId id, const build_event_stream::NamedSetOfFiles& fileset) { + std::optional file_set; for (const auto& file : fileset.files()) { if (options_.file_name_allowlist.Match(file.name())) { - auto iter = state_.files.try_emplace(ToBazelArtifactFile(file), 0).first; - iter->second++; - state_.filesets[id].files.push_back(&iter->first); - kept = true; + FileId file_id = state_.files.Insert(ToBazelArtifactFile(file)); + GetOrConstruct(file_set).files.push_back(file_id); } } for (const auto& child : fileset.file_sets()) { - if (!state_.disposed.contains(child.id())) { - if (state_.filesets[id].children.insert(child.id()).second) { - kept = true; - } + if (std::optional child_id = InternUnlessDisposed(child.id())) { + GetOrConstruct(file_set).file_sets.push_back(*child_id); } } - return kept; + if (file_set.has_value()) { + state_.file_sets.InsertUnlessDisposed(id, *std::move(file_set)); + } else { + // Nothing to do with this fileset, mark it disposed. 
+ state_.file_sets.Dispose(id); + } } ExtraActionSelector::ExtraActionSelector( diff --git a/kythe/cxx/extractor/bazel_artifact_selector.h b/kythe/cxx/extractor/bazel_artifact_selector.h index 2d463c9259..98f918c469 100644 --- a/kythe/cxx/extractor/bazel_artifact_selector.h +++ b/kythe/cxx/extractor/bazel_artifact_selector.h @@ -16,12 +16,16 @@ #ifndef KYTHE_CXX_EXTRACTOR_BAZEL_ARTIFACT_SELECTOR_H_ #define KYTHE_CXX_EXTRACTOR_BAZEL_ARTIFACT_SELECTOR_H_ +#include #include #include +#include +#include #include #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/container/inlined_vector.h" #include "absl/container/node_hash_map.h" #include "absl/meta/type_traits.h" #include "absl/status/status.h" @@ -130,6 +134,12 @@ class AnyArtifactSelector final : public BazelArtifactSelector { std::function get_; }; +/// \brief Known serialization format versions. +enum class AspectArtifactSelectorSerializationFormat { + kV1, // The initial, bulky-but-simple format. + kV2, // The newer, flatter, smaller format. +}; + /// \brief Options class used for constructing an AspectArtifactSelector. struct AspectArtifactSelectorOptions { // A set of patterns used to filter file names from NamedSetOfFiles events. @@ -140,6 +150,9 @@ struct AspectArtifactSelectorOptions { RegexSet output_group_allowlist; // A set of patterns used to filter aspect names from TargetComplete events. RegexSet target_aspect_allowlist = RegexSet::Build({".*"}).value(); + // Which serialization format version to use. 
+ AspectArtifactSelectorSerializationFormat serialization_format = + AspectArtifactSelectorSerializationFormat::kV2; }; /// \brief A BazelArtifactSelector implementation which tracks state from @@ -154,8 +167,8 @@ class AspectArtifactSelector final : public BazelArtifactSelector { explicit AspectArtifactSelector(Options options) : options_(std::move(options)) {} - AspectArtifactSelector(const AspectArtifactSelector& other); - AspectArtifactSelector& operator=(const AspectArtifactSelector& other); + AspectArtifactSelector(const AspectArtifactSelector&) = default; + AspectArtifactSelector& operator=(const AspectArtifactSelector&) = default; AspectArtifactSelector(AspectArtifactSelector&&) = default; AspectArtifactSelector& operator=(AspectArtifactSelector&&) = default; @@ -174,22 +187,86 @@ class AspectArtifactSelector final : public BazelArtifactSelector { absl::Status DeserializeFrom(const google::protobuf::Any& state) final; private: + friend class AspectArtifactSelectorSerializationHelper; + + using FileId = std::tuple; + using FileSetId = std::tuple; + + class FileTable { + public: + FileTable() = default; + FileTable(const FileTable& other); + FileTable& operator=(const FileTable& other); + FileTable(FileTable&&) = default; + FileTable& operator=(FileTable&&) = default; + + FileId Insert(BazelArtifactFile file); + std::optional Extract(FileId id); + // Extract the equivalent file, if present, returning the argument. + BazelArtifactFile ExtractFile(BazelArtifactFile file); + + const BazelArtifactFile* Find(FileId) const; + + auto begin() const { return id_map_.begin(); } + auto end() const { return id_map_.end(); } + + private: + uint64_t next_id_ = 0; + // TODO(shahms): DO NOT SUBMIT: + // This currently emits a file on the first extraction only, + // rather than tracking the use-count. This is intentional, but maybe + // undesirable. 
+ absl::node_hash_map file_map_; + absl::flat_hash_map id_map_; + }; + struct FileSet { - std::vector files; - absl::flat_hash_set children; + absl::InlinedVector files; + absl::InlinedVector file_sets; }; - struct State { + class FileSetTable { + public: + std::optional InternUnlessDisposed(absl::string_view id); + bool InsertUnlessDisposed(FileSetId id, FileSet file_set); + // Extracts the FileSet and, if previously present, marks it disposed. + std::optional ExtractAndDispose(FileSetId id); + // Unconditionally marks a FileSet as disposed. + // Erases it if present in the map. + void Dispose(FileSetId id); + [[nodiscard]] bool Disposed(FileSetId id); + + std::string ToString(FileSetId id) const; + + const absl::flat_hash_map& file_sets() const { + return file_sets_; + } + const absl::flat_hash_set& disposed() const { return disposed_; } + + private: + std::pair InternOrCreate(absl::string_view id); + + // A record of all pending FileSets. + absl::flat_hash_map file_sets_; // A record of all of the NamedSetOfFiles events which have been processed. - absl::flat_hash_set disposed; - // Map of active files to count of filesets which contain it. - absl::node_hash_map files; - // Mapping from fileset id to NamedSetOfFiles whose file names matched - // the allowlist, but have not yet been consumed by an event. - absl::flat_hash_map filesets; + absl::flat_hash_set disposed_; + + // The next integral id to use. + // Non-integral file set ids are mapped to negative values. + int64_t next_id_ = -1; + // For non-integral file set ids coming from Bazel. + absl::flat_hash_map id_map_; + absl::flat_hash_map inverse_id_map_; + }; + + struct State { + // A record of all of the potentially-selectable files encountered. + FileTable files; + // A record of all of the potentially-selectable NamedSetOfFiles. + FileSetTable file_sets; // Mapping from fileset id to target name which required that // file set when it had not yet been seen. 
- absl::flat_hash_map pending; + absl::flat_hash_map pending; }; absl::optional SelectFileSet( absl::string_view id, const build_event_stream::NamedSetOfFiles& fileset); @@ -198,11 +275,15 @@ class AspectArtifactSelector final : public BazelArtifactSelector { const build_event_stream::BuildEventId::TargetCompletedId& id, const build_event_stream::TargetComplete& payload); - void ReadFilesInto(absl::string_view id, absl::string_view target, - std::vector& files); - bool InsertFileSet(absl::string_view id, + void ExtractFilesInto(FileSetId id, absl::string_view target, + std::vector& files); + void InsertFileSet(FileSetId id, const build_event_stream::NamedSetOfFiles& fileset); + std::optional InternUnlessDisposed(absl::string_view id) { + return state_.file_sets.InternUnlessDisposed(id); + } + Options options_; State state_; }; diff --git a/kythe/cxx/extractor/bazel_artifact_selector_test.cc b/kythe/cxx/extractor/bazel_artifact_selector_test.cc index a201202b39..033eafdee3 100644 --- a/kythe/cxx/extractor/bazel_artifact_selector_test.cc +++ b/kythe/cxx/extractor/bazel_artifact_selector_test.cc @@ -15,7 +15,13 @@ */ #include "kythe/cxx/extractor/bazel_artifact_selector.h" +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/functional/any_invocable.h" #include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "absl/types/span.h" @@ -26,12 +32,118 @@ #include "google/protobuf/text_format.h" #include "gtest/gtest.h" #include "kythe/cxx/extractor/bazel_artifact.h" +#include "protobuf-matchers/protocol-buffer-matchers.h" #include "re2/re2.h" #include "src/main/java/com/google/devtools/build/lib/buildeventstream/proto/build_event_stream.pb.h" namespace kythe { namespace { +using ::protobuf_matchers::EqualsProto; using ::testing::Eq; +using ::testing::FieldsAre; +using ::testing::IsEmpty; +using ::testing::Not; +using ::testing::Optional; +using 
::testing::SizeIs; +using ::testing::UnorderedElementsAre; + +struct FileSet { + std::vector files; + std::vector file_sets; +}; + +using IdGenerator = absl::AnyInvocable; + +IdGenerator NumericIdGenerator() { + return [i = 0]() mutable { return absl::StrCat(i++); }; +} + +IdGenerator AlphaIdGenerator() { + return [i = 0]() mutable { + std::string result = absl::StrCat(i++); + for (char& ch : result) { + ch = (ch - '0') + 'a'; + } + return result; + }; +} + +IdGenerator MixedIdGenerator() { + return [numeric = NumericIdGenerator(), alpha = AlphaIdGenerator(), + i = 0]() mutable { return (i++ % 2) ? numeric() : alpha(); }; +} + +absl::flat_hash_map GenerateFileSets( + int count, IdGenerator& next_id) { + absl::flat_hash_map result; + for (int i = 0; i < count; ++i) { + auto id = next_id(); + result[id].files = {absl::StrCat("path/to/file/", id, ".kzip")}; + } + return result; +} + +void ToNamedSetOfFilesEvents( + const absl::flat_hash_map& file_sets, + std::vector& result) { + result.reserve(result.size() + file_sets.size()); + for (const auto& [id, file_set] : file_sets) { + auto& event = result.emplace_back(); + event.mutable_id()->mutable_named_set()->set_id(id); + for (const auto& path : file_set.files) { + auto* file = event.mutable_named_set_of_files()->add_files(); + file->set_name(path); + file->set_uri(absl::StrCat("file:///", path)); + } + for (const auto& child_id : file_set.file_sets) { + event.mutable_named_set_of_files()->add_file_sets()->set_id(child_id); + } + } +} + +// The TargetCompleted event will always come after the NamedSetOfFiles events. 
+void ToTargetCompletedBuildEvents( + absl::string_view label, + const absl::flat_hash_map& file_sets, + std::vector& result) { + ToNamedSetOfFilesEvents(file_sets, result); + + auto& event = result.emplace_back(); + event.mutable_id()->mutable_target_completed()->set_label(label); + event.mutable_completed()->set_success(true); + auto* output_group = event.mutable_completed()->add_output_group(); + for (const auto& [id, unused] : file_sets) { + output_group->add_file_sets()->set_id(id); + } +} + +struct BuildEventOptions { + int target_count = 10; + int files_per_target = 2; + int common_file_count = 2; +}; + +std::vector GenerateBuildEvents( + const BuildEventOptions& options = {}, + IdGenerator next_id = MixedIdGenerator()) { + absl::flat_hash_map common = + GenerateFileSets(options.common_file_count, next_id); + + std::vector events; + ToNamedSetOfFilesEvents(common, events); + for (int i = 0; i < options.target_count; ++i) { + absl::flat_hash_map files = + GenerateFileSets(options.files_per_target, next_id); + for (auto& [unused_id, file_set] : files) { + for (const auto& [id, unused] : common) { + file_set.file_sets.push_back(id); + } + } + ToTargetCompletedBuildEvents(absl::StrCat("//path/to/target:", i), files, + events); + } + return events; +} AspectArtifactSelector::Options DefaultOptions() { return { @@ -164,6 +276,243 @@ TEST(AspectArtifactSelectorTest, SelectsFailedTargets) { })); } +struct StressTestCase { + enum class NamedSetIdStyle { + kIntegral, + kAlpha, + kMixed, + }; + NamedSetIdStyle id_style = NamedSetIdStyle::kMixed; + bool reversed = false; + AspectArtifactSelectorSerializationFormat serialization_format = + AspectArtifactSelectorSerializationFormat::kV2; + + using TupleType = std::tuple; + explicit StressTestCase(const TupleType& t) + : id_style(std::get<0>(t)), + reversed(std::get<1>(t)), + serialization_format(std::get<2>(t)) {} +}; + +class AspectArtifactSelectorStressTest + : public testing::TestWithParam { + public: + std::vector 
GenerateTestBuildEvents( + const BuildEventOptions& options) { + std::vector events = + GenerateBuildEvents(options, MakeIdGenerator()); + if (GetParam().reversed) { + std::reverse(events.begin(), events.end()); + } + return events; + } + + IdGenerator MakeIdGenerator() { + switch (GetParam().id_style) { + case StressTestCase::NamedSetIdStyle::kIntegral: + return NumericIdGenerator(); + case StressTestCase::NamedSetIdStyle::kAlpha: + return AlphaIdGenerator(); + case StressTestCase::NamedSetIdStyle::kMixed: + return MixedIdGenerator(); + } + } +}; + +INSTANTIATE_TEST_SUITE_P( + AspectArtifactSelectorStressTest, AspectArtifactSelectorStressTest, + testing::ConvertGenerator(testing::Combine( + testing::Values(StressTestCase::NamedSetIdStyle::kIntegral, + StressTestCase::NamedSetIdStyle::kAlpha, + StressTestCase::NamedSetIdStyle::kMixed), + testing::Bool(), + testing::Values(AspectArtifactSelectorSerializationFormat::kV1, + AspectArtifactSelectorSerializationFormat::kV2))), + [](const testing::TestParamInfo< + AspectArtifactSelectorStressTest::ParamType>& info) { + std::string id_style = [&] { + switch (info.param.id_style) { + case StressTestCase::NamedSetIdStyle::kAlpha: + return "Alphabetic"; + case StressTestCase::NamedSetIdStyle::kIntegral: + return "Integral"; + case StressTestCase::NamedSetIdStyle::kMixed: + return "Mixed"; + } + }(); + std::string format = [&] { + switch (info.param.serialization_format) { + case AspectArtifactSelectorSerializationFormat::kV1: + return "V1"; + case AspectArtifactSelectorSerializationFormat::kV2: + return "V2"; + } + }(); + return absl::StrCat(info.param.reversed ? "Reversed" : "Ordered", "_", + id_style, "_", format); + }); + +// Verify that the selector selects the expected number of files +// distributed across several targets. 
+TEST_P(AspectArtifactSelectorStressTest, SelectsExpectedTargetFiles) { + std::vector events = GenerateTestBuildEvents( + {.target_count = 10, .files_per_target = 2, .common_file_count = 2}); + + AspectArtifactSelector selector(DefaultOptions()); + absl::flat_hash_set targets; + absl::flat_hash_set files; + for (const auto& event : events) { + if (auto artifact = selector.Select(event)) { + targets.insert(artifact->label); + files.insert(artifact->files.begin(), artifact->files.end()); + } + } + EXPECT_THAT(targets, SizeIs(10)); + EXPECT_THAT(files, SizeIs(10 * 2 + 2)); +} + +// Verify that the selector selects the expected number of files +// distributed across several targets, after deserialization into +// a freshly constructed selector. +TEST_P(AspectArtifactSelectorStressTest, + SelectsExpectedTargetFilesWhenFreshlyDeserialized) { + std::vector events = GenerateTestBuildEvents( + {.target_count = 10, .files_per_target = 2, .common_file_count = 2}); + + absl::flat_hash_set targets; + absl::flat_hash_set files; + + std::optional state; + for (const auto& event : events) { + AspectArtifactSelector selector(DefaultOptions()); + if (state.has_value()) { + ASSERT_EQ(selector.DeserializeFrom(*state), absl::OkStatus()) + << state->DebugString(); + } + + if (auto artifact = selector.Select(event)) { + targets.insert(artifact->label); + files.insert(artifact->files.begin(), artifact->files.end()); + } + + ASSERT_TRUE(selector.SerializeInto(state.emplace())); + } + EXPECT_THAT(targets, SizeIs(10)); + EXPECT_THAT(files, SizeIs(10 * 2 + 2)); +} + +// Verify that the selector selects the expected number of files +// distributed across several targets, after deserialization. 
+TEST_P(AspectArtifactSelectorStressTest, + SelectsExpectedTargetFilesWhenDeserialized) { + std::vector events = GenerateTestBuildEvents( + {.target_count = 10, .files_per_target = 2, .common_file_count = 2}); + + absl::flat_hash_set targets; + absl::flat_hash_set files; + + std::optional state; + AspectArtifactSelector selector(DefaultOptions()); + for (const auto& event : events) { + if (state.has_value()) { + ASSERT_EQ(selector.DeserializeFrom(*state), absl::OkStatus()) + << state->DebugString(); + } + + if (auto artifact = selector.Select(event)) { + targets.insert(artifact->label); + files.insert(artifact->files.begin(), artifact->files.end()); + } + + ASSERT_TRUE(selector.SerializeInto(state.emplace())); + } + EXPECT_THAT(targets, SizeIs(10)); + EXPECT_THAT(files, SizeIs(10 * 2 + 2)); +} + +TEST(AspectArtifactSelectorTest, SerializationRoundTrips) { + AspectArtifactSelector selector(DefaultOptions()); + + // Pending. + EXPECT_THAT(selector.Select(ParseEventOrDie(R"pb( + id { + target_completed { + label: "//path/to/target:name" + aspect: "//aspect:file.bzl%name" + } + } + completed { + success: true + output_group { + name: "kythe_compilation_unit" + file_sets { id: "1" } + } + })pb")), + Eq(absl::nullopt)); + // Active. + ASSERT_THAT(selector.Select(ParseEventOrDie(R"pb( + id { named_set { id: "2" } } + named_set_of_files { + files { name: "path/to/1/file.kzip" uri: "file:///path/to/1/file.kzip" } + files { name: "path/to/2/file.kzip" uri: "file:///path/to/2/file.kzip" } + })pb")), + Eq(absl::nullopt)); + + // Active => Disposed. 
+ ASSERT_THAT(selector.Select(ParseEventOrDie(R"pb( + id { named_set { id: "3" } } + named_set_of_files { + files { name: "path/to/1/file.kzip" uri: "file:///path/to/1/file.kzip" } + files { name: "path/to/3/file.kzip" uri: "file:///path/to/3/file.kzip" } + })pb")), + Eq(absl::nullopt)); + EXPECT_THAT( + selector.Select(ParseEventOrDie(R"pb( + id { + target_completed { + label: "//path/to/disposed/target:name" + aspect: "//aspect:file.bzl%name" + } + } + completed { + success: true + output_group { + name: "kythe_compilation_unit" + file_sets { id: "3" } + } + })pb")), + Optional(FieldsAre( + "//path/to/disposed/target:name", + UnorderedElementsAre( + FieldsAre("path/to/1/file.kzip", "file:///path/to/1/file.kzip"), + FieldsAre("path/to/3/file.kzip", + "file:///path/to/3/file.kzip"))))); + + google::protobuf::Any initial; + ASSERT_TRUE(selector.SerializeInto(initial)); + { + // The original selector round trips. + ASSERT_THAT(selector.DeserializeFrom(initial), absl::OkStatus()); + + google::protobuf::Any deserialized; + ASSERT_TRUE(selector.SerializeInto(deserialized)); + + EXPECT_THAT(deserialized, EqualsProto(initial)); + } + + { + // A freshly constructed selector round trips. + AspectArtifactSelector empty_selector(DefaultOptions()); + ASSERT_THAT(empty_selector.DeserializeFrom(initial), absl::OkStatus()); + + google::protobuf::Any deserialized; + ASSERT_TRUE(empty_selector.SerializeInto(deserialized)); + + EXPECT_THAT(deserialized, EqualsProto(initial)); + } +} + TEST(AspectArtifactSelectorTest, CompatibleWithAny) { // Just ensures that AspectArtifactSelector can be assigned to an Any. 
AnyArtifactSelector unused = AspectArtifactSelector(DefaultOptions()); diff --git a/kythe/proto/BUILD b/kythe/proto/BUILD index 7e78635566..897fe277ac 100644 --- a/kythe/proto/BUILD +++ b/kythe/proto/BUILD @@ -711,6 +711,18 @@ cc_proto_library( ], ) +proto_library( + name = "bazel_artifact_selector_v2_proto", + srcs = ["bazel_artifact_selector_v2.proto"], +) + +cc_proto_library( + name = "bazel_artifact_selector_v2_cc_proto", + deps = [ + ":bazel_artifact_selector_v2_proto", + ], +) + # Public GeneratedProtoInfo file option proto_library( name = "generated_message_info_proto", diff --git a/kythe/proto/bazel_artifact_selector_v2.proto b/kythe/proto/bazel_artifact_selector_v2.proto new file mode 100644 index 0000000000..2b764bc608 --- /dev/null +++ b/kythe/proto/bazel_artifact_selector_v2.proto @@ -0,0 +1,62 @@ +/* + * Copyright 2023 The Kythe Authors. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; + +package kythe.proto; + +option java_multiple_files = true; + +message BazelAspectArtifactSelectorStateV2 { + message File { + string local_path = 1; + string uri = 2; + } + // All potentially selectable files. + repeated File files = 1; + + message FileSet { + // All of the files which are part of this set. + // The values in this list are indexes into the `files` field. + repeated uint64 files = 2; + // All of the filesets which are children of this one. 
+ // The values in this list are either integral representations + // of the NamedSetOfFilesId (if >=0) or the negative index within + // the `file_set_ids` field. + repeated int64 file_sets = 3; + } + // Mapping between file_set_id and FileSet. + // Either integral representations of the NamedSetOfFilesId (if >=0) + // or the negative index within the `file_set_ids` field. + map file_sets = 2; + + // All file set ids which have been selected and removed. + // The values in this list are either integral representations + // of the NamedSetOfFilesId (if >=0) or the negative index within + // the `file_set_ids` field. + repeated int64 disposed = 3; + + // Mapping between file_set_id and the target for which the + // file set was requested. + // The keys in this map are either integral representations + // of the NamedSetOfFilesId (if >=0) or the negative index within + // the `file_set_ids` field. + map pending = 4; + + // Mapping between the numerical ids used elsewhere and the string + // value for non-integral NamedSetOfFilesId. + repeated string file_set_ids = 5; +}