diff --git a/src/core/jsonschema/frame.cc b/src/core/jsonschema/frame.cc index e1ee47838..72ef00cd6 100644 --- a/src/core/jsonschema/frame.cc +++ b/src/core/jsonschema/frame.cc @@ -1397,6 +1397,11 @@ auto SchemaFrame::reset() -> void { this->pointers_with_non_orphan_.clear(); this->pointer_to_location_.clear(); this->reachability_.clear(); + this->references_by_destination_.clear(); + this->location_members_children_.clear(); + this->descendants_by_pointer_.clear(); + this->potential_sources_by_location_.clear(); + this->reachability_graph_.clear(); this->root_.clear(); this->locations_.clear(); this->references_.clear(); @@ -1415,115 +1420,135 @@ auto SchemaFrame::populate_pointer_to_location() const -> void { } } -// TODO: Find a way to split or simplify this monster while preserving -// its performance? -auto SchemaFrame::populate_reachability(const Location &base, - const SchemaWalker &walker, - const SchemaResolver &resolver) const - -> const ReachabilityCache & { - for (auto &entry : this->reachability_) { - if (entry.first == &base) { - return entry.second; - } +auto SchemaFrame::populate_location_members( + const SchemaWalker &walker, const SchemaResolver &resolver) const -> void { + if (!this->location_members_children_.empty()) { + return; } - auto &cache = - this->reachability_.emplace_back(&base, ReachabilityCache{}).second; - - // --------------------------------------------------------------------------- - // (1) Find all unreachable locations - // --------------------------------------------------------------------------- + this->populate_pointer_to_location(); - if (this->pointers_with_non_orphan_.empty()) { - for (const auto &entry : this->locations_) { - if (entry.second.type != LocationType::Pointer && !entry.second.orphan) { - this->pointers_with_non_orphan_.insert(std::cref(entry.second.pointer)); - } + for (const auto &entry : this->locations_) { + if (entry.second.type != LocationType::Subschema) { + continue; + } + if (!entry.second.parent.has_value()) { + continue; + } + const auto &parent_pointer{entry.second.parent.value()}; + const auto relative{entry.second.pointer.slice(parent_pointer.size())}; + if (relative.empty() || !relative.at(0).is_property()) { + continue; + } + const auto parent_location{this->traverse(parent_pointer)}; + if (!parent_location.has_value()) { + continue; } + const auto vocabs{this->vocabularies(parent_location->get(), resolver)}; + const auto &keyword_result{walker(relative.at(0).to_property(), vocabs)}; + if (keyword_result.type == SchemaKeywordType::LocationMembers) { + this->location_members_children_.insert(std::cref(entry.second.pointer)); + } + } +} + +auto SchemaFrame::populate_descendants() const -> void { + if (!this->descendants_by_pointer_.empty()) { + return; } - std::vector unreachable_locations; + this->populate_pointer_to_location(); + for (const auto &entry : this->locations_) { if (entry.second.type == LocationType::Pointer) { continue; } const auto &pointer{entry.second.pointer}; - auto cache_iter = cache.find(std::cref(pointer)); - if (cache_iter != cache.end()) { - continue; + const auto *location{&entry.second}; + + WeakPointer prefix; + for (std::size_t index = 0; index <= pointer.size(); ++index) { + auto prefix_iter = this->pointer_to_location_.find(std::cref(prefix)); + if (prefix_iter != this->pointer_to_location_.end() && + !prefix_iter->second.empty()) { + const auto &key_pointer{prefix_iter->second.front()->pointer}; + this->descendants_by_pointer_[std::cref(key_pointer)].push_back( + location); + } + if (index < pointer.size()) { + const auto &token{pointer.at(index)}; + if (token.is_property()) { + prefix.emplace_back(token.to_property(), token.property_hash()); + } else { + prefix.push_back(token.to_index()); + } + } } + } +} - bool is_reachable{false}; - if (pointer == base.pointer) { - is_reachable = true; - } else if (pointer.starts_with(base.pointer)) { - is_reachable = base.orphan || this->pointers_with_non_orphan_.contains( - std::cref(pointer)); - } +auto SchemaFrame::populate_potential_sources( + const SchemaWalker &walker, const SchemaResolver &resolver) const -> void { + if (!this->potential_sources_by_location_.empty()) { + return; + } - cache.emplace(std::cref(pointer), is_reachable); - if (!is_reachable) { - unreachable_locations.push_back(&entry.second); + this->populate_reference_graph(); + this->populate_location_members(walker, resolver); + + for (const auto &entry : this->locations_) { + if (entry.second.type == LocationType::Pointer) { + continue; } - } - // --------------------------------------------------------------------------- - // (2) Filter out descendants that cross a container boundary - // --------------------------------------------------------------------------- + const auto &pointer{entry.second.pointer}; + const auto *location{&entry.second}; + std::vector sources; - if (base.orphan) { - std::vector> nested_entries; - for (const auto &entry : this->locations_) { - if (entry.second.type != LocationType::Subschema) { - continue; - } - const auto &pointer{entry.second.pointer}; - if (pointer == base.pointer || !pointer.starts_with(base.pointer)) { - continue; - } - if (!entry.second.parent.has_value()) { - continue; - } - const auto &parent_pointer{entry.second.parent.value()}; - const auto relative{pointer.slice(parent_pointer.size())}; - if (relative.empty() || !relative.at(0).is_property()) { - continue; - } - const auto parent_location{this->traverse(parent_pointer)}; - if (!parent_location.has_value()) { - continue; + WeakPointer ancestor = pointer; + bool first_iteration{true}; + while (first_iteration || !ancestor.empty()) { + auto destination_iterator = + this->references_by_destination_.find(std::cref(ancestor)); + if (destination_iterator != this->references_by_destination_.end()) { + bool crosses{false}; + if (ancestor != pointer) { + for (const auto &boundary_ref : this->location_members_children_) { + const auto &boundary{boundary_ref.get()}; + if (pointer.starts_with(boundary) && + !ancestor.starts_with(boundary)) { + crosses = true; + break; + } + } + } + + for (const auto *source_pointer : destination_iterator->second) { + sources.push_back( + PotentialSource{.source_pointer = source_pointer, + .source_parent = source_pointer->initial(), + .crosses = crosses}); + } } - const auto vocabularies{ - this->vocabularies(parent_location->get(), resolver)}; - const auto &keyword_result{ - walker(relative.at(0).to_property(), vocabularies)}; - if (keyword_result.type == SchemaKeywordType::LocationMembers) { - nested_entries.push_back(std::cref(pointer)); + + if (ancestor.empty()) { + break; } + ancestor = ancestor.initial(); + first_iteration = false; } - for (const auto &entry : this->locations_) { - if (entry.second.type == LocationType::Pointer) { - continue; - } - auto cache_iter = cache.find(std::cref(entry.second.pointer)); - if (cache_iter == cache.end() || !cache_iter->second) { - continue; - } - for (const auto &nested : nested_entries) { - if (entry.second.pointer.starts_with(nested.get())) { - cache_iter->second = false; - unreachable_locations.push_back(&entry.second); - break; - } - } + if (!sources.empty()) { + this->potential_sources_by_location_[location] = std::move(sources); } } +} - // --------------------------------------------------------------------------- - // (3) Build a reverse mapping from reference destinations to their sources - // --------------------------------------------------------------------------- +auto SchemaFrame::populate_reference_graph() const -> void { + if (!this->references_by_destination_.empty()) { + return; + } std::unordered_map> dynamic_anchors_by_fragment; @@ -1573,192 +1598,153 @@ auto SchemaFrame::populate_reachability(const Location &base, } } - std::unordered_map, - std::vector, WeakPointer::Hasher, - WeakPointer::Comparator> - references_by_destination; for (const auto &[source, destination] : reference_destinations) { - references_by_destination[std::cref(*destination)].push_back(source); + this->references_by_destination_[std::cref(*destination)].push_back(source); } +} - // --------------------------------------------------------------------------- - // (4) Precompute which references could make each orphan reachable - // --------------------------------------------------------------------------- - - std::unordered_set needed_pointers; - for (const auto *unreachable_location : unreachable_locations) { - needed_pointers.insert(unreachable_location->pointer); - const Location *loc{unreachable_location}; - while (loc != nullptr && loc->parent.has_value()) { - auto [iter, inserted] = needed_pointers.insert(loc->parent.value()); - if (!inserted) { - break; - } - loc = nullptr; - for (const auto &entry : this->locations_) { - if (entry.second.pointer == *iter) { - loc = &entry.second; - break; - } - } - } +auto SchemaFrame::populate_reachability_graph( + const SchemaWalker &walker, const SchemaResolver &resolver) const -> void { + if (!this->reachability_graph_.empty()) { + return; } + this->populate_pointer_to_location(); + this->populate_location_members(walker, resolver); + this->populate_reference_graph(); + for (const auto &entry : this->locations_) { - if (needed_pointers.contains(entry.second.pointer)) { - this->pointer_to_location_[std::cref(entry.second.pointer)].push_back( - &entry.second); + if (entry.second.pointer.empty()) { + continue; } - } - - struct PotentialSource { - const WeakPointer *source_pointer; - bool crosses; - }; - struct PotentialReach { - const Location *location; - std::vector potential_sources; - }; - std::vector unreachable_with_sources; - unreachable_with_sources.reserve(unreachable_locations.size()); - std::unordered_map vocabularies_cache; - - for (const auto *unreachable_location : unreachable_locations) { - const auto &pointer{unreachable_location->pointer}; - PotentialReach entry{.location = unreachable_location, - .potential_sources = {}}; - - WeakPointer ancestor = pointer; - bool first_iteration{true}; - while (first_iteration || !ancestor.empty()) { - auto destination_iterator = - references_by_destination.find(std::cref(ancestor)); - if (destination_iterator != references_by_destination.end()) { - bool crosses{false}; - if (ancestor != pointer) { - const Location *check_location{unreachable_location}; - while (check_location != nullptr) { - if (check_location->pointer == ancestor) { - break; - } + const auto parent_pointer{entry.second.pointer.initial()}; + auto parent_iterator = + this->pointer_to_location_.find(std::cref(parent_pointer)); + if (parent_iterator == this->pointer_to_location_.end()) { + continue; + } - if (!check_location->parent.has_value()) { - break; - } + for (const Location *parent_location : parent_iterator->second) { + this->reachability_graph_[parent_location].push_back( + ReachabilityEdge{.target = &entry.second, + .orphan_context_only = entry.second.orphan, + .is_reference = false}); + } + } - const auto parent_location{ - this->traverse(check_location->parent.value())}; - if (!parent_location.has_value()) { - break; - } + for (const auto &[destination_reference, sources] : + this->references_by_destination_) { + auto destination_locations_iterator = + this->pointer_to_location_.find(destination_reference); + if (destination_locations_iterator == this->pointer_to_location_.end()) { + continue; + } - const auto relative{check_location->pointer.slice( - check_location->parent.value().size())}; - if (!relative.empty() && relative.at(0).is_property()) { - const auto &parent_loc{parent_location->get()}; - auto vocab_iterator = - vocabularies_cache.find(parent_loc.base_dialect); - if (vocab_iterator == vocabularies_cache.end()) { - auto [inserted_iterator, inserted] = vocabularies_cache.emplace( - parent_loc.base_dialect, - this->vocabularies(parent_loc, resolver)); - vocab_iterator = inserted_iterator; - } + const Location *destination_location{nullptr}; + for (const auto *location : destination_locations_iterator->second) { + if (location->type != LocationType::Pointer) { + destination_location = location; + break; + } + } - const auto &keyword_result{ - walker(relative.at(0).to_property(), vocab_iterator->second)}; - if (keyword_result.type == SchemaKeywordType::LocationMembers) { - crosses = true; - break; - } - } + if (!destination_location && + !destination_locations_iterator->second.empty()) { + destination_location = destination_locations_iterator->second.front(); + } - check_location = &parent_location->get(); - } - } + if (!destination_location) { + continue; + } - for (const auto *source_pointer : destination_iterator->second) { - entry.potential_sources.push_back(PotentialSource{ - .source_pointer = source_pointer, .crosses = crosses}); - } + for (const auto *source_pointer : sources) { + if (source_pointer->empty()) { + continue; } - if (ancestor.empty()) { - break; + const auto source_parent_pointer{source_pointer->initial()}; + auto source_parent_iterator = + this->pointer_to_location_.find(std::cref(source_parent_pointer)); + if (source_parent_iterator == this->pointer_to_location_.end()) { + continue; } - ancestor = ancestor.initial(); - first_iteration = false; - } - if (!entry.potential_sources.empty()) { - unreachable_with_sources.push_back(std::move(entry)); + for (const Location *source_parent_location : + source_parent_iterator->second) { + this->reachability_graph_[source_parent_location].push_back( + ReachabilityEdge{.target = destination_location, + .orphan_context_only = false, + .is_reference = true}); + } } } +} - std::ranges::sort(unreachable_with_sources, [](const PotentialReach &left, - const PotentialReach &right) { - return left.location->pointer.size() < right.location->pointer.size(); - }); - - // --------------------------------------------------------------------------- - // (5) Propagate reachability through references using fixpoint iteration - // --------------------------------------------------------------------------- - - bool changed{true}; - while (changed) { - changed = false; - std::vector> newly_reachable; - - auto write_iterator = unreachable_with_sources.begin(); - for (auto read_iterator = unreachable_with_sources.begin(); - read_iterator != unreachable_with_sources.end(); ++read_iterator) { - bool became_reachable = false; +auto SchemaFrame::populate_reachability(const Location &base, + const SchemaWalker &walker, + const SchemaResolver &resolver) const + -> const ReachabilityCache & { + const ReachabilityKey key{.pointer = &base.pointer, .orphan = base.orphan}; + auto cache_iterator = this->reachability_.find(key); + if (cache_iterator != this->reachability_.end()) { + return cache_iterator->second; + } - for (const auto &potential_source : read_iterator->potential_sources) { - if (potential_source.crosses) { - continue; + auto &cache = this->reachability_[key]; + this->populate_reachability_graph(walker, resolver); + const Location *base_location{&base}; + std::vector queue; + std::unordered_set visited; + + auto mark_pointer_reachable = [this, &cache](const WeakPointer &pointer) { + auto locations_iterator = + this->pointer_to_location_.find(std::cref(pointer)); + if (locations_iterator != this->pointer_to_location_.end()) { + for (const auto *location : locations_iterator->second) { + if (location->type != LocationType::Pointer) { + cache.emplace(std::cref(location->pointer), true); } + } + } + }; - const auto &source_parent{potential_source.source_pointer->initial()}; - const auto reachability_iterator{cache.find(std::cref(source_parent))}; - const bool source_parent_reachable{reachability_iterator != - cache.end() && - reachability_iterator->second}; + queue.push_back(base_location); + visited.insert(base_location); + mark_pointer_reachable(base_location->pointer); - if (source_parent_reachable) { - became_reachable = true; - break; - } - } + std::size_t queue_index{0}; + while (queue_index < queue.size()) { + const Location *current = queue[queue_index++]; - if (became_reachable) { - cache[std::cref(read_iterator->location->pointer)] = true; - newly_reachable.push_back(std::cref(read_iterator->location->pointer)); - changed = true; - } else { - if (write_iterator != read_iterator) { - *write_iterator = std::move(*read_iterator); - } - ++write_iterator; - } + auto edges_iterator = this->reachability_graph_.find(current); + if (edges_iterator == this->reachability_graph_.end()) { + continue; } - unreachable_with_sources.erase(write_iterator, - unreachable_with_sources.end()); - for (auto &[cache_pointer, cache_reachable] : cache) { - if (cache_reachable) { + for (const auto &edge : edges_iterator->second) { + if (visited.contains(edge.target)) { continue; } - if (!this->pointers_with_non_orphan_.contains(cache_pointer)) { + + if (edge.orphan_context_only && !base.orphan && !current->orphan) { continue; } - for (const auto &reached : newly_reachable) { - if (cache_pointer.get().starts_with(reached.get())) { - cache_reachable = true; - break; + + if (!edge.is_reference && edge.orphan_context_only) { + auto target_iterator = this->location_members_children_.find( + std::cref(edge.target->pointer)); + if (target_iterator != this->location_members_children_.end()) { + const auto keyword_path{edge.target->pointer.initial()}; + if (keyword_path.starts_with(current->pointer)) { + continue; + } } } + + visited.insert(edge.target); + queue.push_back(edge.target); + mark_pointer_reachable(edge.target->pointer); } } @@ -1771,8 +1757,7 @@ auto SchemaFrame::is_reachable(const Location &base, const Location &location, assert(location.type != LocationType::Pointer); const auto &cache{this->populate_reachability(base, walker, resolver)}; const auto iterator{cache.find(std::cref(location.pointer))}; - assert(iterator != cache.end()); - return iterator->second; + return iterator != cache.end() && iterator->second; } } // namespace sourcemeta::core diff --git a/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h b/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h index 954860f69..f18a5f1c5 100644 --- a/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h +++ b/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h @@ -268,11 +268,59 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { using ReachabilityCache = std::unordered_map, bool, WeakPointer::Hasher, WeakPointer::Comparator>; - mutable std::vector> + struct ReachabilityKey { + const WeakPointer *pointer; + bool orphan; + auto operator==(const ReachabilityKey &other) const noexcept -> bool { + return this->pointer == other.pointer && this->orphan == other.orphan; + } + }; + struct ReachabilityKeyHasher { + auto operator()(const ReachabilityKey &key) const noexcept -> std::size_t { + return std::hash{}(key.pointer) ^ + (std::hash{}(key.orphan) << 1); + } + }; + mutable std::unordered_map reachability_; + mutable std::unordered_map, + std::vector, + WeakPointer::Hasher, WeakPointer::Comparator> + references_by_destination_; + mutable std::unordered_set, + WeakPointer::Hasher, WeakPointer::Comparator> + location_members_children_; + mutable std::unordered_map, + std::vector, WeakPointer::Hasher, + WeakPointer::Comparator> + descendants_by_pointer_; + struct PotentialSource { + const WeakPointer *source_pointer; + WeakPointer source_parent; + bool crosses; + }; + mutable std::unordered_map> + potential_sources_by_location_; + struct ReachabilityEdge { + const Location *target; + bool orphan_context_only; + bool is_reference; + }; + mutable std::unordered_map> + reachability_graph_; bool standalone_{false}; auto populate_pointer_to_location() const -> void; + auto populate_reference_graph() const -> void; + auto populate_location_members(const SchemaWalker &walker, + const SchemaResolver &resolver) const -> void; + auto populate_descendants() const -> void; + auto populate_potential_sources(const SchemaWalker &walker, + const SchemaResolver &resolver) const -> void; + auto populate_reachability_graph(const SchemaWalker &walker, + const SchemaResolver &resolver) const + -> void; auto populate_reachability(const Location &base, const SchemaWalker &walker, const SchemaResolver &resolver) const -> const ReachabilityCache &;