From fc80f3d47520a0a6564d92eb7ae288a72fa1c589 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Mon, 17 Nov 2025 17:41:38 -0400 Subject: [PATCH] Improve linter with regards to cross-dialect embedded resources Fixes: https://github.com/sourcemeta/studio/issues/67 Signed-off-by: Juan Cruz Viotti --- DEPENDENCIES | 2 +- test/CMakeLists.txt | 3 +- ...=> fail_lint_invalid_embedded_resource.sh} | 10 ++++--- test/lint/pass_lint_embedded_resource.sh | 29 +++++++++++++++++++ test/lint/pass_lint_list_exclude.sh | 5 +++- test/lint/pass_lint_list_long.sh | 5 +++- test/lint/pass_lint_list_short.sh | 5 +++- test/lint/pass_lint_list_strict.sh | 5 +++- vendor/core/src/core/jsonschema/frame.cc | 22 +++++++------- .../core/src/core/jsonschema/transformer.cc | 14 ++++----- .../src/extension/alterschema/CMakeLists.txt | 1 + .../src/extension/alterschema/alterschema.cc | 2 ++ .../alterschema/linter/ignored_metaschema.h | 29 +++++++++++++++++++ 13 files changed, 103 insertions(+), 29 deletions(-) rename test/lint/{fail_lint_embedded_resource.sh => fail_lint_invalid_embedded_resource.sh} (75%) create mode 100755 test/lint/pass_lint_embedded_resource.sh create mode 100644 vendor/core/src/extension/alterschema/linter/ignored_metaschema.h diff --git a/DEPENDENCIES b/DEPENDENCIES index e38fae38..81f812f0 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,5 +1,5 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core f94f8b4bc7c1406ee37f86fe3c1886e8bedf70ee +core https://github.com/sourcemeta/core eff08dbc8da88db93c52643ff2eb84db1741b662 jsonbinpack https://github.com/sourcemeta/jsonbinpack abd40e41050d14d74af1fddb5c397de5cca3b13c blaze https://github.com/sourcemeta/blaze e6da51b14d4a187b03c175af2c87395f9c150d15 hydra https://github.com/sourcemeta/hydra af9f2c54709d620872ead0c3f8f683c15a0fa702 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f089dbc5..3e96e3e6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -277,8 +277,9 @@ add_jsonschema_test_unix(compile/pass_resolve_remap_relative) # Lint add_jsonschema_test_unix(lint/pass_lint_fix) add_jsonschema_test_unix(lint/pass_lint_no_fix) +add_jsonschema_test_unix(lint/pass_lint_embedded_resource) add_jsonschema_test_unix(lint/fail_lint) -add_jsonschema_test_unix(lint/fail_lint_embedded_resource) +add_jsonschema_test_unix(lint/fail_lint_invalid_embedded_resource) add_jsonschema_test_unix(lint/fail_lint_with_id) add_jsonschema_test_unix(lint/fail_lint_fix_broken_reference) add_jsonschema_test_unix(lint/fail_lint_default_dialect) diff --git a/test/lint/fail_lint_embedded_resource.sh b/test/lint/fail_lint_invalid_embedded_resource.sh similarity index 75% rename from test/lint/fail_lint_embedded_resource.sh rename to test/lint/fail_lint_invalid_embedded_resource.sh index 14f3981f..3ffa2394 100755 --- a/test/lint/fail_lint_embedded_resource.sh +++ b/test/lint/fail_lint_invalid_embedded_resource.sh @@ -16,7 +16,7 @@ cat << 'EOF' > "$TMP/schema.json" "embedded": { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "embedded", - "allOf": [ { "$ref": "#/definitions/foo" } ], + "$ref": "#/definitions/foo", "definitions": { "foo": { "type": "number" } } @@ -29,13 +29,15 @@ cd "$TMP" "$1" lint "$TMP/schema.json" >"$TMP/stderr.txt" 2>&1 && CODE="$?" || CODE="$?" test "$CODE" = "1" || exit 1 +cat "$TMP/stderr.txt" + cat << 'EOF' > "$TMP/expected.txt" schema.json:10:7: `definitions` was superseded by `$defs` in 2019-09 and later versions (definitions_to_defs) at location "/$defs/embedded/definitions" -schema.json:9:20: - Wrapping `$ref` in `allOf` was only necessary in JSON Schema Draft 7 and older (unnecessary_allof_ref_wrapper_modern) - at location "/$defs/embedded/allOf/0/$ref" +schema.json:7:7: + A `$schema` declaration without a sibling identifier (or with a sibling `$ref` in Draft 7 and older dialects), is ignored (ignored_metaschema) + at location "/$defs/embedded/$schema" EOF diff "$TMP/stderr.txt" "$TMP/expected.txt" diff --git a/test/lint/pass_lint_embedded_resource.sh b/test/lint/pass_lint_embedded_resource.sh new file mode 100755 index 00000000..6b409d9c --- /dev/null +++ b/test/lint/pass_lint_embedded_resource.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/schema.json" +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/main", + "$ref": "embedded", + "$defs": { + "embedded": { + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "embedded", + "allOf": [ { "$ref": "#/definitions/foo" } ], + "definitions": { + "foo": { "type": "number" } + } + } + } +} +EOF + +cd "$TMP" +"$1" lint "$TMP/schema.json" diff --git a/test/lint/pass_lint_list_exclude.sh b/test/lint/pass_lint_list_exclude.sh index 16f24805..a2cea7e8 100755 --- a/test/lint/pass_lint_list_exclude.sh +++ b/test/lint/pass_lint_list_exclude.sh @@ -88,6 +88,9 @@ exclusive_minimum_number_and_minimum if_without_then_else The `if` keyword is meaningless without the presence of the `then` or `else` keywords +ignored_metaschema + A `$schema` declaration without a sibling identifier (or with a sibling `$ref` in Draft 7 and older dialects), is ignored + items_array_default Setting the `items` keyword to the empty array does not add any further constraint @@ -163,7 +166,7 @@ unsatisfiable_max_contains unsatisfiable_min_properties Setting `minProperties` to a number less than `required` does not add any further constraint -Number of rules: 51 +Number of rules: 52 EOF diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/lint/pass_lint_list_long.sh b/test/lint/pass_lint_list_long.sh index a1389cdc..7c08ead3 100755 --- a/test/lint/pass_lint_list_long.sh +++ b/test/lint/pass_lint_list_long.sh @@ -94,6 +94,9 @@ exclusive_minimum_number_and_minimum if_without_then_else The `if` keyword is meaningless without the presence of the `then` or `else` keywords +ignored_metaschema + A `$schema` declaration without a sibling identifier (or with a sibling `$ref` in Draft 7 and older dialects), is ignored + items_array_default Setting the `items` keyword to the empty array does not add any further constraint @@ -169,7 +172,7 @@ unsatisfiable_max_contains unsatisfiable_min_properties Setting `minProperties` to a number less than `required` does not add any further constraint -Number of rules: 53 +Number of rules: 54 EOF diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/lint/pass_lint_list_short.sh b/test/lint/pass_lint_list_short.sh index 1bf8b975..a6d80c87 100755 --- a/test/lint/pass_lint_list_short.sh +++ b/test/lint/pass_lint_list_short.sh @@ -94,6 +94,9 @@ exclusive_minimum_number_and_minimum if_without_then_else The `if` keyword is meaningless without the presence of the `then` or `else` keywords +ignored_metaschema + A `$schema` declaration without a sibling identifier (or with a sibling `$ref` in Draft 7 and older dialects), is ignored + items_array_default Setting the `items` keyword to the empty array does not add any further constraint @@ -169,7 +172,7 @@ unsatisfiable_max_contains unsatisfiable_min_properties Setting `minProperties` to a number less than `required` does not add any further constraint -Number of rules: 53 +Number of rules: 54 EOF diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/lint/pass_lint_list_strict.sh b/test/lint/pass_lint_list_strict.sh index 4ada149c..126a543a 100755 --- a/test/lint/pass_lint_list_strict.sh +++ b/test/lint/pass_lint_list_strict.sh @@ -94,6 +94,9 @@ exclusive_minimum_number_and_minimum if_without_then_else The `if` keyword is meaningless without the presence of the `then` or `else` keywords +ignored_metaschema + A `$schema` declaration without a sibling identifier (or with a sibling `$ref` in Draft 7 and older dialects), is ignored + items_array_default Setting the `items` keyword to the empty array does not add any further constraint @@ -172,7 +175,7 @@ unsatisfiable_max_contains unsatisfiable_min_properties Setting `minProperties` to a number less than `required` does not add any further constraint -Number of rules: 54 +Number of rules: 55 EOF diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/vendor/core/src/core/jsonschema/frame.cc b/vendor/core/src/core/jsonschema/frame.cc index 3ea898bd..7adc34ca 100644 --- a/vendor/core/src/core/jsonschema/frame.cc +++ b/vendor/core/src/core/jsonschema/frame.cc @@ -311,24 +311,19 @@ auto repopulate_instance_locations( sourcemeta::core::SchemaFrame::Instances::mapped_type &destination, const std::optional &accumulator) -> void { - if (cache_entry.orphan && cache_entry.instance_location.empty()) { - return; - } else if (cache_entry.parent.has_value() && - // Don't consider bases from the root subschema, as if that - // subschema has any instance location other than "", then it - // indicates a recursive reference - !cache_entry.parent.value().empty()) { + // Check parent first as even orphan schemas can inherit instance locations + // from their parents if the parent is in the evaluation flow + if (cache_entry.parent.has_value() && + // Don't consider bases from the root subschema, as if that + // subschema has any instance location other than "", then it + // indicates a recursive reference + !cache_entry.parent.value().empty()) { const auto match{instances.find(cache_entry.parent.value())}; if (match == instances.cend()) { return; } for (const auto &parent_instance_location : match->second) { - // Guard against overly unrolling recursive schemas - if (parent_instance_location == cache_entry.instance_location) { - continue; - } - auto new_accumulator = cache_entry.relative_instance_location; if (accumulator.has_value()) { for (const auto &token : accumulator.value()) { @@ -349,6 +344,9 @@ auto repopulate_instance_locations( frame, instances, cache, cache_entry.parent.value(), cache.at(cache_entry.parent.value()), destination, new_accumulator); } + } else if (cache_entry.orphan && cache_entry.instance_location.empty()) { + // Only return early for orphan schemas if they don't have a parent + return; } } diff --git a/vendor/core/src/core/jsonschema/transformer.cc b/vendor/core/src/core/jsonschema/transformer.cc index 885b17a2..2218d514 100644 --- a/vendor/core/src/core/jsonschema/transformer.cc +++ b/vendor/core/src/core/jsonschema/transformer.cc @@ -131,8 +131,7 @@ auto SchemaTransformer::check( subschema_count += 1; const auto ¤t{get(schema, entry.second.pointer)}; - const auto current_vocabularies{ - vocabularies(schema, resolver, entry.second.dialect)}; + const auto current_vocabularies{frame.vocabularies(entry.second, resolver)}; bool subresult{true}; for (const auto &[name, rule] : this->rules) { const auto outcome{rule->check(current, schema, current_vocabularies, @@ -183,7 +182,8 @@ auto SchemaTransformer::apply( auto ¤t{get(schema, entry.second.pointer)}; const auto current_vocabularies{ - vocabularies(schema, resolver, entry.second.dialect)}; + frame.vocabularies(entry.second, resolver)}; + for (const auto &[name, rule] : this->rules) { const auto subresult{rule->apply(current, schema, current_vocabularies, walker, resolver, frame, @@ -219,15 +219,15 @@ auto SchemaTransformer::apply( continue; } - const auto &target{destination.value().get().pointer}; + const auto &target{destination.value().get()}; // The destination still exists, so we don't have to do anything - if (try_get(schema, target)) { + if (try_get(schema, target.pointer)) { continue; } const auto new_fragment{rule->rereference( - reference.second.destination, reference.first.second, target, - entry.second.pointer)}; + reference.second.destination, reference.first.second, + target.relative_pointer, entry.second.relative_pointer)}; // Note we use the base from the original reference before any // canonicalisation takes place so that we don't overly change diff --git a/vendor/core/src/extension/alterschema/CMakeLists.txt b/vendor/core/src/extension/alterschema/CMakeLists.txt index 31bffe79..0e4cd800 100644 --- a/vendor/core/src/extension/alterschema/CMakeLists.txt +++ b/vendor/core/src/extension/alterschema/CMakeLists.txt @@ -56,6 +56,7 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME alterschema linter/duplicate_anyof_branches.h linter/else_without_if.h linter/if_without_then_else.h + linter/ignored_metaschema.h linter/max_contains_without_contains.h linter/min_contains_without_contains.h linter/modern_official_dialect_with_empty_fragment.h diff --git a/vendor/core/src/extension/alterschema/alterschema.cc b/vendor/core/src/extension/alterschema/alterschema.cc index 9619120c..5f10adb1 100644 --- a/vendor/core/src/extension/alterschema/alterschema.cc +++ b/vendor/core/src/extension/alterschema/alterschema.cc @@ -77,6 +77,7 @@ inline auto APPLIES_TO_POINTERS(std::vector &&keywords) #include "linter/exclusive_maximum_number_and_maximum.h" #include "linter/exclusive_minimum_number_and_minimum.h" #include "linter/if_without_then_else.h" +#include "linter/ignored_metaschema.h" #include "linter/items_array_default.h" #include "linter/items_schema_default.h" #include "linter/max_contains_without_contains.h" @@ -123,6 +124,7 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { bundle.add(); bundle.add(); bundle.add(); + bundle.add(); bundle.add(); bundle.add(); bundle.add(); diff --git a/vendor/core/src/extension/alterschema/linter/ignored_metaschema.h b/vendor/core/src/extension/alterschema/linter/ignored_metaschema.h new file mode 100644 index 00000000..8831cff1 --- /dev/null +++ b/vendor/core/src/extension/alterschema/linter/ignored_metaschema.h @@ -0,0 +1,29 @@ +class IgnoredMetaschema final : public SchemaTransformRule { +public: + IgnoredMetaschema() + : SchemaTransformRule{ + "ignored_metaschema", + "A `$schema` declaration without a sibling identifier (or with a " + "sibling `$ref` in Draft 7 and older dialects), is ignored"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::core::JSON &schema, + const sourcemeta::core::JSON &, + const sourcemeta::core::Vocabularies &, + const sourcemeta::core::SchemaFrame &, + const sourcemeta::core::SchemaFrame::Location &location, + const sourcemeta::core::SchemaWalker &, + const sourcemeta::core::SchemaResolver &) const + -> sourcemeta::core::SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(schema.is_object() && schema.defines("$schema") && + schema.at("$schema").is_string()); + const auto dialect{sourcemeta::core::dialect(schema)}; + ONLY_CONTINUE_IF(dialect.has_value()); + ONLY_CONTINUE_IF(dialect.value() != location.dialect); + return APPLIES_TO_KEYWORDS("$schema"); + } + + auto transform(JSON &schema, const Result &) const -> void override { + schema.erase("$schema"); + } +};