diff --git a/DEPENDENCIES b/DEPENDENCIES index 5cf6e298e..6efadfe87 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,4 +1,4 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core 1b3ab73db3f0a4dfe0e1ee1e59601a81bfe100fe -blaze https://github.com/sourcemeta/blaze 04832d45bf4327d4ec874fa67f339797cd49b375 +core https://github.com/sourcemeta/core 428cbdf92f6330b0f6ae918ac0a9dce089a0470b +blaze https://github.com/sourcemeta/blaze 90c9d98ebeb0c13bd1d75fd3b0e0fb89770cc53d bootstrap https://github.com/twbs/bootstrap 1a6fdfae6be09b09eaced8f0e442ca6f7680a61e diff --git a/src/runtime/encoder_any.cc b/src/runtime/encoder_any.cc index 43c907555..358696b43 100644 --- a/src/runtime/encoder_any.cc +++ b/src/runtime/encoder_any.cc @@ -31,10 +31,9 @@ auto Encoder::LARGE_CHOICE_INDEX(const sourcemeta::core::JSON &document, const struct LARGE_CHOICE_INDEX &options) -> void { assert(options.choices.size() > 0); - const auto iterator{ - std::ranges::find_if(options.choices, [&document](const auto &choice) { - return choice == document; - })}; + const auto iterator{std::ranges::find_if( + options.choices, + [&document](const auto &choice) -> bool { return choice == document; })}; assert(iterator != std::cend(options.choices)); const auto cursor{std::distance(std::cbegin(options.choices), iterator)}; assert(sourcemeta::core::is_within(cursor, static_cast(0), @@ -47,10 +46,9 @@ auto Encoder::TOP_LEVEL_BYTE_CHOICE_INDEX( const struct TOP_LEVEL_BYTE_CHOICE_INDEX &options) -> void { assert(options.choices.size() > 0); assert(sourcemeta::core::is_byte(options.choices.size())); - const auto iterator{ - std::ranges::find_if(options.choices, [&document](auto const &choice) { - return choice == document; - })}; + const auto iterator{std::ranges::find_if( + options.choices, + [&document](auto const &choice) -> bool { return choice == document; })}; assert(iterator != std::cend(options.choices)); const auto cursor{std::distance(std::cbegin(options.choices), iterator)}; assert(sourcemeta::core::is_within( diff --git a/src/runtime/loader_v1_array.h b/src/runtime/loader_v1_array.h index 253fb63c4..21bcc309c 100644 --- a/src/runtime/loader_v1_array.h +++ b/src/runtime/loader_v1_array.h @@ -29,7 +29,7 @@ auto FIXED_TYPED_ARRAY(const sourcemeta::core::JSON &options) -> Encoding { std::transform(prefix_encodings.as_array().cbegin(), prefix_encodings.as_array().cend(), std::back_inserter(encodings), - [](const auto &element) { return load(element); }); + [](const auto &element) -> Encoding { return load(element); }); assert(encodings.size() == prefix_encodings.size()); return sourcemeta::jsonbinpack::FIXED_TYPED_ARRAY{ .size = static_cast(size.to_integer()), @@ -57,7 +57,7 @@ auto BOUNDED_8BITS_TYPED_ARRAY(const sourcemeta::core::JSON &options) std::transform(prefix_encodings.as_array().cbegin(), prefix_encodings.as_array().cend(), std::back_inserter(encodings), - [](const auto &element) { return load(element); }); + [](const auto &element) -> Encoding { return load(element); }); assert(encodings.size() == prefix_encodings.size()); return sourcemeta::jsonbinpack::BOUNDED_8BITS_TYPED_ARRAY{ .minimum = static_cast(minimum.to_integer()), @@ -81,7 +81,7 @@ auto FLOOR_TYPED_ARRAY(const sourcemeta::core::JSON &options) -> Encoding { std::transform(prefix_encodings.as_array().cbegin(), prefix_encodings.as_array().cend(), std::back_inserter(encodings), - [](const auto &element) { return load(element); }); + [](const auto &element) -> Encoding { return load(element); }); assert(encodings.size() == prefix_encodings.size()); return sourcemeta::jsonbinpack::FLOOR_TYPED_ARRAY{ .minimum = static_cast(minimum.to_integer()), @@ -104,7 +104,7 @@ auto ROOF_TYPED_ARRAY(const sourcemeta::core::JSON &options) -> Encoding { std::transform(prefix_encodings.as_array().cbegin(), prefix_encodings.as_array().cend(), std::back_inserter(encodings), - [](const auto &element) { return load(element); }); + [](const auto &element) -> Encoding { return load(element); }); assert(encodings.size() == prefix_encodings.size()); return sourcemeta::jsonbinpack::ROOF_TYPED_ARRAY{ .maximum = static_cast(maximum.to_integer()), diff --git a/vendor/blaze/DEPENDENCIES b/vendor/blaze/DEPENDENCIES index a71569883..78654694a 100644 --- a/vendor/blaze/DEPENDENCIES +++ b/vendor/blaze/DEPENDENCIES @@ -1,5 +1,5 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core bb1c78e8fa148a2ece951bb776798a43fe328821 +core https://github.com/sourcemeta/core 428cbdf92f6330b0f6ae918ac0a9dce089a0470b jsonschema-test-suite https://github.com/json-schema-org/JSON-Schema-Test-Suite 60755c1097769e313fae3ec4d63bcc9d49b5d2d5 jsonschema-2020-12 https://github.com/json-schema-org/json-schema-spec 769daad75a9553562333a8937a187741cb708c72 jsonschema-2019-09 https://github.com/json-schema-org/json-schema-spec 41014ea723120ce70b314d72f863c6929d9f3cfd diff --git a/vendor/blaze/schemas/canonical-2019-09.json b/vendor/blaze/schemas/canonical-2019-09.json index dbbc3d0d1..95ecc4cf1 100644 --- a/vendor/blaze/schemas/canonical-2019-09.json +++ b/vendor/blaze/schemas/canonical-2019-09.json @@ -140,6 +140,38 @@ "properties": { "enum": { "type": "array", + "anyOf": [ + { + "items": { + "type": "string" + } + }, + { + "items": { + "type": "number" + } + }, + { + "items": { + "type": "boolean" + } + }, + { + "items": { + "type": "null" + } + }, + { + "items": { + "type": "array" + } + }, + { + "items": { + "type": "object" + } + } + ], "minItems": 1, "uniqueItems": true } diff --git a/vendor/blaze/schemas/canonical-2020-12.json b/vendor/blaze/schemas/canonical-2020-12.json index 556cc0f0f..18ff51de3 100644 --- a/vendor/blaze/schemas/canonical-2020-12.json +++ b/vendor/blaze/schemas/canonical-2020-12.json @@ -140,6 +140,38 @@ "properties": { "enum": { "type": "array", + "anyOf": [ + { + "items": { + "type": "string" + } + }, + { + "items": { + "type": "number" + } + }, + { + "items": { + "type": "boolean" + } + }, + { + "items": { + "type": "null" + } + }, + { + "items": { + "type": "array" + } + }, + { + "items": { + "type": "object" + } + } + ], "minItems": 1, "uniqueItems": true } diff --git a/vendor/blaze/schemas/canonical-draft1.json b/vendor/blaze/schemas/canonical-draft1.json index 2d40e4912..286488998 100644 --- a/vendor/blaze/schemas/canonical-draft1.json +++ b/vendor/blaze/schemas/canonical-draft1.json @@ -89,6 +89,38 @@ "properties": { "enum": { "type": "array", + "anyOf": [ + { + "items": { + "type": "string" + } + }, + { + "items": { + "type": "number" + } + }, + { + "items": { + "type": "boolean" + } + }, + { + "items": { + "type": "null" + } + }, + { + "items": { + "type": "array" + } + }, + { + "items": { + "type": "object" + } + } + ], "minItems": 1, "uniqueItems": true } diff --git a/vendor/blaze/schemas/canonical-draft2.json b/vendor/blaze/schemas/canonical-draft2.json index 4e3144c4c..cd6349ba3 100644 --- a/vendor/blaze/schemas/canonical-draft2.json +++ b/vendor/blaze/schemas/canonical-draft2.json @@ -89,6 +89,38 @@ "properties": { "enum": { "type": "array", + "anyOf": [ + { + "items": { + "type": "string" + } + }, + { + "items": { + "type": "number" + } + }, + { + "items": { + "type": "boolean" + } + }, + { + "items": { + "type": "null" + } + }, + { + "items": { + "type": "array" + } + }, + { + "items": { + "type": "object" + } + } + ], "minItems": 1, "uniqueItems": true } diff --git a/vendor/blaze/schemas/canonical-draft3.json b/vendor/blaze/schemas/canonical-draft3.json index f9a350759..e78cbf60f 100644 --- a/vendor/blaze/schemas/canonical-draft3.json +++ b/vendor/blaze/schemas/canonical-draft3.json @@ -70,7 +70,56 @@ } } }, - "schema": { + "extends": { + "x-lint-exclude": "simple_properties_identifiers", + "type": "object", + "allOf": [ + { + "$ref": "#/$defs/metadata" + }, + { + "$ref": "#/$defs/core" + } + ], + "required": [ "extends" ], + "properties": { + "extends": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/$defs/schema" + } + }, + "required": { + "type": "boolean" + } + }, + "unevaluatedProperties": false + }, + "union": { + "x-lint-exclude": "simple_properties_identifiers", + "type": "object", + "allOf": [ + { + "$ref": "#/$defs/metadata" + }, + { + "$ref": "#/$defs/core" + } + ], + "required": [ "type" ], + "properties": { + "type": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/$defs/schema" + } + } + }, + "unevaluatedProperties": false + }, + "leaf": { "anyOf": [ { "const": {} @@ -102,6 +151,38 @@ "properties": { "enum": { "type": "array", + "anyOf": [ + { + "items": { + "type": "string" + } + }, + { + "items": { + "type": "number" + } + }, + { + "items": { + "type": "boolean" + } + }, + { + "items": { + "type": "null" + } + }, + { + "items": { + "type": "array" + } + }, + { + "items": { + "type": "object" + } + } + ], "minItems": 1, "uniqueItems": true } @@ -336,55 +417,19 @@ } }, "unevaluatedProperties": false + } + ] + }, + "schema": { + "anyOf": [ + { + "$ref": "#/$defs/leaf" }, { - "x-lint-exclude": "simple_properties_identifiers", - "type": "object", - "allOf": [ - { - "$ref": "#/$defs/metadata" - }, - { - "$ref": "#/$defs/core" - } - ], - "required": [ "type" ], - "properties": { - "type": { - "type": "array", - "minItems": 1, - "items": { - "$ref": "#/$defs/schema" - } - } - }, - "unevaluatedProperties": false + "$ref": "#/$defs/union" }, { - "x-lint-exclude": "simple_properties_identifiers", - "type": "object", - "allOf": [ - { - "$ref": "#/$defs/metadata" - }, - { - "$ref": "#/$defs/core" - } - ], - "required": [ "extends" ], - "properties": { - "extends": { - "type": "array", - "minItems": 1, - "items": { - "$ref": "#/$defs/schema" - } - }, - "required": { - "type": "boolean" - } - }, - "unevaluatedProperties": false + "$ref": "#/$defs/extends" }, { "x-lint-exclude": "simple_properties_identifiers", @@ -404,7 +449,18 @@ "maxItems": 1, "minItems": 1, "items": { - "$ref": "#/$defs/schema" + "$comment": "TODO: `disallow` should only ever wrap a single-kind leaf. We additionally permit `extends` and `type` unions here as an interim escape hatch: negating a conjunction or disjunction whose wrapper is targeted by a `$ref` cannot be pushed to the leaves, because doing so would dissolve the referenced node. The proper fix is to invert such references so the `disallow` wraps a `$ref` leaf instead, after which both can be dropped from this list", + "anyOf": [ + { + "$ref": "#/$defs/leaf" + }, + { + "$ref": "#/$defs/union" + }, + { + "$ref": "#/$defs/extends" + } + ] } } }, diff --git a/vendor/blaze/schemas/canonical-draft4.json b/vendor/blaze/schemas/canonical-draft4.json index 836da4f30..636405f56 100644 --- a/vendor/blaze/schemas/canonical-draft4.json +++ b/vendor/blaze/schemas/canonical-draft4.json @@ -82,6 +82,38 @@ "properties": { "enum": { "type": "array", + "anyOf": [ + { + "items": { + "type": "string" + } + }, + { + "items": { + "type": "number" + } + }, + { + "items": { + "type": "boolean" + } + }, + { + "items": { + "type": "null" + } + }, + { + "items": { + "type": "array" + } + }, + { + "items": { + "type": "object" + } + } + ], "minItems": 1, "uniqueItems": true } diff --git a/vendor/blaze/schemas/canonical-draft6.json b/vendor/blaze/schemas/canonical-draft6.json index ea56e9576..e2fb8935b 100644 --- a/vendor/blaze/schemas/canonical-draft6.json +++ b/vendor/blaze/schemas/canonical-draft6.json @@ -87,6 +87,38 @@ "properties": { "enum": { "type": "array", + "anyOf": [ + { + "items": { + "type": "string" + } + }, + { + "items": { + "type": "number" + } + }, + { + "items": { + "type": "boolean" + } + }, + { + "items": { + "type": "null" + } + }, + { + "items": { + "type": "array" + } + }, + { + "items": { + "type": "object" + } + } + ], "minItems": 1, "uniqueItems": true } diff --git a/vendor/blaze/schemas/canonical-draft7.json b/vendor/blaze/schemas/canonical-draft7.json index 964abd538..a5f9b247b 100644 --- a/vendor/blaze/schemas/canonical-draft7.json +++ b/vendor/blaze/schemas/canonical-draft7.json @@ -93,6 +93,38 @@ "properties": { "enum": { "type": "array", + "anyOf": [ + { + "items": { + "type": "string" + } + }, + { + "items": { + "type": "number" + } + }, + { + "items": { + "type": "boolean" + } + }, + { + "items": { + "type": "null" + } + }, + { + "items": { + "type": "array" + } + }, + { + "items": { + "type": "object" + } + } + ], "minItems": 1, "uniqueItems": true } diff --git a/vendor/blaze/src/alterschema/CMakeLists.txt b/vendor/blaze/src/alterschema/CMakeLists.txt index 6fd7ad96f..22da55e73 100644 --- a/vendor/blaze/src/alterschema/CMakeLists.txt +++ b/vendor/blaze/src/alterschema/CMakeLists.txt @@ -13,6 +13,7 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT blaze NAME alterschema canonicalizer/deprecated_false_drop.h canonicalizer/draft3_type_any.h canonicalizer/disallow_array_to_extends.h + canonicalizer/disallow_double_negation.h canonicalizer/disallow_extends_to_type.h canonicalizer/disallow_to_array_of_schemas.h canonicalizer/disallow_type_union_to_extends.h @@ -26,6 +27,7 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT blaze NAME alterschema canonicalizer/empty_disallow_drop.h canonicalizer/enum_drop_redundant_validation.h canonicalizer/enum_filter_by_type.h + canonicalizer/enum_split_by_type.h canonicalizer/exclusive_maximum_boolean_integer_fold.h canonicalizer/exclusive_maximum_integer_to_maximum.h canonicalizer/exclusive_minimum_boolean_integer_fold.h diff --git a/vendor/blaze/src/alterschema/alterschema.cc b/vendor/blaze/src/alterschema/alterschema.cc index efadc22fc..9b32e78f2 100644 --- a/vendor/blaze/src/alterschema/alterschema.cc +++ b/vendor/blaze/src/alterschema/alterschema.cc @@ -119,6 +119,7 @@ auto WALK_UP_IN_PLACE_APPLICATORS(const JSON &root, const SchemaFrame &frame, #include "canonicalizer/dependent_schemas_to_any_of.h" #include "canonicalizer/deprecated_false_drop.h" #include "canonicalizer/disallow_array_to_extends.h" +#include "canonicalizer/disallow_double_negation.h" #include "canonicalizer/disallow_extends_to_type.h" #include "canonicalizer/disallow_to_array_of_schemas.h" #include "canonicalizer/disallow_type_union_to_extends.h" @@ -133,6 +134,7 @@ auto WALK_UP_IN_PLACE_APPLICATORS(const JSON &root, const SchemaFrame &frame, #include "canonicalizer/empty_disallow_drop.h" #include "canonicalizer/enum_drop_redundant_validation.h" #include "canonicalizer/enum_filter_by_type.h" +#include "canonicalizer/enum_split_by_type.h" #include "canonicalizer/exclusive_maximum_boolean_integer_fold.h" #include "canonicalizer/exclusive_maximum_integer_to_maximum.h" #include "canonicalizer/exclusive_minimum_boolean_integer_fold.h" @@ -524,6 +526,7 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { bundle.add(); bundle.add(); bundle.add(); + bundle.add(); bundle.add(); bundle.add(); bundle.add(); @@ -539,6 +542,7 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { bundle.add(); bundle.add(); bundle.add(); + bundle.add(); bundle.add(); bundle.add(); bundle.add(); diff --git a/vendor/blaze/src/alterschema/canonicalizer/dependencies_to_any_of.h b/vendor/blaze/src/alterschema/canonicalizer/dependencies_to_any_of.h index 06945a467..9ab137c0b 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/dependencies_to_any_of.h +++ b/vendor/blaze/src/alterschema/canonicalizer/dependencies_to_any_of.h @@ -22,8 +22,8 @@ class DependenciesToAnyOf final : public SchemaTransformRule { const auto *dependencies{schema.try_at("dependencies")}; ONLY_CONTINUE_IF(dependencies && dependencies->is_object()); - ONLY_CONTINUE_IF( - std::ranges::any_of(dependencies->as_object(), [](const auto &entry) { + ONLY_CONTINUE_IF(std::ranges::any_of( + dependencies->as_object(), [](const auto &entry) -> auto { return is_schema(entry.second) || entry.second.is_array(); })); return true; diff --git a/vendor/blaze/src/alterschema/canonicalizer/dependencies_to_extends_disallow.h b/vendor/blaze/src/alterschema/canonicalizer/dependencies_to_extends_disallow.h index 24a27b0a5..5db582376 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/dependencies_to_extends_disallow.h +++ b/vendor/blaze/src/alterschema/canonicalizer/dependencies_to_extends_disallow.h @@ -21,8 +21,8 @@ class DependenciesToExtendsDisallow final : public SchemaTransformRule { const auto *dependencies{schema.try_at("dependencies")}; ONLY_CONTINUE_IF(dependencies && dependencies->is_object()); - ONLY_CONTINUE_IF( - std::ranges::any_of(dependencies->as_object(), [](const auto &entry) { + ONLY_CONTINUE_IF(std::ranges::any_of( + dependencies->as_object(), [](const auto &entry) -> auto { return is_schema(entry.second) || entry.second.is_array() || entry.second.is_string(); })); diff --git a/vendor/blaze/src/alterschema/canonicalizer/dependent_required_to_any_of.h b/vendor/blaze/src/alterschema/canonicalizer/dependent_required_to_any_of.h index 360f38bbb..525859e92 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/dependent_required_to_any_of.h +++ b/vendor/blaze/src/alterschema/canonicalizer/dependent_required_to_any_of.h @@ -26,7 +26,7 @@ class DependentRequiredToAnyOf final : public SchemaTransformRule { ONLY_CONTINUE_IF(std::ranges::any_of( dependent_required->as_object(), - [](const auto &entry) { return entry.second.is_array(); })); + [](const auto &entry) -> auto { return entry.second.is_array(); })); if (!vocabularies.contains_any( {Vocabularies::Known::JSON_Schema_2019_09_Applicator, diff --git a/vendor/blaze/src/alterschema/canonicalizer/disallow_double_negation.h b/vendor/blaze/src/alterschema/canonicalizer/disallow_double_negation.h new file mode 100644 index 000000000..4a6ffa816 --- /dev/null +++ b/vendor/blaze/src/alterschema/canonicalizer/disallow_double_negation.h @@ -0,0 +1,117 @@ +class DisallowDoubleNegation final : public SchemaTransformRule { +public: + using mutates = std::true_type; + using reframe_after_transform = std::true_type; + DisallowDoubleNegation() + : SchemaTransformRule{ + "disallow_double_negation", + "A `disallow` whose single negated schema is itself a `disallow` " + "of " + "a single schema is a double negation equivalent to the inner " + "schema"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::core::JSON &schema, + const sourcemeta::core::JSON &, + const sourcemeta::blaze::Vocabularies &vocabularies, + const sourcemeta::blaze::SchemaFrame &frame, + const sourcemeta::blaze::SchemaFrame::Location &location, + const sourcemeta::blaze::SchemaWalker &walker, + const sourcemeta::blaze::SchemaResolver &, const bool) const + -> SchemaTransformRule::Result override { + static const JSON::String KEYWORD{"disallow"}; + ONLY_CONTINUE_IF(vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_Draft_3, + Vocabularies::Known::JSON_Schema_Draft_3_Hyper}) && + schema.is_object()); + + const auto *disallow{schema.try_at(KEYWORD)}; + ONLY_CONTINUE_IF(disallow && disallow->is_array() && disallow->size() == 1); + ONLY_CONTINUE_IF(is_single_negation(disallow->at(0))); + + // Lifting the inner schema merges its keywords into this node, so the node + // must assert nothing besides `disallow` (otherwise a sibling constraint + // sharing a key with the inner schema would be silently clobbered) + ONLY_CONTINUE_IF( + wraps_single_constraint(schema, "disallow", walker, vocabularies)); + + // Collapsing the chain dissolves every intermediate `disallow` wrapper, so + // a reference targeting any of them has no valid new home: bail. A + // reference into the surviving innermost schema relocates with it (handled + // by `rereference`) + auto wrapper{location.pointer}; + const sourcemeta::core::JSON *node{&disallow->at(0)}; + while (is_single_negation(*node)) { + wrapper.push_back(std::cref(KEYWORD)); + wrapper.push_back(static_cast(0)); + if (frame.has_references_to(wrapper)) { + return false; + } + + node = &node->at(KEYWORD).at(0); + } + + return true; + } + + auto transform(JSON &schema, const Result &) const -> void override { + auto inner{schema.at("disallow").at(0).at("disallow").at(0)}; + schema.erase("disallow"); + + while (is_single_negation(inner) && + is_single_negation(inner.at("disallow").at(0))) { + auto next{inner.at("disallow").at(0).at("disallow").at(0)}; + inner = std::move(next); + } + + if (inner.is_object()) { + schema.merge(inner.as_object()); + } + } + + [[nodiscard]] auto rereference(const std::string_view, const Pointer &, + const Pointer &target, + const Pointer ¤t) const + -> Pointer override { + auto old_prefix{current.concat({"disallow", 0, "disallow", 0})}; + while ( + target.starts_with(old_prefix.concat({"disallow", 0, "disallow", 0}))) { + old_prefix = old_prefix.concat({"disallow", 0, "disallow", 0}); + } + + if (!target.starts_with(old_prefix)) { + return target; + } + + return target.rebase(old_prefix, current); + } + +private: + static auto is_single_negation(const sourcemeta::core::JSON &schema) -> bool { + return schema.is_object() && schema.size() == 1 && + schema.defines("disallow") && schema.at("disallow").is_array() && + schema.at("disallow").size() == 1; + } + + static auto wraps_single_constraint( + const sourcemeta::core::JSON &schema, const std::string_view keyword, + const sourcemeta::blaze::SchemaWalker &walker, + const sourcemeta::blaze::Vocabularies &vocabularies) -> bool { + for (const auto &entry : schema.as_object()) { + if (entry.first == keyword) { + continue; + } + + const auto type{walker(entry.first, vocabularies).type}; + if (type != SchemaKeywordType::Annotation && + type != SchemaKeywordType::Comment && + type != SchemaKeywordType::Other && + type != SchemaKeywordType::Unknown && + type != SchemaKeywordType::LocationMembers) { + return false; + } + } + + return true; + } +}; diff --git a/vendor/blaze/src/alterschema/canonicalizer/enum_split_by_type.h b/vendor/blaze/src/alterschema/canonicalizer/enum_split_by_type.h new file mode 100644 index 000000000..67c3213aa --- /dev/null +++ b/vendor/blaze/src/alterschema/canonicalizer/enum_split_by_type.h @@ -0,0 +1,123 @@ +class EnumSplitByType final : public SchemaTransformRule { +public: + using mutates = std::true_type; + using reframe_after_transform = std::true_type; + EnumSplitByType() + : SchemaTransformRule{ + "enum_split_by_type", + "An `enum` whose values span more than one type is the disjunction " + "of its single-type subsets, so it splits into a union of " + "single-type enums"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::core::JSON &schema, + const sourcemeta::core::JSON &, + const sourcemeta::blaze::Vocabularies &vocabularies, + const sourcemeta::blaze::SchemaFrame &, + const sourcemeta::blaze::SchemaFrame::Location &, + const sourcemeta::blaze::SchemaWalker &walker, + const sourcemeta::blaze::SchemaResolver &, const bool) const + -> SchemaTransformRule::Result override { + const bool any_of_dialect{ + vocabularies.contains_any({Vocabularies::Known::JSON_Schema_Draft_4, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_7}) || + (vocabularies.contains( + Vocabularies::Known::JSON_Schema_2019_09_Validation) && + vocabularies.contains( + Vocabularies::Known::JSON_Schema_2019_09_Applicator)) || + (vocabularies.contains( + Vocabularies::Known::JSON_Schema_2020_12_Validation) && + vocabularies.contains( + Vocabularies::Known::JSON_Schema_2020_12_Applicator))}; + const bool type_union_dialect{ + vocabularies.contains_any({Vocabularies::Known::JSON_Schema_Draft_0, + Vocabularies::Known::JSON_Schema_Draft_1, + Vocabularies::Known::JSON_Schema_Draft_2, + Vocabularies::Known::JSON_Schema_Draft_3})}; + ONLY_CONTINUE_IF((any_of_dialect || type_union_dialect) && + schema.is_object()); + + const auto *enumeration{schema.try_at("enum")}; + ONLY_CONTINUE_IF(enumeration && enumeration->is_array() && + !enumeration->empty()); + + JSON::TypeSet kinds; + for (const auto &value : enumeration->as_array()) { + kinds.set(static_cast(kind_of(value))); + } + ONLY_CONTINUE_IF(kinds.count() > 1); + + // The split moves only the `enum` values into branches and leaves every + // sibling on this node, so it must not strand an assertion/applicator next + // to the new union. Identity, metadata, and reference-into siblings ride + // along, while validation siblings are isolated into an `allOf`/`extends` + // by `enum_drop_redundant_validation` before this rule runs + ONLY_CONTINUE_IF( + wraps_single_constraint(schema, "enum", walker, vocabularies)); + + this->use_any_of_ = any_of_dialect; + return true; + } + + auto transform(JSON &schema, const Result &) const -> void override { + auto branches{JSON::make_array()}; + for (const auto &value : schema.at("enum").as_array()) { + const auto kind{kind_of(value)}; + bool merged{false}; + for (auto &branch : branches.as_array()) { + if (kind_of(branch.at("enum").at(0)) == kind) { + branch.at("enum").push_back(value); + merged = true; + break; + } + } + + if (!merged) { + auto values{JSON::make_array()}; + values.push_back(value); + auto branch{JSON::make_object()}; + branch.assign("enum", std::move(values)); + branches.push_back(std::move(branch)); + } + } + + schema.erase("enum"); + schema.assign(this->use_any_of_ ? "anyOf" : "type", std::move(branches)); + } + +private: + static auto kind_of(const sourcemeta::core::JSON &value) + -> sourcemeta::core::JSON::Type { + // Fold integers and reals into a single `number` kind, as an `enum` that + // mixes them is still single-type + const auto type{value.type()}; + return type == sourcemeta::core::JSON::Type::Integer + ? sourcemeta::core::JSON::Type::Real + : type; + } + + static auto wraps_single_constraint( + const sourcemeta::core::JSON &schema, const std::string_view keyword, + const sourcemeta::blaze::SchemaWalker &walker, + const sourcemeta::blaze::Vocabularies &vocabularies) -> bool { + for (const auto &entry : schema.as_object()) { + if (entry.first == keyword) { + continue; + } + + const auto type{walker(entry.first, vocabularies).type}; + if (type != SchemaKeywordType::Annotation && + type != SchemaKeywordType::Comment && + type != SchemaKeywordType::Other && + type != SchemaKeywordType::Unknown && + type != SchemaKeywordType::LocationMembers) { + return false; + } + } + + return true; + } + + mutable bool use_any_of_{false}; +}; diff --git a/vendor/blaze/src/alterschema/canonicalizer/implicit_contains_keywords.h b/vendor/blaze/src/alterschema/canonicalizer/implicit_contains_keywords.h index 1f9a579c1..5c633cedc 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/implicit_contains_keywords.h +++ b/vendor/blaze/src/alterschema/canonicalizer/implicit_contains_keywords.h @@ -28,16 +28,17 @@ class ImplicitContainsKeywords final : public SchemaTransformRule { } else { ONLY_CONTINUE_IF(!schema.defines("minContains") && !schema.defines("maxContains")); - ONLY_CONTINUE_IF(!WALK_UP_IN_PLACE_APPLICATORS( - root, frame, location, walker, resolver, - [](const JSON &ancestor, - const Vocabularies &ancestor_vocabularies) { - return ancestor.defines("unevaluatedItems") && - ancestor_vocabularies.contains( - Vocabularies::Known:: - JSON_Schema_2020_12_Unevaluated); - }) - .has_value()); + ONLY_CONTINUE_IF( + !WALK_UP_IN_PLACE_APPLICATORS( + root, frame, location, walker, resolver, + [](const JSON &ancestor, + const Vocabularies &ancestor_vocabularies) -> bool { + return ancestor.defines("unevaluatedItems") && + ancestor_vocabularies.contains( + Vocabularies::Known:: + JSON_Schema_2020_12_Unevaluated); + }) + .has_value()); } return true; diff --git a/vendor/blaze/src/alterschema/canonicalizer/items_implicit.h b/vendor/blaze/src/alterschema/canonicalizer/items_implicit.h index 9ff3db9b1..d4e88e279 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/items_implicit.h +++ b/vendor/blaze/src/alterschema/canonicalizer/items_implicit.h @@ -41,7 +41,7 @@ class ItemsImplicit final : public SchemaTransformRule { !WALK_UP_IN_PLACE_APPLICATORS( root, frame, location, walker, resolver, [](const JSON &ancestor, - const Vocabularies &ancestor_vocabularies) { + const Vocabularies &ancestor_vocabularies) -> bool { return ancestor.defines("unevaluatedItems") && ancestor_vocabularies.contains_any( {Vocabularies::Known::JSON_Schema_2020_12_Unevaluated, diff --git a/vendor/blaze/src/alterschema/canonicalizer/type_inherit_in_place.h b/vendor/blaze/src/alterschema/canonicalizer/type_inherit_in_place.h index c13edb81b..033425e78 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/type_inherit_in_place.h +++ b/vendor/blaze/src/alterschema/canonicalizer/type_inherit_in_place.h @@ -51,11 +51,11 @@ class TypeInheritInPlace final : public SchemaTransformRule { // rules may want to lift type out of conjunctions const auto ancestor{WALK_UP( root, frame, location, walker, resolver, - [](const SchemaKeywordType keyword_type) { + [](const SchemaKeywordType keyword_type) -> bool { return IS_IN_PLACE_APPLICATOR(keyword_type) && keyword_type != SchemaKeywordType::ApplicatorElementsInPlace; }, - [](const JSON &ancestor_schema, const Vocabularies &) { + [](const JSON &ancestor_schema, const Vocabularies &) -> bool { return ancestor_schema.defines("type"); })}; diff --git a/vendor/blaze/src/alterschema/canonicalizer/unsatisfiable_type_and_enum.h b/vendor/blaze/src/alterschema/canonicalizer/unsatisfiable_type_and_enum.h index 8d74ed3ee..ba0a4e242 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/unsatisfiable_type_and_enum.h +++ b/vendor/blaze/src/alterschema/canonicalizer/unsatisfiable_type_and_enum.h @@ -44,7 +44,7 @@ class UnsatisfiableTypeAndEnum final : public SchemaTransformRule { declared_types.test(std::to_underlying(JSON::Type::Integer))}; ONLY_CONTINUE_IF(std::ranges::none_of( enum_value->as_array(), - [&declared_types, integer_matches_integral](const auto &value) { + [&declared_types, integer_matches_integral](const auto &value) -> auto { return declared_types.test(std::to_underlying(value.type())) || (integer_matches_integral && value.is_integral()); })); diff --git a/vendor/blaze/src/alterschema/common/content_schema_without_media_type.h b/vendor/blaze/src/alterschema/common/content_schema_without_media_type.h index 2e0012dc9..4c00ae3ec 100644 --- a/vendor/blaze/src/alterschema/common/content_schema_without_media_type.h +++ b/vendor/blaze/src/alterschema/common/content_schema_without_media_type.h @@ -1,5 +1,6 @@ class ContentSchemaWithoutMediaType final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"contentSchema"}; public: diff --git a/vendor/blaze/src/alterschema/common/dependencies_property_tautology.h b/vendor/blaze/src/alterschema/common/dependencies_property_tautology.h index 0c094a6a8..ea0746bf5 100644 --- a/vendor/blaze/src/alterschema/common/dependencies_property_tautology.h +++ b/vendor/blaze/src/alterschema/common/dependencies_property_tautology.h @@ -38,7 +38,7 @@ class DependenciesPropertyTautology final : public SchemaTransformRule { ONLY_CONTINUE_IF(properties && properties->is_object()); ONLY_CONTINUE_IF(std::ranges::any_of( - properties->as_object(), [dependencies](const auto &entry) { + properties->as_object(), [dependencies](const auto &entry) -> auto { if (!entry.second.is_object()) { return false; } @@ -58,7 +58,7 @@ class DependenciesPropertyTautology final : public SchemaTransformRule { ONLY_CONTINUE_IF(required && required->is_array()); ONLY_CONTINUE_IF(std::ranges::any_of( - required->as_array(), [dependencies](const auto &element) { + required->as_array(), [dependencies](const auto &element) -> auto { if (!element.is_string()) { return false; } @@ -70,7 +70,7 @@ class DependenciesPropertyTautology final : public SchemaTransformRule { auto transform(JSON &schema, const Result &result) const -> void override { const bool is_draft_3_path{ - std::ranges::any_of(result.locations, [](const auto &pointer) { + std::ranges::any_of(result.locations, [](const auto &pointer) -> auto { return pointer.size() == 1 && pointer.at(0).is_property() && pointer.at(0).to_property() == "properties"; })}; diff --git a/vendor/blaze/src/alterschema/common/dependent_required_tautology.h b/vendor/blaze/src/alterschema/common/dependent_required_tautology.h index ccf74fb30..793c631e1 100644 --- a/vendor/blaze/src/alterschema/common/dependent_required_tautology.h +++ b/vendor/blaze/src/alterschema/common/dependent_required_tautology.h @@ -31,7 +31,7 @@ class DependentRequiredTautology final : public SchemaTransformRule { ONLY_CONTINUE_IF( std::any_of(required->as_array().cbegin(), required->as_array().cend(), - [dependent_required](const auto &element) { + [dependent_required](const auto &element) -> auto { return element.is_string() && dependent_required->defines(element.to_string()); })); diff --git a/vendor/blaze/src/alterschema/common/disallow_narrows_type.h b/vendor/blaze/src/alterschema/common/disallow_narrows_type.h index 95e554863..376429f3b 100644 --- a/vendor/blaze/src/alterschema/common/disallow_narrows_type.h +++ b/vendor/blaze/src/alterschema/common/disallow_narrows_type.h @@ -46,7 +46,7 @@ class DisallowNarrowsType final : public SchemaTransformRule { } const bool all_in_parent{std::ranges::all_of( - entry_types, [&parent_type_names](const auto &type_name) { + entry_types, [&parent_type_names](const auto &type_name) -> auto { return parent_type_names.contains(type_name); })}; if (!all_in_parent) { diff --git a/vendor/blaze/src/alterschema/common/dynamic_ref_to_static_ref.h b/vendor/blaze/src/alterschema/common/dynamic_ref_to_static_ref.h index 628d58a9a..c0174fb6d 100644 --- a/vendor/blaze/src/alterschema/common/dynamic_ref_to_static_ref.h +++ b/vendor/blaze/src/alterschema/common/dynamic_ref_to_static_ref.h @@ -1,6 +1,8 @@ class DynamicRefToStaticRef final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD_DYNAMIC_REF{"$dynamicRef"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD_RECURSIVE_REF{"$recursiveRef"}; public: diff --git a/vendor/blaze/src/alterschema/common/else_without_if.h b/vendor/blaze/src/alterschema/common/else_without_if.h index 11a7bc45d..386aa2074 100644 --- a/vendor/blaze/src/alterschema/common/else_without_if.h +++ b/vendor/blaze/src/alterschema/common/else_without_if.h @@ -1,5 +1,6 @@ class ElseWithoutIf final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"else"}; public: diff --git a/vendor/blaze/src/alterschema/common/enum_with_type.h b/vendor/blaze/src/alterschema/common/enum_with_type.h index c9a7d5452..3914fa140 100644 --- a/vendor/blaze/src/alterschema/common/enum_with_type.h +++ b/vendor/blaze/src/alterschema/common/enum_with_type.h @@ -75,7 +75,7 @@ class EnumWithType final : public SchemaTransformRule { current_types.test(std::to_underlying(JSON::Type::Integer))}; ONLY_CONTINUE_IF(std::ranges::all_of( enum_value->as_array(), - [¤t_types, integer_matches_integral](const auto &item) { + [¤t_types, integer_matches_integral](const auto &item) -> auto { return current_types.test(std::to_underlying(item.type())) || (integer_matches_integral && item.is_integral()); })); diff --git a/vendor/blaze/src/alterschema/common/if_without_then_else.h b/vendor/blaze/src/alterschema/common/if_without_then_else.h index eb646ecde..4cd7c68b4 100644 --- a/vendor/blaze/src/alterschema/common/if_without_then_else.h +++ b/vendor/blaze/src/alterschema/common/if_without_then_else.h @@ -1,5 +1,6 @@ class IfWithoutThenElse final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"if"}; public: diff --git a/vendor/blaze/src/alterschema/common/non_applicable_additional_items.h b/vendor/blaze/src/alterschema/common/non_applicable_additional_items.h index 3f9d988ca..fb7fe1ebc 100644 --- a/vendor/blaze/src/alterschema/common/non_applicable_additional_items.h +++ b/vendor/blaze/src/alterschema/common/non_applicable_additional_items.h @@ -1,5 +1,6 @@ class NonApplicableAdditionalItems final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"additionalItems"}; public: diff --git a/vendor/blaze/src/alterschema/common/non_applicable_disallow_types.h b/vendor/blaze/src/alterschema/common/non_applicable_disallow_types.h index dc31b6b7f..93f0c855b 100644 --- a/vendor/blaze/src/alterschema/common/non_applicable_disallow_types.h +++ b/vendor/blaze/src/alterschema/common/non_applicable_disallow_types.h @@ -91,7 +91,7 @@ class NonApplicableDisallowTypes final : public SchemaTransformRule { if (!type.is_array()) { return false; } - return std::ranges::all_of(type.as_array(), [](const auto &entry) { + return std::ranges::all_of(type.as_array(), [](const auto &entry) -> auto { return entry.is_string() && entry.to_string() != "any"; }); } diff --git a/vendor/blaze/src/alterschema/common/not_false.h b/vendor/blaze/src/alterschema/common/not_false.h index c01122c92..ff4663d8d 100644 --- a/vendor/blaze/src/alterschema/common/not_false.h +++ b/vendor/blaze/src/alterschema/common/not_false.h @@ -1,5 +1,6 @@ class NotFalse final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"not"}; public: diff --git a/vendor/blaze/src/alterschema/common/required_properties_in_properties.h b/vendor/blaze/src/alterschema/common/required_properties_in_properties.h index b23399adf..b3d80ee0c 100644 --- a/vendor/blaze/src/alterschema/common/required_properties_in_properties.h +++ b/vendor/blaze/src/alterschema/common/required_properties_in_properties.h @@ -47,7 +47,7 @@ class RequiredPropertiesInProperties final : public SchemaTransformRule { !this->defined_in_properties_sibling(schema, property.to_string()) && !WALK_UP_IN_PLACE_APPLICATORS( root, frame, location, walker, resolver, - [&](const JSON &ancestor, const Vocabularies &) { + [&](const JSON &ancestor, const Vocabularies &) -> bool { return this->defined_in_properties_sibling( ancestor, property.to_string()); }) diff --git a/vendor/blaze/src/alterschema/common/then_without_if.h b/vendor/blaze/src/alterschema/common/then_without_if.h index 85e1d0cbb..ede24a3cc 100644 --- a/vendor/blaze/src/alterschema/common/then_without_if.h +++ b/vendor/blaze/src/alterschema/common/then_without_if.h @@ -1,5 +1,6 @@ class ThenWithoutIf final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"then"}; public: diff --git a/vendor/blaze/src/alterschema/common/unknown_local_ref.h b/vendor/blaze/src/alterschema/common/unknown_local_ref.h index 00042760a..a94798f50 100644 --- a/vendor/blaze/src/alterschema/common/unknown_local_ref.h +++ b/vendor/blaze/src/alterschema/common/unknown_local_ref.h @@ -1,5 +1,6 @@ class UnknownLocalRef final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"$ref"}; public: diff --git a/vendor/blaze/src/alterschema/common/unsatisfiable_drop_validation.h b/vendor/blaze/src/alterschema/common/unsatisfiable_drop_validation.h index b8f7048c7..1258feda9 100644 --- a/vendor/blaze/src/alterschema/common/unsatisfiable_drop_validation.h +++ b/vendor/blaze/src/alterschema/common/unsatisfiable_drop_validation.h @@ -83,12 +83,13 @@ class UnsatisfiableDropValidation final : public SchemaTransformRule { return true; } if (value.is_array()) { - return std::ranges::any_of(value.as_array(), [](const auto &entry) { - if (entry.is_string()) { - return entry.to_string() == "any"; - } - return sourcemeta::blaze::is_empty_schema(entry); - }); + return std::ranges::any_of( + value.as_array(), [](const auto &entry) -> auto { + if (entry.is_string()) { + return entry.to_string() == "any"; + } + return sourcemeta::blaze::is_empty_schema(entry); + }); } return false; } diff --git a/vendor/blaze/src/alterschema/linter/content_schema_default.h b/vendor/blaze/src/alterschema/linter/content_schema_default.h index d2761f9da..df8c245da 100644 --- a/vendor/blaze/src/alterschema/linter/content_schema_default.h +++ b/vendor/blaze/src/alterschema/linter/content_schema_default.h @@ -1,5 +1,6 @@ class ContentSchemaDefault final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"contentSchema"}; public: diff --git a/vendor/blaze/src/alterschema/linter/dependencies_default.h b/vendor/blaze/src/alterschema/linter/dependencies_default.h index 721db34fd..185c59e6a 100644 --- a/vendor/blaze/src/alterschema/linter/dependencies_default.h +++ b/vendor/blaze/src/alterschema/linter/dependencies_default.h @@ -1,5 +1,6 @@ class DependenciesDefault final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"dependencies"}; public: diff --git a/vendor/blaze/src/alterschema/linter/else_empty.h b/vendor/blaze/src/alterschema/linter/else_empty.h index 532c808c0..460cb6ffc 100644 --- a/vendor/blaze/src/alterschema/linter/else_empty.h +++ b/vendor/blaze/src/alterschema/linter/else_empty.h @@ -1,5 +1,6 @@ class ElseEmpty final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"else"}; public: diff --git a/vendor/blaze/src/alterschema/linter/invalid_external_ref.h b/vendor/blaze/src/alterschema/linter/invalid_external_ref.h index 5a722e07b..41deffc7b 100644 --- a/vendor/blaze/src/alterschema/linter/invalid_external_ref.h +++ b/vendor/blaze/src/alterschema/linter/invalid_external_ref.h @@ -86,6 +86,7 @@ class InvalidExternalRef final : public SchemaTransformRule { } private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"$ref"}; mutable std::unordered_map> resolver_cache_; mutable std::unordered_map> diff --git a/vendor/blaze/src/alterschema/linter/items_schema_default.h b/vendor/blaze/src/alterschema/linter/items_schema_default.h index 48949c48b..b8444eff1 100644 --- a/vendor/blaze/src/alterschema/linter/items_schema_default.h +++ b/vendor/blaze/src/alterschema/linter/items_schema_default.h @@ -1,5 +1,6 @@ class ItemsSchemaDefault final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"items"}; public: diff --git a/vendor/blaze/src/alterschema/linter/portable_anchor_names.h b/vendor/blaze/src/alterschema/linter/portable_anchor_names.h index 9fe660f4a..6e6cf73c6 100644 --- a/vendor/blaze/src/alterschema/linter/portable_anchor_names.h +++ b/vendor/blaze/src/alterschema/linter/portable_anchor_names.h @@ -53,11 +53,16 @@ class PortableAnchorNames final : public SchemaTransformRule { } private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const sourcemeta::core::JSON::String ANCHOR{"$anchor"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const sourcemeta::core::JSON::String DYNAMIC_ANCHOR{ "$dynamicAnchor"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const sourcemeta::core::JSON::String ID_MODERN{"$id"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const sourcemeta::core::JSON::String ID_DRAFT_4{"id"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const Regex SAFE_ANCHOR_PATTERN{ to_regex("^[A-Za-z][A-Za-z0-9_.-]*$").value()}; diff --git a/vendor/blaze/src/alterschema/linter/property_names_default.h b/vendor/blaze/src/alterschema/linter/property_names_default.h index 5a2b75e05..f53bee6e5 100644 --- a/vendor/blaze/src/alterschema/linter/property_names_default.h +++ b/vendor/blaze/src/alterschema/linter/property_names_default.h @@ -1,5 +1,6 @@ class PropertyNamesDefault final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"propertyNames"}; public: diff --git a/vendor/blaze/src/alterschema/linter/property_names_type_default.h b/vendor/blaze/src/alterschema/linter/property_names_type_default.h index 320a50868..2ec448b78 100644 --- a/vendor/blaze/src/alterschema/linter/property_names_type_default.h +++ b/vendor/blaze/src/alterschema/linter/property_names_type_default.h @@ -31,7 +31,7 @@ class PropertyNamesTypeDefault final : public SchemaTransformRule { type && ((type->is_string() && type->to_string() == "string") || (type->is_array() && std::all_of(type->as_array().begin(), type->as_array().end(), - [](const auto &item) { + [](const auto &item) -> auto { return item.is_string() && item.to_string() == "string"; })))); diff --git a/vendor/blaze/src/alterschema/linter/then_empty.h b/vendor/blaze/src/alterschema/linter/then_empty.h index 921849993..5db5d7272 100644 --- a/vendor/blaze/src/alterschema/linter/then_empty.h +++ b/vendor/blaze/src/alterschema/linter/then_empty.h @@ -1,5 +1,6 @@ class ThenEmpty final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"then"}; public: diff --git a/vendor/blaze/src/alterschema/linter/unevaluated_items_default.h b/vendor/blaze/src/alterschema/linter/unevaluated_items_default.h index b4912bc56..8cc91bdc5 100644 --- a/vendor/blaze/src/alterschema/linter/unevaluated_items_default.h +++ b/vendor/blaze/src/alterschema/linter/unevaluated_items_default.h @@ -1,5 +1,6 @@ class UnevaluatedItemsDefault final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"unevaluatedItems"}; public: diff --git a/vendor/blaze/src/alterschema/linter/unevaluated_properties_default.h b/vendor/blaze/src/alterschema/linter/unevaluated_properties_default.h index fb1753b0e..6b4a8331a 100644 --- a/vendor/blaze/src/alterschema/linter/unevaluated_properties_default.h +++ b/vendor/blaze/src/alterschema/linter/unevaluated_properties_default.h @@ -1,5 +1,6 @@ class UnevaluatedPropertiesDefault final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"unevaluatedProperties"}; public: diff --git a/vendor/blaze/src/alterschema/linter/unknown_format_prefix.h b/vendor/blaze/src/alterschema/linter/unknown_format_prefix.h index 2f423feca..54b6bcd65 100644 --- a/vendor/blaze/src/alterschema/linter/unknown_format_prefix.h +++ b/vendor/blaze/src/alterschema/linter/unknown_format_prefix.h @@ -75,15 +75,19 @@ class UnknownFormatPrefix final : public SchemaTransformRule { return nullptr; } + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::unordered_set DRAFT_3_FORMATS{ "date-time", "date", "time", "utc-millisec", "regex", "color", "style", "phone", "uri", "email", "ip-address", "ipv6", "host-name"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::unordered_set DRAFT_4_FORMATS{ "date-time", "email", "hostname", "ipv4", "ipv6", "uri"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::unordered_set DRAFT_6_FORMATS{ "date-time", "email", "hostname", "ipv4", "ipv6", "uri", "uri-reference", "uri-template", "json-pointer"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::unordered_set DRAFT_7_FORMATS{ "date-time", "date", "time", "email", "idn-email", "hostname", "idn-hostname", "ipv4", @@ -91,6 +95,7 @@ class UnknownFormatPrefix final : public SchemaTransformRule { "iri-reference", "uri-template", "json-pointer", "relative-json-pointer", "regex"}; static inline const std::unordered_set + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) DRAFT_2019_09_FORMATS{ "date-time", "date", "time", "duration", "email", "idn-email", @@ -100,6 +105,7 @@ class UnknownFormatPrefix final : public SchemaTransformRule { "uri-template", "json-pointer", "relative-json-pointer", "regex"}; static inline const std::unordered_set + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) DRAFT_2020_12_FORMATS{ "date-time", "date", "time", "duration", "email", "idn-email", diff --git a/vendor/blaze/src/alterschema/linter/unnecessary_allof_ref_wrapper_modern.h b/vendor/blaze/src/alterschema/linter/unnecessary_allof_ref_wrapper_modern.h index bf2d2d0d4..e179e5fef 100644 --- a/vendor/blaze/src/alterschema/linter/unnecessary_allof_ref_wrapper_modern.h +++ b/vendor/blaze/src/alterschema/linter/unnecessary_allof_ref_wrapper_modern.h @@ -30,7 +30,7 @@ class UnnecessaryAllOfRefWrapperModern final : public SchemaTransformRule { // define `$ref` (a common multiple composition pattern) ONLY_CONTINUE_IF( !(all_of.size() > 1 && - std::ranges::all_of(all_of.as_array(), [](const auto &entry) { + std::ranges::all_of(all_of.as_array(), [](const auto &entry) -> auto { return entry.is_object() && entry.defines("$ref"); }))); diff --git a/vendor/blaze/src/alterschema/linter/unnecessary_allof_wrapper.h b/vendor/blaze/src/alterschema/linter/unnecessary_allof_wrapper.h index f2e20d872..b16b52d9d 100644 --- a/vendor/blaze/src/alterschema/linter/unnecessary_allof_wrapper.h +++ b/vendor/blaze/src/alterschema/linter/unnecessary_allof_wrapper.h @@ -1,5 +1,6 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"allOf"}; public: @@ -128,7 +129,7 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { } if (std::ranges::any_of(metadata.dependencies, - [&](const auto &dependency) { + [&](const auto &dependency) -> auto { return !entry.defines(dependency) && (schema.defines(dependency) || elevated.contains(dependency)); diff --git a/vendor/blaze/src/alterschema/linter/unnecessary_extends_wrapper.h b/vendor/blaze/src/alterschema/linter/unnecessary_extends_wrapper.h index e5334b2c3..9a7747561 100644 --- a/vendor/blaze/src/alterschema/linter/unnecessary_extends_wrapper.h +++ b/vendor/blaze/src/alterschema/linter/unnecessary_extends_wrapper.h @@ -1,5 +1,6 @@ class UnnecessaryExtendsWrapper final : public SchemaTransformRule { private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string KEYWORD{"extends"}; public: @@ -84,7 +85,7 @@ class UnnecessaryExtendsWrapper final : public SchemaTransformRule { } if (std::ranges::any_of( - metadata.dependencies, [&](const auto &dependency) { + metadata.dependencies, [&](const auto &dependency) -> auto { return !entry.defines(std::string{dependency}) && (schema.defines(std::string{dependency}) || elevated.contains(dependency)); @@ -139,7 +140,7 @@ class UnnecessaryExtendsWrapper final : public SchemaTransformRule { if (!type.is_array()) { return false; } - return std::ranges::all_of(type.as_array(), [](const auto &entry) { + return std::ranges::all_of(type.as_array(), [](const auto &entry) -> auto { return entry.is_string() && entry.to_string() != "any"; }); } diff --git a/vendor/blaze/src/alterschema/transformer.cc b/vendor/blaze/src/alterschema/transformer.cc index aaf811c30..ef62c2907 100644 --- a/vendor/blaze/src/alterschema/transformer.cc +++ b/vendor/blaze/src/alterschema/transformer.cc @@ -270,11 +270,13 @@ auto SchemaTransformer::apply(core::JSON &schema, const auto &target{destination.value().get()}; potentially_broken_references.push_back( - {core::to_pointer(reference.first.second), - core::JSON::String{reference.second.original}, - reference.second.destination, - core::JSON::String{reference.second.fragment.value()}, - core::to_pointer(target.pointer), target.relative_pointer}); + {.origin = core::to_pointer(reference.first.second), + .original = core::JSON::String{reference.second.original}, + .destination = reference.second.destination, + .fragment = + core::JSON::String{reference.second.fragment.value()}, + .target_pointer = core::to_pointer(target.pointer), + .target_relative_pointer = target.relative_pointer}); } rule->transform(current, outcome); @@ -400,7 +402,7 @@ auto SchemaTransformer::apply(core::JSON &schema, } auto SchemaTransformer::remove(const std::string_view name) -> bool { - return std::erase_if(this->rules, [&name](const auto &entry) { + return std::erase_if(this->rules, [&name](const auto &entry) -> auto { return std::get<0>(entry)->name() == name; }) > 0; } diff --git a/vendor/blaze/src/alterschema/upgrade/helpers.h b/vendor/blaze/src/alterschema/upgrade/helpers.h index c1041100b..ee1b62947 100644 --- a/vendor/blaze/src/alterschema/upgrade/helpers.h +++ b/vendor/blaze/src/alterschema/upgrade/helpers.h @@ -1,3 +1,4 @@ +// NOLINTNEXTLINE(bugprone-throwing-static-initialization) static const std::string DIALECT_OVERRIDE_KEYWORD{ "x-sourcemeta-dialect-override-subschema"}; diff --git a/vendor/blaze/src/alterschema/upgrade/prefix_promoted_2020_12_keywords.h b/vendor/blaze/src/alterschema/upgrade/prefix_promoted_2020_12_keywords.h index ebeaee8d0..883e9a1fb 100644 --- a/vendor/blaze/src/alterschema/upgrade/prefix_promoted_2020_12_keywords.h +++ b/vendor/blaze/src/alterschema/upgrade/prefix_promoted_2020_12_keywords.h @@ -58,6 +58,7 @@ class PrefixPromoted202012Keywords final : public SchemaTransformRule { } private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::array KEYWORDS{ {"prefixItems", "$dynamicAnchor", "$dynamicRef"}}; diff --git a/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_2019_09_keywords.h b/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_2019_09_keywords.h index 961845a6b..9282e3c4a 100644 --- a/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_2019_09_keywords.h +++ b/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_2019_09_keywords.h @@ -64,6 +64,7 @@ class PrefixPromoted201909Keywords final : public SchemaTransformRule { } private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::array KEYWORDS{ {"$anchor", "$recursiveAnchor", "$recursiveRef", "$vocabulary", "$defs", "dependentSchemas", "dependentRequired", "unevaluatedItems", diff --git a/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_4_keywords.h b/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_4_keywords.h index 9ba5de908..d00a2e557 100644 --- a/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_4_keywords.h +++ b/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_4_keywords.h @@ -64,6 +64,7 @@ class PrefixPromotedDraft4Keywords final : public SchemaTransformRule { } private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::array KEYWORDS{ {"multipleOf", "maxProperties", "minProperties", "allOf", "anyOf", "oneOf", "not"}}; diff --git a/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_6_keywords.h b/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_6_keywords.h index 0fd8ec4f3..5560a9c54 100644 --- a/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_6_keywords.h +++ b/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_6_keywords.h @@ -64,6 +64,7 @@ class PrefixPromotedDraft6Keywords final : public SchemaTransformRule { } private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::array KEYWORDS{ {"const", "contains", "propertyNames", "examples"}}; diff --git a/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_7_keywords.h b/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_7_keywords.h index 764997913..a80f12f0c 100644 --- a/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_7_keywords.h +++ b/vendor/blaze/src/alterschema/upgrade/prefix_promoted_draft_7_keywords.h @@ -64,6 +64,7 @@ class PrefixPromotedDraft7Keywords final : public SchemaTransformRule { } private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::array KEYWORDS{ {"$comment", "if", "then", "else", "readOnly", "writeOnly", "contentMediaType", "contentEncoding"}}; diff --git a/vendor/blaze/src/alterschema/upgrade/upgrade_2019_09_to_2020_12.h b/vendor/blaze/src/alterschema/upgrade/upgrade_2019_09_to_2020_12.h index 794612846..06c775123 100644 --- a/vendor/blaze/src/alterschema/upgrade/upgrade_2019_09_to_2020_12.h +++ b/vendor/blaze/src/alterschema/upgrade/upgrade_2019_09_to_2020_12.h @@ -196,6 +196,7 @@ class Upgrade201909To202012 final : public SchemaTransformRule { "https://json-schema.org/draft/2020-12/vocab/unevaluated"}; static inline const std::unordered_map + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) VOCAB_URI_MAP_2019_09_TO_2020_12{ {"https://json-schema.org/draft/2019-09/vocab/core", "https://json-schema.org/draft/2020-12/vocab/core"}, @@ -541,7 +542,8 @@ class Upgrade201909To202012 final : public SchemaTransformRule { const auto relative_weak{pointer.resolve_from(resource_pointer)}; this->anchor_renames_.push_back( - {sourcemeta::core::to_pointer(relative_weak), rename_iter->second}); + {.subschema_pointer = sourcemeta::core::to_pointer(relative_weak), + .new_name = rename_iter->second}); } for (const auto &reference : frame.references()) { @@ -567,7 +569,8 @@ class Upgrade201909To202012 final : public SchemaTransformRule { const auto relative_weak{ reference.first.second.resolve_from(resource_pointer)}; this->anchor_ref_rewrites_.push_back( - {sourcemeta::core::to_pointer(relative_weak), new_value}); + {.ref_pointer = sourcemeta::core::to_pointer(relative_weak), + .new_value = new_value}); } } diff --git a/vendor/blaze/src/alterschema/upgrade/upgrade_draft_3_to_draft_4.h b/vendor/blaze/src/alterschema/upgrade/upgrade_draft_3_to_draft_4.h index b7967ab72..106109cd4 100644 --- a/vendor/blaze/src/alterschema/upgrade/upgrade_draft_3_to_draft_4.h +++ b/vendor/blaze/src/alterschema/upgrade/upgrade_draft_3_to_draft_4.h @@ -73,8 +73,10 @@ class UpgradeDraft3ToDraft4 final : public SchemaTransformRule { } private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string DRAFT_3_URL{ "http://json-schema.org/draft-03/schema#"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string DRAFT_4_URL{ "http://json-schema.org/draft-04/schema#"}; diff --git a/vendor/blaze/src/alterschema/upgrade/upgrade_draft_4_to_draft_6.h b/vendor/blaze/src/alterschema/upgrade/upgrade_draft_4_to_draft_6.h index ecda996cd..ee08df3f0 100644 --- a/vendor/blaze/src/alterschema/upgrade/upgrade_draft_4_to_draft_6.h +++ b/vendor/blaze/src/alterschema/upgrade/upgrade_draft_4_to_draft_6.h @@ -127,10 +127,13 @@ class UpgradeDraft4ToDraft6 final : public SchemaTransformRule { } private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string DRAFT_4_URL{ "http://json-schema.org/draft-04/schema#"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string DRAFT_6_URL{ "http://json-schema.org/draft-06/schema#"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::array PROMOTED_KEYWORDS{ {"const", "contains", "propertyNames", "examples"}}; diff --git a/vendor/blaze/src/alterschema/upgrade/upgrade_draft_6_to_draft_7.h b/vendor/blaze/src/alterschema/upgrade/upgrade_draft_6_to_draft_7.h index 6b62e2033..3687b8ea1 100644 --- a/vendor/blaze/src/alterschema/upgrade/upgrade_draft_6_to_draft_7.h +++ b/vendor/blaze/src/alterschema/upgrade/upgrade_draft_6_to_draft_7.h @@ -58,12 +58,16 @@ class UpgradeDraft6ToDraft7 final : public SchemaTransformRule { } private: + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string DRAFT_4_URL{ "http://json-schema.org/draft-04/schema#"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string DRAFT_6_URL{ "http://json-schema.org/draft-06/schema#"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::string DRAFT_7_URL{ "http://json-schema.org/draft-07/schema#"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::array PROMOTED_KEYWORDS{ {"$comment", "if", "then", "else", "readOnly", "writeOnly", "contentMediaType", "contentEncoding"}}; diff --git a/vendor/blaze/src/alterschema/upgrade/upgrade_draft_7_to_draft_2019_09.h b/vendor/blaze/src/alterschema/upgrade/upgrade_draft_7_to_draft_2019_09.h index 0f45ca4db..ad134352b 100644 --- a/vendor/blaze/src/alterschema/upgrade/upgrade_draft_7_to_draft_2019_09.h +++ b/vendor/blaze/src/alterschema/upgrade/upgrade_draft_7_to_draft_2019_09.h @@ -113,22 +113,26 @@ class UpgradeDraft7To201909 final : public SchemaTransformRule { static constexpr std::string_view VOCAB_2019_09_CONTENT_URL{ "https://json-schema.org/draft/2019-09/vocab/content"}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::array SHADOW_EXEMPT_KEYWORDS{ {"$schema", "$id", "title", "description", "default", "examples", "$comment", "readOnly", "writeOnly", "deprecated", "contentMediaType", "contentEncoding"}}; static inline const std::array + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) PROMOTED_2019_09_KEYWORDS{{"$anchor", "$recursiveAnchor", "$recursiveRef", "$vocabulary", "$defs", "dependentSchemas", "dependentRequired", "unevaluatedItems", "unevaluatedProperties", "maxContains", "minContains", "contentSchema", "deprecated"}}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::array PROMOTED_DRAFT_7_KEYWORDS{ {"$comment", "if", "then", "else", "readOnly", "writeOnly", "contentMediaType", "contentEncoding"}}; + // NOLINTNEXTLINE(bugprone-throwing-static-initialization) static inline const std::array PROMOTED_DRAFT_6_KEYWORDS{ {"const", "contains", "propertyNames", "examples"}}; @@ -139,9 +143,10 @@ class UpgradeDraft7To201909 final : public SchemaTransformRule { mutable bool metaschema_synthesis_pending_{false}; static auto is_shadow_exempt(const std::string_view keyword) -> bool { - return std::ranges::any_of( - SHADOW_EXEMPT_KEYWORDS, - [&keyword](const auto &candidate) { return candidate == keyword; }); + return std::ranges::any_of(SHADOW_EXEMPT_KEYWORDS, + [&keyword](const auto &candidate) -> auto { + return candidate == keyword; + }); } static auto is_plain_name_fragment(const std::string_view fragment) -> bool { diff --git a/vendor/blaze/src/alterschema/wrap.cc b/vendor/blaze/src/alterschema/wrap.cc index 9235cd89f..790a4cc5f 100644 --- a/vendor/blaze/src/alterschema/wrap.cc +++ b/vendor/blaze/src/alterschema/wrap.cc @@ -34,7 +34,7 @@ auto wrap(const sourcemeta::core::JSON &schema, const SchemaFrame &frame, assert(sourcemeta::core::try_get(schema, pointer)); const auto has_internal_references{ std::any_of(frame.references().cbegin(), frame.references().cend(), - [&pointer](const auto &reference) { + [&pointer](const auto &reference) -> auto { return reference.first.second.starts_with(pointer); })}; diff --git a/vendor/blaze/src/bundle/bundle.cc b/vendor/blaze/src/bundle/bundle.cc index d7d633d07..a27ca5fa0 100644 --- a/vendor/blaze/src/bundle/bundle.cc +++ b/vendor/blaze/src/bundle/bundle.cc @@ -49,7 +49,7 @@ auto dependencies_internal( found; frame.for_each_unresolved_reference([&](const auto &pointer, - const auto &reference) { + const auto &reference) -> void { // We don't want to report official schemas, as we can expect // virtually all implementations to understand them out of the box if (is_skippable_metaschema_reference( @@ -278,7 +278,7 @@ auto bundle_schema(sourcemeta::core::JSON &root, ref_rewrites; frame.for_each_unresolved_reference([&](const auto &pointer, - const auto &reference) { + const auto &reference) -> void { // We don't want to bundle official schemas, as we can expect // virtually all implementations to understand them out of the box. // Depending on the bundling strategy, we may skip meta-schemas entirely @@ -433,7 +433,7 @@ auto bundle(sourcemeta::core::JSON &schema, const SchemaWalker &walker, SchemaFrame initial_frame{SchemaFrame::Mode::Locations}; initial_frame.analyse(schema, walker, resolver, default_dialect, default_id, paths); - initial_frame.for_each_resource_uri([&bundled](const auto &uri) { + initial_frame.for_each_resource_uri([&bundled](const auto &uri) -> void { bundled.emplace(sourcemeta::core::JSON::String{uri}, sourcemeta::core::JSON::String{uri}); }); diff --git a/vendor/blaze/src/codegen/codegen.cc b/vendor/blaze/src/codegen/codegen.cc index 325055116..debc631e8 100644 --- a/vendor/blaze/src/codegen/codegen.cc +++ b/vendor/blaze/src/codegen/codegen.cc @@ -76,7 +76,7 @@ auto compile(const sourcemeta::core::JSON &input, [[maybe_unused]] const auto canonicalized{canonicalizer.apply( schema, walker, resolver, [](const auto &, const auto, const auto, const auto &, - [[maybe_unused]] const auto applied) { assert(applied); }, + [[maybe_unused]] const auto applied) -> auto { assert(applied); }, default_dialect, default_id)}; assert(canonicalized.first); @@ -129,10 +129,12 @@ auto compile(const sourcemeta::core::JSON &input, std::ranges::sort( result, [](const CodegenIREntity &left, const CodegenIREntity &right) -> bool { - return std::visit([](const auto &entry) { return entry.pointer; }, - right) < - std::visit([](const auto &entry) { return entry.pointer; }, - left); + return std::visit( + [](const auto &entry) -> auto { return entry.pointer; }, + right) < + std::visit( + [](const auto &entry) -> auto { return entry.pointer; }, + left); }); return result; diff --git a/vendor/blaze/src/codegen/codegen_typescript.cc b/vendor/blaze/src/codegen/codegen_typescript.cc index c632372c3..4015bef43 100644 --- a/vendor/blaze/src/codegen/codegen_typescript.cc +++ b/vendor/blaze/src/codegen/codegen_typescript.cc @@ -166,8 +166,8 @@ auto TypeScript::operator()(const CodegenIRObject &entry) -> void { this->output << ";\n"; } - const auto has_non_prefix_pattern{ - std::ranges::any_of(entry.pattern, [](const auto &pattern_property) { + const auto has_non_prefix_pattern{std::ranges::any_of( + entry.pattern, [](const auto &pattern_property) -> auto { return !pattern_property.prefix.has_value(); })}; diff --git a/vendor/blaze/src/compiler/compile.cc b/vendor/blaze/src/compiler/compile.cc index 2e3a3e165..65fcbdcfb 100644 --- a/vendor/blaze/src/compiler/compile.cc +++ b/vendor/blaze/src/compiler/compile.cc @@ -417,9 +417,10 @@ auto compile(const sourcemeta::core::JSON &schema, requires_evaluation(context, entrypoint_location.pointer) || // TODO: This expression should go away if we start properly compiling // `unevaluatedItems` like we compile `unevaluatedProperties` - std::ranges::any_of(context.unevaluated, [](const auto &dependency) { - return dependency.first.ends_with("unevaluatedItems"); - })}; + std::ranges::any_of( + context.unevaluated, [](const auto &dependency) -> auto { + return dependency.first.ends_with("unevaluatedItems"); + })}; return {.dynamic = uses_dynamic_scopes, .track = track, .targets = std::move(compiled_targets), diff --git a/vendor/blaze/src/compiler/compile_helpers.h b/vendor/blaze/src/compiler/compile_helpers.h index a36190e9e..a2921279a 100644 --- a/vendor/blaze/src/compiler/compile_helpers.h +++ b/vendor/blaze/src/compiler/compile_helpers.h @@ -17,9 +17,11 @@ namespace sourcemeta::blaze { // Static keyword strings for use in DynamicContext references static const sourcemeta::core::JSON::String KEYWORD_EMPTY{}; +// NOLINTBEGIN(bugprone-throwing-static-initialization) static const sourcemeta::core::JSON::String KEYWORD_PROPERTIES{"properties"}; static const sourcemeta::core::JSON::String KEYWORD_THEN{"then"}; static const sourcemeta::core::JSON::String KEYWORD_ELSE{"else"}; +// NOLINTEND(bugprone-throwing-static-initialization) // Helper to create a single-element WeakPointer from a property name reference inline auto make_weak_pointer(const std::string &property) @@ -284,11 +286,11 @@ inline auto find_adjacent(const Context &context, const auto subschema_vocabularies{ context.frame.vocabularies(frame_entry, context.resolver)}; - if (std::ranges::any_of(vocabularies, - [&subschema_vocabularies](const auto &vocabulary) { - return subschema_vocabularies.contains( - vocabulary); - }) && + if (std::ranges::any_of( + vocabularies, + [&subschema_vocabularies](const auto &vocabulary) -> auto { + return subschema_vocabularies.contains(vocabulary); + }) && subschema.type() == type) { result.emplace_back(subschema); } @@ -336,6 +338,13 @@ inline auto requires_evaluation(const Context &context, return requires_evaluation(context, entry.pointer); } +inline auto annotations_enabled(const Context &context, + const std::string_view keyword) -> bool { + return context.mode == Mode::Exhaustive && + (!context.tweaks.annotations.has_value() || + context.tweaks.annotations.value().contains(keyword)); +} + // TODO: Elevate to Core and test inline auto diff --git a/vendor/blaze/src/compiler/compile_json.cc b/vendor/blaze/src/compiler/compile_json.cc index 89a847b06..7a16ed95c 100644 --- a/vendor/blaze/src/compiler/compile_json.cc +++ b/vendor/blaze/src/compiler/compile_json.cc @@ -27,7 +27,9 @@ auto to_json(const sourcemeta::blaze::Instruction &instruction, // Don't encode empty values, which tend to happen a lot if (value_index != 0) { value.push_back(std::visit( - [](const auto &variant) { return sourcemeta::core::to_json(variant); }, + [](const auto &variant) -> auto { + return sourcemeta::core::to_json(variant); + }, instruction.value)); } assert(value.is_array()); @@ -38,7 +40,7 @@ auto to_json(const sourcemeta::blaze::Instruction &instruction, if (!instruction.children.empty()) { auto children_json{sourcemeta::core::JSON::make_array()}; result.push_back(sourcemeta::core::to_json( - instruction.children, [&extra](const auto &subinstruction) { + instruction.children, [&extra](const auto &subinstruction) -> auto { return to_json(subinstruction, extra); })); } @@ -61,7 +63,7 @@ auto to_json(const Template &schema_template) -> sourcemeta::core::JSON { auto targets{sourcemeta::core::JSON::make_array()}; for (const auto &target : schema_template.targets) { targets.push_back(sourcemeta::core::to_json( - target, [&schema_template](const auto &instruction) { + target, [&schema_template](const auto &instruction) -> auto { return ::to_json(instruction, schema_template.extra); })); } diff --git a/vendor/blaze/src/compiler/default_compiler.cc b/vendor/blaze/src/compiler/default_compiler.cc index 5fe917fec..192b6a174 100644 --- a/vendor/blaze/src/compiler/default_compiler.cc +++ b/vendor/blaze/src/compiler/default_compiler.cc @@ -626,7 +626,7 @@ auto sourcemeta::blaze::default_schema_compiler( return {}; } - if (context.mode == Mode::FastValidation || + if (!annotations_enabled(context, dynamic_context.keyword) || schema_context.is_property_name) { return {}; } diff --git a/vendor/blaze/src/compiler/default_compiler_2019_09.h b/vendor/blaze/src/compiler/default_compiler_2019_09.h index 133ece3cd..87b6f066c 100644 --- a/vendor/blaze/src/compiler/default_compiler_2019_09.h +++ b/vendor/blaze/src/compiler/default_compiler_2019_09.h @@ -158,7 +158,7 @@ auto compiler_2019_09_applicator_contains_with_options( sourcemeta::core::empty_weak_pointer, sourcemeta::core::empty_weak_pointer)}; - if (annotate) { + if (annotate && annotations_enabled(context, dynamic_context.keyword)) { children.push_back( make(sourcemeta::blaze::InstructionIndex::AnnotationBasenameToParent, context, schema_context, relative_dynamic_context(), ValueNone{})); @@ -214,8 +214,8 @@ auto compiler_2019_09_applicator_items(const Context &context, const Instructions &) -> Instructions { // TODO: Be smarter about how we treat `unevaluatedItems` like how we do for // `unevaluatedProperties` - const bool track{ - std::ranges::any_of(context.unevaluated, [](const auto &dependency) { + const bool track{std::ranges::any_of( + context.unevaluated, [](const auto &dependency) -> auto { return dependency.first.ends_with("unevaluatedItems"); })}; @@ -237,8 +237,8 @@ auto compiler_2019_09_applicator_additionalitems( -> Instructions { // TODO: Be smarter about how we treat `unevaluatedItems` like how we do for // `unevaluatedProperties` - const bool track{ - std::ranges::any_of(context.unevaluated, [](const auto &dependency) { + const bool track{std::ranges::any_of( + context.unevaluated, [](const auto &dependency) -> auto { return dependency.first.ends_with("unevaluatedItems"); })}; @@ -282,7 +282,7 @@ auto compiler_2019_09_applicator_unevaluateditems( sourcemeta::core::empty_weak_pointer, sourcemeta::core::empty_weak_pointer)}; - if (context.mode == Mode::Exhaustive) { + if (annotations_enabled(context, dynamic_context.keyword)) { children.push_back( make(sourcemeta::blaze::InstructionIndex::AnnotationToParent, context, schema_context, relative_dynamic_context(), @@ -322,7 +322,7 @@ auto compiler_2019_09_applicator_unevaluatedproperties( sourcemeta::core::empty_weak_pointer, sourcemeta::core::empty_weak_pointer)}; - if (context.mode == Mode::Exhaustive) { + if (annotations_enabled(context, dynamic_context.keyword)) { children.push_back( make(sourcemeta::blaze::InstructionIndex::AnnotationBasenameToParent, context, schema_context, relative_dynamic_context(), ValueNone{})); @@ -358,11 +358,12 @@ auto compiler_2019_09_applicator_unevaluatedproperties( static const std::string pattern_properties_keyword{ "patternProperties"}; filter_regexes.push_back( - {parse_regex(property.first, schema_context.base, - schema_context.relative_pointer.initial().concat( - sourcemeta::blaze::make_weak_pointer( - pattern_properties_keyword))), - property.first}); + {.first = parse_regex( + property.first, schema_context.base, + schema_context.relative_pointer.initial().concat( + sourcemeta::blaze::make_weak_pointer( + pattern_properties_keyword))), + .second = property.first}); } } } @@ -451,7 +452,7 @@ auto compiler_2019_09_content_contentencoding( const Context &context, const SchemaContext &schema_context, const DynamicContext &dynamic_context, const Instructions &) -> Instructions { - if (context.mode == Mode::FastValidation) { + if (!annotations_enabled(context, dynamic_context.keyword)) { return {}; } @@ -470,7 +471,7 @@ auto compiler_2019_09_content_contentmediatype( const Context &context, const SchemaContext &schema_context, const DynamicContext &dynamic_context, const Instructions &) -> Instructions { - if (context.mode == Mode::FastValidation) { + if (!annotations_enabled(context, dynamic_context.keyword)) { return {}; } @@ -489,7 +490,7 @@ auto compiler_2019_09_content_contentschema( const Context &context, const SchemaContext &schema_context, const DynamicContext &dynamic_context, const Instructions &) -> Instructions { - if (context.mode == Mode::FastValidation) { + if (!annotations_enabled(context, dynamic_context.keyword)) { return {}; } diff --git a/vendor/blaze/src/compiler/default_compiler_2020_12.h b/vendor/blaze/src/compiler/default_compiler_2020_12.h index f1c8cf803..f7abcbd41 100644 --- a/vendor/blaze/src/compiler/default_compiler_2020_12.h +++ b/vendor/blaze/src/compiler/default_compiler_2020_12.h @@ -16,8 +16,8 @@ auto compiler_2020_12_applicator_prefixitems( -> Instructions { // TODO: Be smarter about how we treat `unevaluatedItems` like how we do for // `unevaluatedProperties` - const bool track{ - std::ranges::any_of(context.unevaluated, [](const auto &dependency) { + const bool track{std::ranges::any_of( + context.unevaluated, [](const auto &dependency) -> auto { return dependency.first.ends_with("unevaluatedItems"); })}; @@ -37,8 +37,8 @@ auto compiler_2020_12_applicator_items(const Context &context, // TODO: Be smarter about how we treat `unevaluatedItems` like how we do for // `unevaluatedProperties` - const bool track{ - std::ranges::any_of(context.unevaluated, [](const auto &dependency) { + const bool track{std::ranges::any_of( + context.unevaluated, [](const auto &dependency) -> auto { return dependency.first.ends_with("unevaluatedItems"); })}; @@ -55,8 +55,8 @@ auto compiler_2020_12_applicator_contains(const Context &context, -> Instructions { // TODO: Be smarter about how we treat `unevaluatedItems` like how we do for // `unevaluatedProperties` - const bool track{ - std::ranges::any_of(context.unevaluated, [](const auto &dependency) { + const bool track{std::ranges::any_of( + context.unevaluated, [](const auto &dependency) -> auto { return dependency.first.ends_with("unevaluatedItems"); })}; diff --git a/vendor/blaze/src/compiler/default_compiler_draft3.h b/vendor/blaze/src/compiler/default_compiler_draft3.h index 198d2d7ae..fdbd980a5 100644 --- a/vendor/blaze/src/compiler/default_compiler_draft3.h +++ b/vendor/blaze/src/compiler/default_compiler_draft3.h @@ -36,7 +36,7 @@ relative_schema_location_size(const sourcemeta::blaze::Context &context, static auto defines_direct_enumeration(const sourcemeta::blaze::Instructions &steps) -> std::optional { - const auto iterator{std::ranges::find_if(steps, [](const auto &step) { + const auto iterator{std::ranges::find_if(steps, [](const auto &step) -> auto { return step.type == sourcemeta::blaze::InstructionIndex::AssertionEqual || step.type == sourcemeta::blaze::InstructionIndex::AssertionEqualsAny; })}; @@ -77,7 +77,7 @@ is_closed_properties_required(const sourcemeta::core::JSON &schema, !schema.at("additionalProperties").to_boolean() && schema.defines("properties") && schema.at("properties").is_object() && schema.at("properties").size() == required.size() && - std::ranges::all_of(required, [&schema](const auto &property) { + std::ranges::all_of(required, [&schema](const auto &property) -> auto { return schema.at("properties") .defines(property.first, property.second); }); @@ -104,37 +104,38 @@ compile_properties(const sourcemeta::blaze::Context &context, // we prefer to evaluate smaller subschemas first, in the hope of failing // earlier without spending a lot of time on other subschemas if (context.tweaks.properties_reorder) { - std::ranges::sort(properties, [&context](const auto &left, - const auto &right) { - const auto left_size{recursive_template_size(left.second)}; - const auto right_size{recursive_template_size(right.second)}; - if (left_size == right_size) { - const auto left_direct_enumeration{ - defines_direct_enumeration(left.second)}; - const auto right_direct_enumeration{ - defines_direct_enumeration(right.second)}; - - // Enumerations always take precedence - if (left_direct_enumeration.has_value() && - right_direct_enumeration.has_value()) { - // If both options have a direct enumeration, we choose - // the one with the shorter relative schema location - return relative_schema_location_size( - context, left.second.at(left_direct_enumeration.value())) < - relative_schema_location_size( - context, - right.second.at(right_direct_enumeration.value())); - } else if (left_direct_enumeration.has_value()) { - return true; - } else if (right_direct_enumeration.has_value()) { - return false; - } + std::ranges::sort( + properties, [&context](const auto &left, const auto &right) -> auto { + const auto left_size{recursive_template_size(left.second)}; + const auto right_size{recursive_template_size(right.second)}; + if (left_size == right_size) { + const auto left_direct_enumeration{ + defines_direct_enumeration(left.second)}; + const auto right_direct_enumeration{ + defines_direct_enumeration(right.second)}; + + // Enumerations always take precedence + if (left_direct_enumeration.has_value() && + right_direct_enumeration.has_value()) { + // If both options have a direct enumeration, we choose + // the one with the shorter relative schema location + return relative_schema_location_size( + context, + left.second.at(left_direct_enumeration.value())) < + relative_schema_location_size( + context, + right.second.at(right_direct_enumeration.value())); + } else if (left_direct_enumeration.has_value()) { + return true; + } else if (right_direct_enumeration.has_value()) { + return false; + } - return left.first < right.first; - } else { - return left_size < right_size; - } - }); + return left.first < right.first; + } else { + return left_size < right_size; + } + }); } return properties; @@ -145,7 +146,7 @@ static auto to_string_hashes( sourcemeta::blaze::ValueStringSet::hash_type>> &hashes) -> sourcemeta::blaze::ValueStringHashes { assert(!hashes.empty()); - std::ranges::sort(hashes, [](const auto &left, const auto &right) { + std::ranges::sort(hashes, [](const auto &left, const auto &right) -> auto { return left.first.size() < right.first.size(); }); @@ -204,10 +205,13 @@ auto compile_required_assertions(const Context &context, if (is_closed_properties_required(schema_context.schema, properties_set)) { if (context.mode == Mode::FastValidation && assume_object) { static const std::string properties_keyword{"properties"}; + // `SchemaContext::relative_pointer` is a reference, so the concatenated + // pointer must outlive `new_schema_context` + const auto properties_pointer{ + schema_context.relative_pointer.initial().concat( + sourcemeta::blaze::make_weak_pointer(properties_keyword))}; const SchemaContext new_schema_context{ - .relative_pointer = - schema_context.relative_pointer.initial().concat( - sourcemeta::blaze::make_weak_pointer(properties_keyword)), + .relative_pointer = properties_pointer, .schema = schema_context.schema, .vocabularies = schema_context.vocabularies, .base = schema_context.base, @@ -218,7 +222,7 @@ auto compile_required_assertions(const Context &context, .base_instance_location = sourcemeta::core::empty_weak_pointer}; auto properties{compile_properties(context, new_schema_context, new_dynamic_context, current)}; - if (std::ranges::all_of(properties, [](const auto &property) { + if (std::ranges::all_of(properties, [](const auto &property) -> auto { return property.second.size() == 1 && property.second.front().type == InstructionIndex::AssertionTypeStrict; @@ -238,7 +242,7 @@ auto compile_required_assertions(const Context &context, if (context.mode == Mode::FastValidation && properties_set.size() == 3 && std::ranges::all_of(properties_set, - [&hasher](const auto &property) { + [&hasher](const auto &property) -> auto { return hasher.is_perfect(property.second); })) { std::vector> hashes; @@ -347,7 +351,7 @@ auto properties_as_loop(const Context &context, // Check if any reference from `anyOf` or `oneOf` points to us std::ranges::any_of( context.frame.references(), - [&context, ¤t_entry](const auto &reference) { + [&context, ¤t_entry](const auto &reference) -> auto { if (!context.frame.locations().contains( {sourcemeta::blaze::SchemaReferenceType::Static, reference.second.destination})) { @@ -381,12 +385,13 @@ auto properties_as_loop(const Context &context, // Always unroll inside `oneOf` or `anyOf`, to have a // better chance at quickly short-circuiting (!inside_disjunctor || - std::ranges::none_of(properties.as_object(), [&](const auto &pair) { - return pair.second.is_object() && - ((imports_validation_vocabulary && - pair.second.defines("enum")) || - (imports_const && pair.second.defines("const"))); - })); + std::ranges::none_of( + properties.as_object(), [&](const auto &pair) -> auto { + return pair.second.is_object() && + ((imports_validation_vocabulary && + pair.second.defines("enum")) || + (imports_const && pair.second.defines("const"))); + })); } auto draft3_any_type_instructions(const Context &context, @@ -543,6 +548,9 @@ auto compiler_draft3_applicator_properties_with_options( return {}; } + const bool emit_annotation{ + annotate && annotations_enabled(context, dynamic_context.keyword)}; + if (properties_as_loop(context, schema_context, schema_context.schema.at(dynamic_context.keyword))) { ValueNamedIndexes indexes; @@ -559,7 +567,7 @@ auto compiler_draft3_applicator_properties_with_options( schema_context, relative_dynamic_context(), ValuePointer{name})); } - if (annotate) { + if (emit_annotation) { substeps.push_back( make(sourcemeta::blaze::InstructionIndex::AnnotationEmit, context, schema_context, relative_dynamic_context(), @@ -612,10 +620,11 @@ auto compiler_draft3_applicator_properties_with_options( !schema_context.schema.at("additionalProperties").to_boolean() && required.size() == schema_context.schema.at(dynamic_context.keyword).size() && - std::ranges::all_of(properties, [&required](const auto &property) { - return required.contains(property.first); - })) { - if (std::ranges::all_of(properties, [](const auto &property) { + std::ranges::all_of(properties, + [&required](const auto &property) -> auto { + return required.contains(property.first); + })) { + if (std::ranges::all_of(properties, [](const auto &property) -> auto { return property.second.size() == 1 && property.second.front().type == InstructionIndex::AssertionTypeStrict; @@ -661,7 +670,7 @@ auto compiler_draft3_applicator_properties_with_options( } } - if (std::ranges::all_of(properties, [](const auto &property) { + if (std::ranges::all_of(properties, [](const auto &property) -> auto { return property.second.size() == 1 && property.second.front().type == InstructionIndex::AssertionType; @@ -712,7 +721,7 @@ auto compiler_draft3_applicator_properties_with_options( bool fusion_possible{attempt_object_fusion}; for (auto &&[name, substeps] : properties) { - if (annotate) { + if (emit_annotation) { substeps.push_back( make(sourcemeta::blaze::InstructionIndex::AnnotationEmit, context, schema_context, effective_dynamic_context, @@ -846,11 +855,11 @@ auto compiler_draft3_applicator_properties_with_options( } if (fusion_possible && substeps.size() >= 2 && - std::ranges::any_of(substeps, [](const auto &step) { + std::ranges::any_of(substeps, [](const auto &step) -> auto { return step.type == InstructionIndex::AssertionObjectPropertiesSimple; })) { - std::erase_if(substeps, [](const auto &step) { + std::erase_if(substeps, [](const auto &step) -> auto { if (step.type == InstructionIndex::AssertionDefinesAllStrict || step.type == InstructionIndex::AssertionDefinesAll) { return true; @@ -1013,7 +1022,7 @@ auto compiler_draft3_applicator_patternproperties_with_options( auto substeps{compile(context, schema_context, relative_dynamic_context(), sourcemeta::blaze::make_weak_pointer(pattern))}; - if (annotate) { + if (annotate && annotations_enabled(context, dynamic_context.keyword)) { substeps.push_back(make( sourcemeta::blaze::InstructionIndex::AnnotationBasenameToParent, context, schema_context, relative_dynamic_context(), ValueNone{})); @@ -1073,6 +1082,56 @@ auto compiler_draft3_applicator_patternproperties( context, schema_context, dynamic_context, false, false); } +// Determine whether the `properties` keyword on its own enforces a closed +// object (i.e. compiles to one of the `LoopPropertiesExactly*` forms). This +// happens when every property subschema reduces to a single strict type +// assertion of the same type. In that case `additionalProperties: false` is +// already enforced by `properties` and does not need to emit anything. +inline auto +properties_enforce_closed_object(const Context &context, + const SchemaContext &schema_context) -> bool { + const bool assume_object{schema_context.schema.defines("type") && + schema_context.schema.at("type").is_string() && + schema_context.schema.at("type").to_string() == + "object"}; + if (!assume_object || !schema_context.schema.defines("properties") || + !schema_context.schema.at("properties").is_object()) { + return false; + } + + // `SchemaContext::relative_pointer` is a reference, so the concatenated + // pointer must outlive `new_schema_context` + const auto properties_pointer{ + schema_context.relative_pointer.initial().concat( + sourcemeta::blaze::make_weak_pointer(KEYWORD_PROPERTIES))}; + const SchemaContext new_schema_context{ + .relative_pointer = properties_pointer, + .schema = schema_context.schema, + .vocabularies = schema_context.vocabularies, + .base = schema_context.base, + .is_property_name = schema_context.is_property_name}; + const DynamicContext new_dynamic_context{ + .keyword = KEYWORD_PROPERTIES, + .base_schema_location = sourcemeta::core::empty_weak_pointer, + .base_instance_location = sourcemeta::core::empty_weak_pointer}; + const auto properties{ + compile_properties(context, new_schema_context, new_dynamic_context, {})}; + if (!std::ranges::all_of(properties, [](const auto &property) -> auto { + return property.second.size() == 1 && + property.second.front().type == + InstructionIndex::AssertionTypeStrict; + })) { + return false; + } + + std::set types; + for (const auto &property : properties) { + types.insert(std::get(property.second.front().value)); + } + + return types.size() == 1; +} + auto compiler_draft3_applicator_additionalproperties_with_options( const Context &context, const SchemaContext &schema_context, const DynamicContext &dynamic_context, const bool annotate, @@ -1088,7 +1147,7 @@ auto compiler_draft3_applicator_additionalproperties_with_options( sourcemeta::core::empty_weak_pointer, sourcemeta::core::empty_weak_pointer)}; - if (annotate) { + if (annotate && annotations_enabled(context, dynamic_context.keyword)) { children.push_back( make(sourcemeta::blaze::InstructionIndex::AnnotationBasenameToParent, context, schema_context, relative_dynamic_context(), ValueNone{})); @@ -1117,11 +1176,12 @@ auto compiler_draft3_applicator_additionalproperties_with_options( static const std::string pattern_properties_keyword{ "patternProperties"}; filter_regexes.push_back( - {parse_regex(entry.first, schema_context.base, - schema_context.relative_pointer.initial().concat( - sourcemeta::blaze::make_weak_pointer( - pattern_properties_keyword))), - entry.first}); + {.first = + parse_regex(entry.first, schema_context.base, + schema_context.relative_pointer.initial().concat( + sourcemeta::blaze::make_weak_pointer( + pattern_properties_keyword))), + .second = entry.first}); } } } @@ -1145,17 +1205,24 @@ auto compiler_draft3_applicator_additionalproperties_with_options( return {}; } - // When all properties are required and `additionalProperties: false`, - // the `required` keyword compiles to `AssertionDefinesExactly` which already - // checks that the object has exactly the required properties, so we don't - // need to emit anything for `additionalProperties` + // When all properties are required and `additionalProperties: false`, the + // object is closed by another keyword, so we don't need to emit anything for + // `additionalProperties`. This happens either because `required` compiles to + // an `AssertionDefinesExactly` variant (only when there is more than one + // required property) or because `properties` itself compiles to a closed + // form. With a single required property `required` only compiles to + // `AssertionDefinesStrict`, which does not reject unknown properties, so we + // must still emit the closure unless `properties` enforces it. if (context.mode == Mode::FastValidation && children.size() == 1 && children.front().type == InstructionIndex::AssertionFail && !filter_strings.empty() && filter_prefixes.empty() && - filter_regexes.empty() && - is_closed_properties_required(schema_context.schema, - required_properties(schema_context))) { - return {}; + filter_regexes.empty()) { + const auto required{required_properties(schema_context)}; + if (is_closed_properties_required(schema_context.schema, required) && + (required.size() > 1 || + properties_enforce_closed_object(context, schema_context))) { + return {}; + } } if (context.mode == Mode::FastValidation && filter_strings.empty() && @@ -1256,6 +1323,9 @@ auto compiler_draft3_applicator_items_array( return {}; } + const bool emit_annotation{ + annotate && annotations_enabled(context, dynamic_context.keyword)}; + // Precompile subschemas std::vector subschemas; subschemas.reserve(items_size); @@ -1276,7 +1346,7 @@ auto compiler_draft3_applicator_items_array( } } - if (annotate) { + if (emit_annotation) { subchildren.push_back( make(sourcemeta::blaze::InstructionIndex::AnnotationEmit, context, schema_context, relative_dynamic_context(), @@ -1295,7 +1365,7 @@ auto compiler_draft3_applicator_items_array( } } - if (annotate) { + if (emit_annotation) { tail.push_back(make(sourcemeta::blaze::InstructionIndex::AnnotationEmit, context, schema_context, relative_dynamic_context(), sourcemeta::core::JSON{children.size() - 1})); @@ -1373,8 +1443,11 @@ auto compiler_draft3_applicator_items_with_options( return {}; } + const bool emit_annotation{ + annotate && annotations_enabled(context, dynamic_context.keyword)}; + if (is_schema(schema_context.schema.at(dynamic_context.keyword))) { - if (annotate || track_evaluation) { + if (emit_annotation || track_evaluation) { Instructions subchildren{compile(context, schema_context, relative_dynamic_context(), sourcemeta::core::empty_weak_pointer, @@ -1388,13 +1461,13 @@ auto compiler_draft3_applicator_items_with_options( ValueNone{}, std::move(subchildren))); } - if (!annotate && !track_evaluation) { + if (!emit_annotation && !track_evaluation) { return children; } Instructions tail; - if (annotate) { + if (emit_annotation) { tail.push_back(make(sourcemeta::blaze::InstructionIndex::AnnotationEmit, context, schema_context, relative_dynamic_context(), sourcemeta::core::JSON{true})); @@ -1502,6 +1575,9 @@ auto compiler_draft3_applicator_additionalitems_from_cursor( return {}; } + const bool emit_annotation{ + annotate && annotations_enabled(context, dynamic_context.keyword)}; + Instructions subchildren{compile(context, schema_context, relative_dynamic_context(), sourcemeta::core::empty_weak_pointer, @@ -1526,13 +1602,13 @@ auto compiler_draft3_applicator_additionalitems_from_cursor( } // Avoid one extra wrapper instruction if possible - if (!annotate && !track_evaluation) { + if (!emit_annotation && !track_evaluation) { return children; } Instructions tail; - if (annotate) { + if (emit_annotation) { tail.push_back(make(sourcemeta::blaze::InstructionIndex::AnnotationEmit, context, schema_context, relative_dynamic_context(), sourcemeta::core::JSON{true})); @@ -1979,9 +2055,10 @@ auto compiler_draft3_validation_type(const Context &context, if (context.mode == Mode::FastValidation && schema_context.schema.defines("enum") && schema_context.schema.at("enum").is_array() && - std::ranges::all_of( - schema_context.schema.at("enum").as_array(), - [](const auto &candidate) { return candidate.is_null(); })) { + std::ranges::all_of(schema_context.schema.at("enum").as_array(), + [](const auto &candidate) -> auto { + return candidate.is_null(); + })) { return {}; } @@ -1992,9 +2069,10 @@ auto compiler_draft3_validation_type(const Context &context, if (context.mode == Mode::FastValidation && schema_context.schema.defines("enum") && schema_context.schema.at("enum").is_array() && - std::ranges::all_of( - schema_context.schema.at("enum").as_array(), - [](const auto &candidate) { return candidate.is_boolean(); })) { + std::ranges::all_of(schema_context.schema.at("enum").as_array(), + [](const auto &candidate) -> auto { + return candidate.is_boolean(); + })) { return {}; } @@ -2026,9 +2104,10 @@ auto compiler_draft3_validation_type(const Context &context, if (context.mode == Mode::FastValidation && schema_context.schema.defines("enum") && schema_context.schema.at("enum").is_array() && - std::ranges::all_of( - schema_context.schema.at("enum").as_array(), - [](const auto &candidate) { return candidate.is_object(); })) { + std::ranges::all_of(schema_context.schema.at("enum").as_array(), + [](const auto &candidate) -> auto { + return candidate.is_object(); + })) { return {}; } @@ -2063,9 +2142,10 @@ auto compiler_draft3_validation_type(const Context &context, if (context.mode == Mode::FastValidation && schema_context.schema.defines("enum") && schema_context.schema.at("enum").is_array() && - std::ranges::all_of( - schema_context.schema.at("enum").as_array(), - [](const auto &candidate) { return candidate.is_array(); })) { + std::ranges::all_of(schema_context.schema.at("enum").as_array(), + [](const auto &candidate) -> auto { + return candidate.is_array(); + })) { return {}; } @@ -2076,9 +2156,10 @@ auto compiler_draft3_validation_type(const Context &context, if (context.mode == Mode::FastValidation && schema_context.schema.defines("enum") && schema_context.schema.at("enum").is_array() && - std::ranges::all_of( - schema_context.schema.at("enum").as_array(), - [](const auto &candidate) { return candidate.is_number(); })) { + std::ranges::all_of(schema_context.schema.at("enum").as_array(), + [](const auto &candidate) -> auto { + return candidate.is_number(); + })) { return {}; } @@ -2092,9 +2173,10 @@ auto compiler_draft3_validation_type(const Context &context, if (context.mode == Mode::FastValidation && schema_context.schema.defines("enum") && schema_context.schema.at("enum").is_array() && - std::ranges::all_of( - schema_context.schema.at("enum").as_array(), - [](const auto &candidate) { return candidate.is_integer(); })) { + std::ranges::all_of(schema_context.schema.at("enum").as_array(), + [](const auto &candidate) -> auto { + return candidate.is_integer(); + })) { return {}; } @@ -2124,9 +2206,10 @@ auto compiler_draft3_validation_type(const Context &context, if (context.mode == Mode::FastValidation && schema_context.schema.defines("enum") && schema_context.schema.at("enum").is_array() && - std::ranges::all_of( - schema_context.schema.at("enum").as_array(), - [](const auto &candidate) { return candidate.is_string(); })) { + std::ranges::all_of(schema_context.schema.at("enum").as_array(), + [](const auto &candidate) -> auto { + return candidate.is_string(); + })) { return {}; } @@ -2218,7 +2301,7 @@ auto compiler_draft3_validation_disallow(const Context &context, const auto contains_any{ (value.is_string() && value.to_string() == "any") || (value.is_array() && - std::ranges::any_of(value.as_array(), [](const auto &element) { + std::ranges::any_of(value.as_array(), [](const auto &element) -> auto { return element.is_string() && element.to_string() == "any"; }))}; if (contains_any) { @@ -2517,7 +2600,7 @@ auto compiler_draft3_validation_format(const Context &context, make(sourcemeta::blaze::InstructionIndex::AssertionStringType, context, schema_context, dynamic_context, type)}; - if (context.mode == Mode::Exhaustive) { + if (annotations_enabled(context, dynamic_context.keyword)) { Instructions annotation_children{ make(sourcemeta::blaze::InstructionIndex::AnnotationEmit, context, schema_context, dynamic_context, @@ -2533,7 +2616,7 @@ auto compiler_draft3_validation_format(const Context &context, } if (is_2019_09_format || is_2020_12_format_annotation) { - if (context.mode == Mode::FastValidation) { + if (!annotations_enabled(context, dynamic_context.keyword)) { return {}; } diff --git a/vendor/blaze/src/compiler/default_compiler_draft4.h b/vendor/blaze/src/compiler/default_compiler_draft4.h index 995c3f961..4bca15e89 100644 --- a/vendor/blaze/src/compiler/default_compiler_draft4.h +++ b/vendor/blaze/src/compiler/default_compiler_draft4.h @@ -96,7 +96,7 @@ auto compiler_draft4_applicator_anyof(const Context &context, } if (context.mode == Mode::FastValidation && - std::ranges::all_of(disjunctors, [](const auto &instruction) { + std::ranges::all_of(disjunctors, [](const auto &instruction) -> auto { return instruction.children.size() == 1 && (instruction.children.front().type == sourcemeta::blaze::InstructionIndex::AssertionTypeStrict || @@ -191,8 +191,8 @@ auto compiler_draft4_applicator_not(const Context &context, // TODO: Be smarter about how we treat `unevaluatedItems` like how we do for // `unevaluatedProperties` - const bool track_items{ - std::ranges::any_of(context.unevaluated, [](const auto &dependency) { + const bool track_items{std::ranges::any_of( + context.unevaluated, [](const auto &dependency) -> auto { return dependency.first.ends_with("unevaluatedItems"); })}; diff --git a/vendor/blaze/src/compiler/default_compiler_draft6.h b/vendor/blaze/src/compiler/default_compiler_draft6.h index 0388199d3..25a1d754e 100644 --- a/vendor/blaze/src/compiler/default_compiler_draft6.h +++ b/vendor/blaze/src/compiler/default_compiler_draft6.h @@ -25,7 +25,7 @@ auto compiler_draft6_validation_type(const Context &context, schema_context.schema.at("enum").is_array() && std::ranges::all_of( schema_context.schema.at("enum").as_array(), - [](const auto &value) { return value.is_null(); })) { + [](const auto &value) -> auto { return value.is_null(); })) { return {}; } @@ -44,7 +44,7 @@ auto compiler_draft6_validation_type(const Context &context, schema_context.schema.at("enum").is_array() && std::ranges::all_of( schema_context.schema.at("enum").as_array(), - [](const auto &value) { return value.is_boolean(); })) { + [](const auto &value) -> auto { return value.is_boolean(); })) { return {}; } @@ -82,7 +82,7 @@ auto compiler_draft6_validation_type(const Context &context, schema_context.schema.at("enum").is_array() && std::ranges::all_of( schema_context.schema.at("enum").as_array(), - [](const auto &value) { return value.is_object(); })) { + [](const auto &value) -> auto { return value.is_object(); })) { return {}; } @@ -141,7 +141,7 @@ auto compiler_draft6_validation_type(const Context &context, schema_context.schema.at("enum").is_array() && std::ranges::all_of( schema_context.schema.at("enum").as_array(), - [](const auto &value) { return value.is_array(); })) { + [](const auto &value) -> auto { return value.is_array(); })) { return {}; } @@ -160,7 +160,7 @@ auto compiler_draft6_validation_type(const Context &context, schema_context.schema.at("enum").is_array() && std::ranges::all_of( schema_context.schema.at("enum").as_array(), - [](const auto &value) { return value.is_number(); })) { + [](const auto &value) -> auto { return value.is_number(); })) { return {}; } @@ -182,7 +182,7 @@ auto compiler_draft6_validation_type(const Context &context, schema_context.schema.at("enum").is_array() && std::ranges::all_of( schema_context.schema.at("enum").as_array(), - [](const auto &value) { return value.is_integral(); })) { + [](const auto &value) -> auto { return value.is_integral(); })) { return {}; } @@ -224,7 +224,7 @@ auto compiler_draft6_validation_type(const Context &context, schema_context.schema.at("enum").is_array() && std::ranges::all_of( schema_context.schema.at("enum").as_array(), - [](const auto &value) { return value.is_string(); })) { + [](const auto &value) -> auto { return value.is_string(); })) { return {}; } diff --git a/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler.h b/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler.h index f823bc30d..8fcf6c897 100644 --- a/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler.h +++ b/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler.h @@ -25,6 +25,7 @@ #include // std::string_view #include // std::tuple #include // std::unordered_map +#include // std::unordered_set #include // std::vector /// @defgroup compiler Compiler @@ -98,6 +99,10 @@ struct Tweaks { std::size_t target_inline_threshold{50}; /// When set, force `format` to be compiled as an assertion bool format_assertion{false}; + /// Select which keywords emit annotations in exhaustive mode. When not set, + /// every annotation keyword is emitted + std::optional> + annotations{}; }; /// @ingroup compiler diff --git a/vendor/blaze/src/compiler/postprocess.h b/vendor/blaze/src/compiler/postprocess.h index a959457ae..70504705a 100644 --- a/vendor/blaze/src/compiler/postprocess.h +++ b/vendor/blaze/src/compiler/postprocess.h @@ -67,7 +67,10 @@ is_parent_to_children_instruction(const InstructionIndex type) noexcept inline auto convert_to_property_type_assertions(Instructions &instructions) -> void { for (auto &instruction : instructions) { - if (!instruction.relative_instance_location.empty()) { + if (!instruction.relative_instance_location.empty() && + std::ranges::all_of( + instruction.relative_instance_location, + [](const auto &token) -> auto { return token.is_property(); })) { switch (instruction.type) { case InstructionIndex::AssertionTypeStrict: instruction.type = InstructionIndex::AssertionPropertyTypeStrict; @@ -334,7 +337,7 @@ transform_instruction(Instruction &instruction, Instructions &output, if (!instruction.relative_instance_location.empty() && std::ranges::all_of( instruction.relative_instance_location, - [](const auto &token) { return token.is_property(); })) { + [](const auto &token) -> auto { return token.is_property(); })) { switch (instruction.type) { case InstructionIndex::AssertionTypeStrict: instruction.type = InstructionIndex::AssertionPropertyTypeStrict; diff --git a/vendor/blaze/src/compiler/unevaluated.cc b/vendor/blaze/src/compiler/unevaluated.cc index 66d2f747d..89333ab0c 100644 --- a/vendor/blaze/src/compiler/unevaluated.cc +++ b/vendor/blaze/src/compiler/unevaluated.cc @@ -8,8 +8,10 @@ using namespace sourcemeta::core; using namespace sourcemeta::blaze; using Known = Vocabularies::Known; +// NOLINTBEGIN(bugprone-throwing-static-initialization) static const std::string UNEVALUATED_PROPERTIES{"unevaluatedProperties"}; static const std::string UNEVALUATED_ITEMS{"unevaluatedItems"}; +// NOLINTEND(bugprone-throwing-static-initialization) auto find_adjacent_dependencies( const JSON::String ¤t, const JSON &schema, const SchemaFrame &frame, diff --git a/vendor/blaze/src/configuration/configuration.cc b/vendor/blaze/src/configuration/configuration.cc index 1344d55bb..b6db30db9 100644 --- a/vendor/blaze/src/configuration/configuration.cc +++ b/vendor/blaze/src/configuration/configuration.cc @@ -74,7 +74,7 @@ auto Configuration::applies_to(const std::filesystem::path &path) const const std::string filename{path.filename().string()}; return std::ranges::any_of(this->extension, - [&path, &filename](const auto &suffix) { + [&path, &filename](const auto &suffix) -> auto { if (suffix.empty()) { return path.extension().empty(); } diff --git a/vendor/blaze/src/documentation/documentation.cc b/vendor/blaze/src/documentation/documentation.cc index 83470ef94..19cf2f3c2 100644 --- a/vendor/blaze/src/documentation/documentation.cc +++ b/vendor/blaze/src/documentation/documentation.cc @@ -1545,7 +1545,7 @@ auto to_documentation(const sourcemeta::core::JSON &schema, [[maybe_unused]] const auto canonicalized{canonicalizer.apply( canonical, walker, resolver, [](const auto &, const auto, const auto, const auto &, - [[maybe_unused]] const auto applied) { assert(applied); })}; + [[maybe_unused]] const auto applied) -> auto { assert(applied); })}; assert(canonicalized.first); sourcemeta::blaze::SchemaFrame frame{ diff --git a/vendor/blaze/src/editor/editor.cc b/vendor/blaze/src/editor/editor.cc index 7b3f31660..e5078012f 100644 --- a/vendor/blaze/src/editor/editor.cc +++ b/vendor/blaze/src/editor/editor.cc @@ -115,12 +115,17 @@ auto for_editor(sourcemeta::core::JSON &schema, } reference_changes.push_back( - {sourcemeta::core::to_pointer(key.second), - sourcemeta::core::to_uri(destination.value().get()).recompose(), - keyword, true}); + {.pointer = sourcemeta::core::to_pointer(key.second), + .new_value = sourcemeta::core::to_uri(destination.value().get()) + .recompose(), + .keyword = keyword, + .rename_to_ref = true}); } else { reference_changes.push_back( - {sourcemeta::core::to_pointer(key.second), "", keyword, true}); + {.pointer = sourcemeta::core::to_pointer(key.second), + .new_value = "", + .keyword = keyword, + .rename_to_ref = true}); } } else { if (keyword == "$schema") { @@ -130,10 +135,12 @@ auto for_editor(sourcemeta::core::JSON &schema, const auto origin{frame.traverse(uri_it->second.get())}; assert(origin.has_value()); reference_changes.push_back( - {sourcemeta::core::to_pointer(key.second), - sourcemeta::core::JSON::String{sourcemeta::blaze::to_string( - origin.value().get().base_dialect)}, - keyword, false}); + {.pointer = sourcemeta::core::to_pointer(key.second), + .new_value = + sourcemeta::core::JSON::String{sourcemeta::blaze::to_string( + origin.value().get().base_dialect)}, + .keyword = keyword, + .rename_to_ref = false}); continue; } @@ -142,13 +149,18 @@ auto for_editor(sourcemeta::core::JSON &schema, const bool should_rename = keyword == "$dynamicRef" || keyword == "$recursiveRef"; reference_changes.push_back( - {sourcemeta::core::to_pointer(key.second), - sourcemeta::core::to_uri(result.value().get().pointer) - .recompose(), - keyword, should_rename}); + {.pointer = sourcemeta::core::to_pointer(key.second), + .new_value = + sourcemeta::core::to_uri(result.value().get().pointer) + .recompose(), + .keyword = keyword, + .rename_to_ref = should_rename}); } else { - reference_changes.push_back({sourcemeta::core::to_pointer(key.second), - reference.destination, keyword, false}); + reference_changes.push_back( + {.pointer = sourcemeta::core::to_pointer(key.second), + .new_value = reference.destination, + .keyword = keyword, + .rename_to_ref = false}); } } } @@ -173,12 +185,15 @@ auto for_editor(sourcemeta::core::JSON &schema, const auto vocabularies{frame.vocabularies(entry.second, resolver)}; subschema_changes.push_back( - {sourcemeta::core::to_pointer(entry.second.pointer), - entry.second.base_dialect, add_schema, - vocabularies.contains(sourcemeta::blaze::Vocabularies::Known:: - JSON_Schema_2020_12_Core), - vocabularies.contains(sourcemeta::blaze::Vocabularies::Known:: - JSON_Schema_2019_09_Core)}); + {.pointer = sourcemeta::core::to_pointer(entry.second.pointer), + .base_dialect = entry.second.base_dialect, + .add_schema_declaration = add_schema, + .erase_2020_12_keywords = + vocabularies.contains(sourcemeta::blaze::Vocabularies::Known:: + JSON_Schema_2020_12_Core), + .erase_2019_09_keywords = + vocabularies.contains(sourcemeta::blaze::Vocabularies::Known:: + JSON_Schema_2019_09_Core)}); } } diff --git a/vendor/blaze/src/evaluator/evaluator_describe.cc b/vendor/blaze/src/evaluator/evaluator_describe.cc index e970478c3..cd108e419 100644 --- a/vendor/blaze/src/evaluator/evaluator_describe.cc +++ b/vendor/blaze/src/evaluator/evaluator_describe.cc @@ -67,18 +67,19 @@ auto escape_string(const std::string_view input) -> std::string { } std::string result; - result.resize_and_overwrite(size, [&](char *buffer, std::size_t) { - auto *cursor{buffer}; - *cursor++ = '"'; - for (const auto character : input) { - if (character == '"') { - *cursor++ = '\\'; - } - *cursor++ = character; - } - *cursor++ = '"'; - return static_cast(cursor - buffer); - }); + result.resize_and_overwrite( + size, [&](char *buffer, std::size_t) -> std::size_t { + auto *cursor{buffer}; + *cursor++ = '"'; + for (const auto character : input) { + if (character == '"') { + *cursor++ = '\\'; + } + *cursor++ = character; + } + *cursor++ = '"'; + return static_cast(cursor - buffer); + }); return result; } @@ -285,9 +286,10 @@ auto describe_reference(const sourcemeta::core::JSON &target) -> std::string { auto is_within_keyword(const sourcemeta::core::WeakPointer &evaluate_path, const std::string &keyword) -> bool { - return std::ranges::any_of(evaluate_path, [&keyword](const auto &token) { - return token.is_property() && token.to_property() == keyword; - }); + return std::ranges::any_of( + evaluate_path, [&keyword](const auto &token) -> bool { + return token.is_property() && token.to_property() == keyword; + }); } auto unknown() -> std::string { @@ -391,7 +393,7 @@ auto describe(const bool valid, const Instruction &step, std::ostringstream message; if (std::ranges::any_of(evaluate_path, - [](const auto &token) { + [](const auto &token) -> bool { return token.is_property() && token.to_property() == "propertyNames"; }) && @@ -1498,7 +1500,7 @@ auto describe(const bool valid, const Instruction &step, if (step.type == sourcemeta::blaze::InstructionIndex::AssertionType || step.type == sourcemeta::blaze::InstructionIndex::AssertionTypeStrict) { if (std::ranges::any_of(evaluate_path, - [](const auto &token) { + [](const auto &token) -> bool { return token.is_property() && token.to_property() == "propertyNames"; }) && @@ -1639,7 +1641,7 @@ auto describe(const bool valid, const Instruction &step, if (step.type == sourcemeta::blaze::InstructionIndex::AssertionRegex) { if (std::ranges::any_of(evaluate_path, - [](const auto &token) { + [](const auto &token) -> bool { return token.is_property() && token.to_property() == "propertyNames"; }) && @@ -1912,7 +1914,7 @@ auto describe(const bool valid, const Instruction &step, const auto &value{instruction_value(step)}; if (std::ranges::any_of(evaluate_path, - [](const auto &token) { + [](const auto &token) -> bool { return token.is_property() && token.to_property() == "propertyNames"; }) && @@ -2040,7 +2042,7 @@ auto describe(const bool valid, const Instruction &step, assert(!value.empty()); if (std::ranges::any_of(evaluate_path, - [](const auto &token) { + [](const auto &token) -> bool { return token.is_property() && token.to_property() == "propertyNames"; }) && @@ -2087,7 +2089,7 @@ auto describe(const bool valid, const Instruction &step, assert(!value.empty()); if (std::ranges::any_of(evaluate_path, - [](const auto &token) { + [](const auto &token) -> bool { return token.is_property() && token.to_property() == "propertyNames"; }) && @@ -2660,7 +2662,7 @@ auto describe(const bool valid, const Instruction &step, if (step.type == sourcemeta::blaze::InstructionIndex::ControlJump) { if (std::ranges::any_of(evaluate_path, - [](const auto &token) { + [](const auto &token) -> bool { return token.is_property() && token.to_property() == "propertyNames"; }) && diff --git a/vendor/blaze/src/evaluator/evaluator_json.cc b/vendor/blaze/src/evaluator/evaluator_json.cc index d0b4d2ea6..872310234 100644 --- a/vendor/blaze/src/evaluator/evaluator_json.cc +++ b/vendor/blaze/src/evaluator/evaluator_json.cc @@ -108,10 +108,12 @@ auto instructions_from_json( .schema_resource = std::move(schema_resource_result).value()}); // TODO: Maybe we should emplace here? - result.push_back({std::move(type_result).value(), - std::move(relative_instance_location_result).value(), - std::move(value_result).value(), - std::move(children_result).value(), extra_index}); + result.push_back({.type = std::move(type_result).value(), + .relative_instance_location = + std::move(relative_instance_location_result).value(), + .value = std::move(value_result).value(), + .children = std::move(children_result).value(), + .extra_index = extra_index}); } return result; diff --git a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator.h b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator.h index 39f0b46ad..837a4ba57 100644 --- a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator.h +++ b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator.h @@ -193,10 +193,10 @@ class SOURCEMETA_BLAZE_EVALUATOR_EXPORT Evaluator { const sourcemeta::core::JSON &instance, const Callback *callback) -> bool; - // NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables) + // NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables,bugprone-throwing-static-initialization) static inline const sourcemeta::core::JSON null{nullptr}; static inline const sourcemeta::core::JSON empty_string{""}; - // NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables) + // NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables,bugprone-throwing-static-initialization) // Compute a hash that fits within the IEEE 754 double-precision safe // integer range (2^53 - 1), ensuring the serialized template labels diff --git a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_dispatch.h b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_dispatch.h index bf9fa62dc..9ef01708b 100644 --- a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_dispatch.h +++ b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_dispatch.h @@ -1454,7 +1454,7 @@ INSTRUCTION_HANDLER(ControlDynamicAnchorJump) { const auto label{Evaluator::hash(resource, value)}; const auto match{std::ranges::find_if( context.schema->labels, - [&label](const auto &entry) { return entry.first == label; })}; + [&label](const auto &entry) -> bool { return entry.first == label; })}; if (match != context.schema->labels.cend()) [[likely]] { result = true; assert(match->second < context.schema->targets.size()); @@ -1605,15 +1605,17 @@ INSTRUCTION_HANDLER(LoopPropertiesUnevaluatedExcept) { continue; } - if (std::ranges::any_of(filter_prefixes, [&entry](const auto &prefix) { - return entry.first.starts_with(prefix); - })) { + if (std::ranges::any_of(filter_prefixes, + [&entry](const auto &prefix) -> bool { + return entry.first.starts_with(prefix); + })) { continue; } - if (std::ranges::any_of(filter_regexes, [&entry](const auto &pattern) { - return matches(pattern.first, entry.first); - })) { + if (std::ranges::any_of(filter_regexes, + [&entry](const auto &pattern) -> bool { + return matches(pattern.first, entry.first); + })) { continue; } @@ -1906,15 +1908,17 @@ INSTRUCTION_HANDLER(LoopPropertiesExcept) { continue; } - if (std::ranges::any_of(filter_prefixes, [&entry](const auto &prefix) { - return entry.first.starts_with(prefix); - })) { + if (std::ranges::any_of(filter_prefixes, + [&entry](const auto &prefix) -> bool { + return entry.first.starts_with(prefix); + })) { continue; } - if (std::ranges::any_of(filter_regexes, [&entry](const auto &pattern) { - return matches(pattern.first, entry.first); - })) { + if (std::ranges::any_of(filter_regexes, + [&entry](const auto &pattern) -> bool { + return matches(pattern.first, entry.first); + })) { continue; } @@ -2053,7 +2057,7 @@ INSTRUCTION_HANDLER(LoopPropertiesExactlyTypeStrictHash) { for (; iterator != object.cend(); ++iterator) { // NOLINTNEXTLINE(modernize-use-ranges) if (std::ranges::none_of(value.second.first, - [&iterator](const auto &entry) { + [&iterator](const auto &entry) -> bool { return entry.first == iterator->hash; })) { result = false; @@ -2415,10 +2419,11 @@ INSTRUCTION_HANDLER(LoopItemsPropertiesExactlyTypeStrictHash) { } else if (entry.hash == value.second.first[index].first) { index += 1; continue; - } else if (!std::ranges::any_of(value.second.first, - [&entry](const auto &hash_entry) { - return hash_entry.first == entry.hash; - })) { + } else if (!std::ranges::any_of( + value.second.first, + [&entry](const auto &hash_entry) -> bool { + return hash_entry.first == entry.hash; + })) { result = false; EVALUATE_END(LoopItemsPropertiesExactlyTypeStrictHash); } else { diff --git a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_string_set.h b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_string_set.h index 9386a2e32..964d85be3 100644 --- a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_string_set.h +++ b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_string_set.h @@ -58,18 +58,20 @@ class SOURCEMETA_BLAZE_EVALUATOR_EXPORT StringSet { const auto hash{this->hasher(value)}; if (!this->contains(value, hash)) { this->data.emplace_back(value, hash); - std::ranges::sort(this->data, [](const auto &left, const auto &right) { - return left.first < right.first; - }); + std::ranges::sort(this->data, + [](const auto &left, const auto &right) -> bool { + return left.first < right.first; + }); } } inline auto insert(string_type &&value) -> void { const auto hash{this->hasher(value)}; if (!this->contains(value, hash)) { this->data.emplace_back(std::move(value), hash); - std::ranges::sort(this->data, [](const auto &left, const auto &right) { - return left.first < right.first; - }); + std::ranges::sort(this->data, + [](const auto &left, const auto &right) -> bool { + return left.first < right.first; + }); } } @@ -94,7 +96,7 @@ class SOURCEMETA_BLAZE_EVALUATOR_EXPORT StringSet { } [[nodiscard]] auto to_json() const -> sourcemeta::core::JSON { - return sourcemeta::core::to_json(this->data, [](const auto &item) { + return sourcemeta::core::to_json(this->data, [](const auto &item) -> auto { return sourcemeta::core::to_json(item.first); }); } diff --git a/vendor/blaze/src/foundation/foundation.cc b/vendor/blaze/src/foundation/foundation.cc index e3f54b0ad..589c875f1 100644 --- a/vendor/blaze/src/foundation/foundation.cc +++ b/vendor/blaze/src/foundation/foundation.cc @@ -809,14 +809,16 @@ auto sourcemeta::blaze::schema_keyword_priority( const auto &result{walker(keyword, vocabularies)}; const auto priority_from_dependencies{std::ranges::fold_left( result.dependencies, static_cast(0), - [&vocabularies, &walker](const auto accumulator, const auto &dependency) { + [&vocabularies, &walker](const auto accumulator, + const auto &dependency) -> std::uint64_t { return std::max( accumulator, schema_keyword_priority(dependency, vocabularies, walker) + 1); })}; const auto priority_from_order_dependencies{std::ranges::fold_left( result.order_dependencies, static_cast(0), - [&vocabularies, &walker](const auto accumulator, const auto &dependency) { + [&vocabularies, &walker](const auto accumulator, + const auto &dependency) -> std::uint64_t { return std::max( accumulator, schema_keyword_priority(dependency, vocabularies, walker) + 1); diff --git a/vendor/blaze/src/foundation/known_walker.cc b/vendor/blaze/src/foundation/known_walker.cc index 0185bf942..7c4f1c4b3 100644 --- a/vendor/blaze/src/foundation/known_walker.cc +++ b/vendor/blaze/src/foundation/known_walker.cc @@ -10,9 +10,11 @@ using Known = Vocabularies::Known; using KeywordHandler = const SchemaWalkerResult &(*)(const Vocabularies &vocabularies); +// NOLINTNEXTLINE(bugprone-throwing-static-initialization) static const SchemaWalkerResult UNKNOWN_RESULT{ SchemaKeywordType::Unknown, std::nullopt, {}, {}, {}}; +// NOLINTNEXTLINE(bugprone-throwing-static-initialization) static const SchemaWalkerResult UNKNOWN_WITH_REF_RESULT{ SchemaKeywordType::Unknown, std::nullopt, {"$ref"}, {}, {}}; diff --git a/vendor/blaze/src/foundation/vocabularies.cc b/vendor/blaze/src/foundation/vocabularies.cc index 62b9ad041..081d1d50c 100644 --- a/vendor/blaze/src/foundation/vocabularies.cc +++ b/vendor/blaze/src/foundation/vocabularies.cc @@ -104,6 +104,7 @@ sourcemeta::blaze::Vocabularies::Vocabularies( } } +// NOLINTNEXTLINE(bugprone-exception-escape) auto sourcemeta::blaze::Vocabularies::contains( const sourcemeta::core::JSON::String &uri) const noexcept -> bool { if (this->unknown.has_value()) { @@ -142,6 +143,7 @@ auto sourcemeta::blaze::Vocabularies::contains_any( return false; } +// NOLINTNEXTLINE(bugprone-exception-escape) auto sourcemeta::blaze::Vocabularies::insert( const sourcemeta::core::JSON::String &uri, bool required) noexcept -> void { // We NEED to allow official vocabulary string URIs here, as that's how @@ -171,6 +173,7 @@ auto sourcemeta::blaze::Vocabularies::insert(Known vocabulary, assert((this->required_known & this->optional_known).none()); } +// NOLINTNEXTLINE(bugprone-exception-escape) auto sourcemeta::blaze::Vocabularies::get( const sourcemeta::core::JSON::String &uri) const noexcept -> std::optional { @@ -218,7 +221,7 @@ auto sourcemeta::blaze::Vocabularies::empty() const noexcept -> bool { } auto sourcemeta::blaze::Vocabularies::has_unknown() const noexcept -> bool { - return this->unknown.has_value() && !this->unknown.value().empty(); + return this->unknown.has_value() && !this->unknown->empty(); } auto sourcemeta::blaze::operator<<(std::ostream &stream, diff --git a/vendor/blaze/src/foundation/walker.cc b/vendor/blaze/src/foundation/walker.cc index 39463db07..869435db0 100644 --- a/vendor/blaze/src/foundation/walker.cc +++ b/vendor/blaze/src/foundation/walker.cc @@ -86,8 +86,22 @@ auto walk(const std::optional &parent, const auto current_dialect{entry.dialect}; const auto current_base_dialect{entry.base_dialect}; + // A subschema may be an embedded resource that pins its own custom + // meta-schema inside its own `$defs`/`definitions`. Probe for it here, the + // same way we do at the document root, so that nested self-contained + // meta-schemas resolve to their embedded definition before the resolver const auto vocabularies{sourcemeta::blaze::vocabularies( - resolver, current_base_dialect, current_dialect)}; + [&subschema, &resolver](const std::string_view identifier) + -> std::optional { + const auto *embedded{sourcemeta::blaze::metaschema_try_embedded( + subschema, identifier, resolver)}; + if (embedded) { + return *embedded; + } + + return resolver(identifier); + }, + current_base_dialect, current_dialect)}; if (type == SchemaWalkerType_t::Deep || level > 0) { sourcemeta::blaze::SchemaIteratorEntry iterator_entry{ diff --git a/vendor/blaze/src/frame/frame.cc b/vendor/blaze/src/frame/frame.cc index 4fca9ba59..690e3ec45 100644 --- a/vendor/blaze/src/frame/frame.cc +++ b/vendor/blaze/src/frame/frame.cc @@ -18,10 +18,12 @@ enum class AnchorType : std::uint8_t { Static, Dynamic, All }; // Static keyword strings for reference pointers +// NOLINTBEGIN(bugprone-throwing-static-initialization) static const std::string KEYWORD_SCHEMA{"$schema"}; static const std::string KEYWORD_REF{"$ref"}; static const std::string KEYWORD_RECURSIVE_REF{"$recursiveRef"}; static const std::string KEYWORD_DYNAMIC_REF{"$dynamicRef"}; +// NOLINTEND(bugprone-throwing-static-initialization) namespace { @@ -612,7 +614,7 @@ auto SchemaFrame::analyse(const sourcemeta::core::JSON &root, for (const auto &path : paths) { // Passing paths that overlap is undefined behavior. No path should // start with another one, else you are doing something wrong - assert(std::ranges::all_of(paths, [&path](const auto &other) { + assert(std::ranges::all_of(paths, [&path](const auto &other) -> bool { return path == other || !path.starts_with(other); })); @@ -683,6 +685,25 @@ auto SchemaFrame::analyse(const sourcemeta::core::JSON &root, DialectAtPointer{.dialects = {entry.dialect}, .base_dialect = entry.base_dialect.value()}}); + // A nested resource may pin a custom meta-schema inside its own + // containers. Cache it here, just like we do for the root, so that later + // vocabulary lookups (which consult this cache) can resolve it. If the + // same meta-schema identifier is embedded in more than one place with a + // different definition, that is an ambiguity we refuse to resolve + if (!sourcemeta::blaze::to_base_dialect(entry.dialect).has_value()) { + const sourcemeta::core::JSON::String dialect_key{entry.dialect}; + const auto *embedded{sourcemeta::blaze::metaschema_try_embedded( + entry.subschema.get(), entry.dialect, resolver)}; + if (embedded) { + const auto match{this->probed_metaschemas_.find(dialect_key)}; + if (match == this->probed_metaschemas_.cend()) { + this->probed_metaschemas_.emplace(dialect_key, embedded); + } else if (*(match->second) != *embedded) { + throw_already_exists(dialect_key); + } + } + } + // Schema identifier // We need to store the default_id in a local variable to ensure // it survives the identify() call, as identify() returns a string_view @@ -1208,8 +1229,8 @@ auto SchemaFrame::analyse(const sourcemeta::core::JSON &root, } // A schema is standalone if all references can be resolved within itself - this->standalone_ = - std::ranges::all_of(this->references_, [&](const auto &reference) { + this->standalone_ = std::ranges::all_of( + this->references_, [&](const auto &reference) -> bool { assert(!reference.first.second.empty()); assert(reference.first.second.back().is_property()); // TODO: This check might need to be more elaborate given @@ -1265,8 +1286,10 @@ auto SchemaFrame::analyse(const sourcemeta::core::JSON &root, SchemaFrame::References::key_type{SchemaReferenceType::Static, reference.first.second}, SchemaFrame::References::mapped_type{ - reference.second.original, *match->second.front(), - std::string_view{}, std::nullopt}); + .original = reference.second.original, + .destination = *match->second.front(), + .base = std::string_view{}, + .fragment = std::nullopt}); } // Because we can't mutate a map as we are traversing it diff --git a/vendor/blaze/src/output/output_simple.cc b/vendor/blaze/src/output/output_simple.cc index c0c49f771..bc0f0b93f 100644 --- a/vendor/blaze/src/output/output_simple.cc +++ b/vendor/blaze/src/output/output_simple.cc @@ -139,10 +139,11 @@ auto SimpleOutput::operator()( for (const auto &mask_entry : this->mask) { if (evaluate_path.starts_with(mask_entry.first)) { this->masked_traces[mask_entry].push_back( - {describe(result, step, evaluate_path, instance_location, - this->instance_, annotation), - instance_location, std::move(effective_evaluate_path), - step_metadata.keyword_location}); + {.message = describe(result, step, evaluate_path, instance_location, + this->instance_, annotation), + .instance_location = instance_location, + .evaluate_path = std::move(effective_evaluate_path), + .schema_location = step_metadata.keyword_location}); return; } @@ -150,10 +151,11 @@ auto SimpleOutput::operator()( } this->output.push_back( - {describe(result, step, evaluate_path, instance_location, this->instance_, - annotation), - instance_location, std::move(effective_evaluate_path), - step_metadata.keyword_location}); + {.message = describe(result, step, evaluate_path, instance_location, + this->instance_, annotation), + .instance_location = instance_location, + .evaluate_path = std::move(effective_evaluate_path), + .schema_location = step_metadata.keyword_location}); } } // namespace sourcemeta::blaze diff --git a/vendor/core/CMakeLists.txt b/vendor/core/CMakeLists.txt index 158c5bcfe..22f9a9a78 100644 --- a/vendor/core/CMakeLists.txt +++ b/vendor/core/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.24) project(core VERSION 0.0.0 LANGUAGES C CXX ASM_MASM DESCRIPTION "Sourcemeta Core") list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") @@ -22,6 +22,7 @@ option(SOURCEMETA_CORE_IP "Build the Sourcemeta Core IP library" ON) option(SOURCEMETA_CORE_IDNA "Build the Sourcemeta Core IDNA library" ON) option(SOURCEMETA_CORE_DNS "Build the Sourcemeta Core DNS library" ON) option(SOURCEMETA_CORE_EMAIL "Build the Sourcemeta Core Email library" ON) +option(SOURCEMETA_CORE_LANGTAG "Build the Sourcemeta Core LangTag library" ON) option(SOURCEMETA_CORE_URI "Build the Sourcemeta Core URI library" ON) option(SOURCEMETA_CORE_URITEMPLATE "Build the Sourcemeta Core URI Template library" ON) option(SOURCEMETA_CORE_JSON "Build the Sourcemeta Core JSON library" ON) @@ -150,6 +151,10 @@ if(SOURCEMETA_CORE_EMAIL) add_subdirectory(src/core/email) endif() +if(SOURCEMETA_CORE_LANGTAG) + add_subdirectory(src/core/langtag) +endif() + if(SOURCEMETA_CORE_URI) add_subdirectory(src/core/uri) endif() @@ -313,6 +318,10 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/email) endif() + if(SOURCEMETA_CORE_LANGTAG) + add_subdirectory(test/langtag) + endif() + if(SOURCEMETA_CORE_URI) add_subdirectory(test/uri) endif() diff --git a/vendor/core/DEPENDENCIES b/vendor/core/DEPENDENCIES index fb683d434..c50b56236 100644 --- a/vendor/core/DEPENDENCIES +++ b/vendor/core/DEPENDENCIES @@ -11,3 +11,4 @@ googlebenchmark https://github.com/google/benchmark 378fe693a1ef51500db21b11ff05 libdeflate https://github.com/ebiggers/libdeflate v1.25 unicodetools https://github.com/unicode-org/unicodetools final-17.0-20250910 jose-cookbook https://github.com/ietf-jose/cookbook 13692b68bfc18b99557a5b1ed311fd5077bfff04 +w3c-json-ld https://github.com/w3c/json-ld-api 8654ac22b6cf4f441d2fee915ae634d36b5a8067 diff --git a/vendor/core/cmake/FindGoogleTest.cmake b/vendor/core/cmake/FindGoogleTest.cmake index 18eb1184e..8559f2299 100644 --- a/vendor/core/cmake/FindGoogleTest.cmake +++ b/vendor/core/cmake/FindGoogleTest.cmake @@ -3,4 +3,13 @@ if(NOT GoogleTest_FOUND) set(INSTALL_GTEST OFF CACHE BOOL "disable installation") add_subdirectory("${PROJECT_SOURCE_DIR}/vendor/googletest") set(GoogleTest_FOUND ON) + + # GoogleTest is vendored as-is and is not held to this project's strict + # warning policy, so do not let its own diagnostics fail the build + foreach(googletest_target gtest gtest_main gmock gmock_main) + if(TARGET ${googletest_target}) + set_target_properties("${googletest_target}" + PROPERTIES COMPILE_WARNING_AS_ERROR OFF) + endif() + endforeach() endif() diff --git a/vendor/core/cmake/common/clang-tidy.cmake b/vendor/core/cmake/common/clang-tidy.cmake index 7967f1044..f9d78a731 100644 --- a/vendor/core/cmake/common/clang-tidy.cmake +++ b/vendor/core/cmake/common/clang-tidy.cmake @@ -5,19 +5,19 @@ function(sourcemeta_clang_tidy_attempt_install) endif() # See https://pypi.org/project/clang-tidy/ - set(CLANG_TIDY_BINARY_VERSION "20.1.0") + set(CLANG_TIDY_BINARY_VERSION "22.1.7") set(CLANG_TIDY_BINARY_Windows_AMD64 "clang_tidy-${CLANG_TIDY_BINARY_VERSION}-py2.py3-none-win_amd64.whl") set(CLANG_TIDY_BINARY_MSYS_x86_64 "clang_tidy-${CLANG_TIDY_BINARY_VERSION}-py2.py3-none-win_amd64.whl") set(CLANG_TIDY_BINARY_Darwin_arm64 "clang_tidy-${CLANG_TIDY_BINARY_VERSION}-py2.py3-none-macosx_11_0_arm64.whl") set(CLANG_TIDY_BINARY_Darwin_x86_64 "clang_tidy-${CLANG_TIDY_BINARY_VERSION}-py2.py3-none-macosx_10_9_x86_64.whl") - set(CLANG_TIDY_BINARY_Linux_aarch64 "clang_tidy-${CLANG_TIDY_BINARY_VERSION}-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl") - set(CLANG_TIDY_BINARY_Linux_x86_64 "clang_tidy-${CLANG_TIDY_BINARY_VERSION}-py2.py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl") - set(CLANG_TIDY_BINARY_CHECKSUM_Windows_AMD64 "02/f0/dd985d9d9b76f8c39f1995aa475d8d5aabbea0d3e0cf498df44dc7bf1cb0") - set(CLANG_TIDY_BINARY_CHECKSUM_MSYS_x86_64 "02/f0/dd985d9d9b76f8c39f1995aa475d8d5aabbea0d3e0cf498df44dc7bf1cb0") - set(CLANG_TIDY_BINARY_CHECKSUM_Darwin_arm64 "95/02/838baf08764b08327322096bda55e8d1e2344e4a13b9308e5642cfaafd8e") - set(CLANG_TIDY_BINARY_CHECKSUM_Darwin_x86_64 "6d/5b/dcfc84b895d8544e00186738ca85132bbd14db4d11dbe39502630ece5391") - set(CLANG_TIDY_BINARY_CHECKSUM_Linux_aarch64 "be/61/9e1a0797639e81c41d38d7b8b2508a9be4b05b9a23baa9d64e7284d07238") - set(CLANG_TIDY_BINARY_CHECKSUM_Linux_x86_64 "52/76/42c61be1c1fdf8bacdbb265f0cd3e11321fee7362f91fa840717a6a41ad6") + set(CLANG_TIDY_BINARY_Linux_aarch64 "clang_tidy-${CLANG_TIDY_BINARY_VERSION}-py2.py3-none-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl") + set(CLANG_TIDY_BINARY_Linux_x86_64 "clang_tidy-${CLANG_TIDY_BINARY_VERSION}-py2.py3-none-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl") + set(CLANG_TIDY_BINARY_CHECKSUM_Windows_AMD64 "51/9b/755d77e51e8aebd03e287dc19727d5c4ca286fd419ff4cf5bd904f7b0366") + set(CLANG_TIDY_BINARY_CHECKSUM_MSYS_x86_64 "51/9b/755d77e51e8aebd03e287dc19727d5c4ca286fd419ff4cf5bd904f7b0366") + set(CLANG_TIDY_BINARY_CHECKSUM_Darwin_arm64 "e2/eb/bd4179187eb12348350d9de3a3124fb1466f0fd5ff33619ed45575752683") + set(CLANG_TIDY_BINARY_CHECKSUM_Darwin_x86_64 "bc/4e/de59bc2bda314fa0622b967d0542e16c78e156038827422740fc9749a3b0") + set(CLANG_TIDY_BINARY_CHECKSUM_Linux_aarch64 "27/8d/f21976010dfd1746d5eac80bccaa364f56df61ab968c6d9279513e21f945") + set(CLANG_TIDY_BINARY_CHECKSUM_Linux_x86_64 "43/69/3be98747ee4e8aedcfc4525a8e0c1576118bcd4de9a560d84a2a398158ce") set(CLANG_TIDY_BINARY_NAME_Windows_AMD64 "clang-tidy.exe") set(CLANG_TIDY_BINARY_NAME_MSYS_x86_64 "clang-tidy.exe") set(CLANG_TIDY_BINARY_NAME_Darwin_arm64 "clang-tidy") @@ -44,7 +44,10 @@ function(sourcemeta_clang_tidy_attempt_install) # Download and extract the pre-built binary ZIP if needed set(CLANG_TIDY_BINARY_NAME "${${CLANG_TIDY_BINARY_NAME_VAR}}") set(CLANG_TIDY_BINARY_OUTPUT "${SOURCEMETA_TARGET_CLANG_TIDY_ATTEMPT_INSTALL_OUTPUT_DIRECTORY}/${CLANG_TIDY_BINARY_NAME}") - if(EXISTS "${CLANG_TIDY_BINARY_OUTPUT}") + get_filename_component(CLANG_TIDY_BINARY_ROOT + "${SOURCEMETA_TARGET_CLANG_TIDY_ATTEMPT_INSTALL_OUTPUT_DIRECTORY}" DIRECTORY) + set(CLANG_TIDY_BINARY_RESOURCE_DIRECTORY "${CLANG_TIDY_BINARY_ROOT}/lib/clang") + if(EXISTS "${CLANG_TIDY_BINARY_OUTPUT}" AND IS_DIRECTORY "${CLANG_TIDY_BINARY_RESOURCE_DIRECTORY}") message(STATUS "Found existing `clang-tidy` pre-built binary at ${CLANG_TIDY_BINARY_OUTPUT}") return() endif() @@ -67,12 +70,18 @@ function(sourcemeta_clang_tidy_attempt_install) file(MAKE_DIRECTORY "${CLANG_TIDY_BINARY_EXTRACT_DIR}") file(ARCHIVE_EXTRACT INPUT "${CLANG_TIDY_BINARY_WHEEL}" DESTINATION "${CLANG_TIDY_BINARY_EXTRACT_DIR}") - # Install the binary + # Install the binary alongside the resource directory that ships in the + # wheel, replicating the upstream `bin` and `lib` layout so that clang-tidy + # discovers its own matching built-in headers. Otherwise it falls back to the + # system compiler resource directory, which breaks whenever that compiler is + # newer than the clang-tidy front-end file(MAKE_DIRECTORY "${SOURCEMETA_TARGET_CLANG_TIDY_ATTEMPT_INSTALL_OUTPUT_DIRECTORY}") file(COPY "${CLANG_TIDY_BINARY_EXTRACT_DIR}/clang_tidy/data/bin/${CLANG_TIDY_BINARY_NAME}" DESTINATION "${SOURCEMETA_TARGET_CLANG_TIDY_ATTEMPT_INSTALL_OUTPUT_DIRECTORY}") file(CHMOD "${CLANG_TIDY_BINARY_OUTPUT}" PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + file(COPY "${CLANG_TIDY_BINARY_EXTRACT_DIR}/clang_tidy/data/lib" + DESTINATION "${CLANG_TIDY_BINARY_ROOT}") message(STATUS "Installed `clang-tidy` pre-built binary to ${CLANG_TIDY_BINARY_OUTPUT}") endfunction() @@ -102,13 +111,10 @@ function(sourcemeta_clang_tidy_attempt_enable) set(CLANG_TIDY_CONFIG "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/clang-tidy.json") execute_process(COMMAND xcrun --show-sdk-path OUTPUT_VARIABLE MACOSX_SDK_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) - execute_process(COMMAND "${CMAKE_CXX_COMPILER}" -print-resource-dir - OUTPUT_VARIABLE MACOSX_RESOURCE_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) set(SOURCEMETA_CXX_CLANG_TIDY "${CLANG_TIDY_BIN};--config-file=${CLANG_TIDY_CONFIG};-header-filter=${PROJECT_SOURCE_DIR}/src/*" "--extra-arg=-isysroot" "--extra-arg=${MACOSX_SDK_PATH}" - "--extra-arg=-resource-dir=${MACOSX_RESOURCE_PATH}" CACHE STRING "CXX_CLANG_TIDY") endif() diff --git a/vendor/core/config.cmake.in b/vendor/core/config.cmake.in index 16d87fed7..5867d812e 100644 --- a/vendor/core/config.cmake.in +++ b/vendor/core/config.cmake.in @@ -18,6 +18,7 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS idna) list(APPEND SOURCEMETA_CORE_COMPONENTS dns) list(APPEND SOURCEMETA_CORE_COMPONENTS email) + list(APPEND SOURCEMETA_CORE_COMPONENTS langtag) list(APPEND SOURCEMETA_CORE_COMPONENTS uri) list(APPEND SOURCEMETA_CORE_COMPONENTS uritemplate) list(APPEND SOURCEMETA_CORE_COMPONENTS json) @@ -84,14 +85,19 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_punycode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_idna.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_dns.cmake") elseif(component STREQUAL "email") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_punycode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_idna.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_dns.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_email.cmake") + elseif(component STREQUAL "langtag") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_langtag.cmake") elseif(component STREQUAL "uri") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") @@ -184,6 +190,7 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jose.cmake") elseif(component STREQUAL "semver") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_semver.cmake") elseif(component STREQUAL "gzip") find_dependency(LibDeflate CONFIG) diff --git a/vendor/core/src/core/dns/CMakeLists.txt b/vendor/core/src/core/dns/CMakeLists.txt index 1154d4114..3368ace46 100644 --- a/vendor/core/src/core/dns/CMakeLists.txt +++ b/vendor/core/src/core/dns/CMakeLists.txt @@ -8,4 +8,5 @@ endif() target_link_libraries(sourcemeta_core_dns PRIVATE sourcemeta::core::unicode PRIVATE sourcemeta::core::punycode - PRIVATE sourcemeta::core::idna) + PRIVATE sourcemeta::core::idna + PRIVATE sourcemeta::core::text) diff --git a/vendor/core/src/core/dns/hostname.cc b/vendor/core/src/core/dns/hostname.cc index 4a980f7e2..31aa097c4 100644 --- a/vendor/core/src/core/dns/hostname.cc +++ b/vendor/core/src/core/dns/hostname.cc @@ -1,19 +1,12 @@ #include #include +#include #include // std::string #include // std::string_view namespace sourcemeta::core { -// RFC 952 §B: let-dig = ALPHA / DIGIT -// RFC 1123 §2.1: first character of a label is letter or digit -static constexpr auto is_let_dig(const char character) -> bool { - return (character >= 'A' && character <= 'Z') || - (character >= 'a' && character <= 'z') || - (character >= '0' && character <= '9'); -} - auto is_hostname(const std::string_view value) -> bool { // RFC 952 §B: requires at least one if (value.empty()) { @@ -46,7 +39,8 @@ auto is_hostname(const std::string_view value) -> bool { continue; } - if (is_let_dig(character)) { + // RFC 952 §B: let-dig = ALPHA / DIGIT + if (is_alphanum(character)) { last_was_hyphen = false; position += 1; label_has_content = true; diff --git a/vendor/core/src/core/email/CMakeLists.txt b/vendor/core/src/core/email/CMakeLists.txt index 22828bcf3..dcbc083e9 100644 --- a/vendor/core/src/core/email/CMakeLists.txt +++ b/vendor/core/src/core/email/CMakeLists.txt @@ -11,3 +11,5 @@ target_link_libraries(sourcemeta_core_email PRIVATE sourcemeta::core::ip) target_link_libraries(sourcemeta_core_email PRIVATE sourcemeta::core::unicode) +target_link_libraries(sourcemeta_core_email + PRIVATE sourcemeta::core::text) diff --git a/vendor/core/src/core/email/helpers.h b/vendor/core/src/core/email/helpers.h index dd93f5bf4..0a37ae526 100644 --- a/vendor/core/src/core/email/helpers.h +++ b/vendor/core/src/core/email/helpers.h @@ -2,6 +2,7 @@ #define SOURCEMETA_CORE_EMAIL_HELPERS_H_ #include +#include #include // std::uint8_t, std::uint16_t #include // std::string_view @@ -34,9 +35,7 @@ inline constexpr auto is_atext(const char character) -> bool { case '~': return true; default: - return (character >= 'A' && character <= 'Z') || - (character >= 'a' && character <= 'z') || - (character >= '0' && character <= '9'); + return sourcemeta::core::is_alphanum(character); } } @@ -47,13 +46,6 @@ inline constexpr auto is_qtext_smtp(const unsigned char character) -> bool { (character >= 93 && character <= 126); } -// RFC 5321 §4.1.2: Let-dig = ALPHA / DIGIT -inline constexpr auto is_let_dig(const char character) -> bool { - return (character >= 'A' && character <= 'Z') || - (character >= 'a' && character <= 'z') || - (character >= '0' && character <= '9'); -} - // RFC 5321 §4.1.3: dcontent = %d33-90 / %d94-126 inline constexpr auto is_dcontent(const unsigned char character) -> bool { return (character >= 33 && character <= 90) || @@ -63,13 +55,13 @@ inline constexpr auto is_dcontent(const unsigned char character) -> bool { // RFC 5321 §4.1.2: Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig // RFC 5321 §4.1.3: Standardized-tag = Ldh-str inline constexpr auto is_ldh_str(const std::string_view value) -> bool { - if (value.empty() || !is_let_dig(value.back())) { + if (value.empty() || !sourcemeta::core::is_alphanum(value.back())) { return false; } for (std::string_view::size_type position{0}; position + 1 < value.size(); position += 1) { const auto character{value[position]}; - if (!is_let_dig(character) && character != '-') { + if (!sourcemeta::core::is_alphanum(character) && character != '-') { return false; } } diff --git a/vendor/core/src/core/gzip/gzip.cc b/vendor/core/src/core/gzip/gzip.cc index 9ee3375a3..a27104218 100644 --- a/vendor/core/src/core/gzip/gzip.cc +++ b/vendor/core/src/core/gzip/gzip.cc @@ -24,7 +24,8 @@ auto gzip(const std::uint8_t *input, const std::size_t size, const int level) // avoids zero-filling multi-megabyte allocations that are immediately // discarded output.resize_and_overwrite( - max_size, [&](char *const buffer, const std::size_t capacity) { + max_size, + [&](char *const buffer, const std::size_t capacity) -> std::size_t { actual_size = libdeflate_gzip_compress(compressor.get(), input, size, buffer, capacity); return capacity; @@ -57,12 +58,14 @@ auto gunzip(const std::uint8_t *input, const std::size_t size, // libdeflate writes only the decompressed bytes, so leaving the buffer // uninitialised avoids zero-filling multi-megabyte allocations on every // retry of the doubling loop - output.resize_and_overwrite(capacity, [&](char *const buffer, - const std::size_t buffer_size) { - result = libdeflate_gzip_decompress(decompressor.get(), input, size, - buffer, buffer_size, &actual_size); - return buffer_size; - }); + output.resize_and_overwrite( + capacity, + [&](char *const buffer, const std::size_t buffer_size) -> std::size_t { + result = + libdeflate_gzip_decompress(decompressor.get(), input, size, + buffer, buffer_size, &actual_size); + return buffer_size; + }); if (result == LIBDEFLATE_SUCCESS) { output.resize(actual_size); diff --git a/vendor/core/src/core/html/escape.cc b/vendor/core/src/core/html/escape.cc index 57ffbc822..5c0bc1c27 100644 --- a/vendor/core/src/core/html/escape.cc +++ b/vendor/core/src/core/html/escape.cc @@ -35,63 +35,64 @@ auto html_escape(std::string &text) -> void { } // Write escaped characters backwards to avoid overwriting unprocessed data - text.resize_and_overwrite(required_size, - [original_size](char *buffer, std::size_t count) { - auto read_position = original_size; - auto write_position = count; - - while (read_position > 0) { - --read_position; - const auto character = buffer[read_position]; - - switch (character) { - case '&': - write_position -= 5; - buffer[write_position] = '&'; - buffer[write_position + 1] = 'a'; - buffer[write_position + 2] = 'm'; - buffer[write_position + 3] = 'p'; - buffer[write_position + 4] = ';'; - break; - case '<': - write_position -= 4; - buffer[write_position] = '&'; - buffer[write_position + 1] = 'l'; - buffer[write_position + 2] = 't'; - buffer[write_position + 3] = ';'; - break; - case '>': - write_position -= 4; - buffer[write_position] = '&'; - buffer[write_position + 1] = 'g'; - buffer[write_position + 2] = 't'; - buffer[write_position + 3] = ';'; - break; - case '"': - write_position -= 6; - buffer[write_position] = '&'; - buffer[write_position + 1] = 'q'; - buffer[write_position + 2] = 'u'; - buffer[write_position + 3] = 'o'; - buffer[write_position + 4] = 't'; - buffer[write_position + 5] = ';'; - break; - case '\'': - write_position -= 5; - buffer[write_position] = '&'; - buffer[write_position + 1] = '#'; - buffer[write_position + 2] = '3'; - buffer[write_position + 3] = '9'; - buffer[write_position + 4] = ';'; - break; - default: - --write_position; - buffer[write_position] = character; - } - } - - return count; - }); + text.resize_and_overwrite( + required_size, + [original_size](char *buffer, std::size_t count) -> std::size_t { + auto read_position = original_size; + auto write_position = count; + + while (read_position > 0) { + --read_position; + const auto character = buffer[read_position]; + + switch (character) { + case '&': + write_position -= 5; + buffer[write_position] = '&'; + buffer[write_position + 1] = 'a'; + buffer[write_position + 2] = 'm'; + buffer[write_position + 3] = 'p'; + buffer[write_position + 4] = ';'; + break; + case '<': + write_position -= 4; + buffer[write_position] = '&'; + buffer[write_position + 1] = 'l'; + buffer[write_position + 2] = 't'; + buffer[write_position + 3] = ';'; + break; + case '>': + write_position -= 4; + buffer[write_position] = '&'; + buffer[write_position + 1] = 'g'; + buffer[write_position + 2] = 't'; + buffer[write_position + 3] = ';'; + break; + case '"': + write_position -= 6; + buffer[write_position] = '&'; + buffer[write_position + 1] = 'q'; + buffer[write_position + 2] = 'u'; + buffer[write_position + 3] = 'o'; + buffer[write_position + 4] = 't'; + buffer[write_position + 5] = ';'; + break; + case '\'': + write_position -= 5; + buffer[write_position] = '&'; + buffer[write_position + 1] = '#'; + buffer[write_position + 2] = '3'; + buffer[write_position + 3] = '9'; + buffer[write_position + 4] = ';'; + break; + default: + --write_position; + buffer[write_position] = character; + } + } + + return count; + }); } static auto needs_escape(const std::string_view input) -> bool { diff --git a/vendor/core/src/core/http/accept_includes_all.cc b/vendor/core/src/core/http/accept_includes_all.cc index da5a37dee..d64145b38 100644 --- a/vendor/core/src/core/http/accept_includes_all.cc +++ b/vendor/core/src/core/http/accept_includes_all.cc @@ -28,7 +28,8 @@ auto http_accept_includes_all( std::uint8_t best_specificity{0}; http_for_each_accept_entry( accept_header, - [&](const std::string_view value, const float quality) noexcept { + [&](const std::string_view value, + const float quality) noexcept -> void { const std::uint8_t specificity{ http_media_specificity(value, media_type)}; if (specificity == 0) { diff --git a/vendor/core/src/core/http/field_list.cc b/vendor/core/src/core/http/field_list.cc index 265081f7d..6e94a8203 100644 --- a/vendor/core/src/core/http/field_list.cc +++ b/vendor/core/src/core/http/field_list.cc @@ -12,17 +12,18 @@ auto http_field_list_contains_any( const std::string_view header_value, std::initializer_list tokens) noexcept -> bool { bool found{false}; - http_for_each_field_value(header_value, [&](const std::string_view value) { - if (found) { - return; - } - for (const auto token : tokens) { - if (value == token) { - found = true; - return; - } - } - }); + http_for_each_field_value(header_value, + [&](const std::string_view value) -> void { + if (found) { + return; + } + for (const auto token : tokens) { + if (value == token) { + found = true; + return; + } + } + }); return found; } diff --git a/vendor/core/src/core/http/helpers.h b/vendor/core/src/core/http/helpers.h index 40d78b2b5..826db7ecc 100644 --- a/vendor/core/src/core/http/helpers.h +++ b/vendor/core/src/core/http/helpers.h @@ -219,8 +219,9 @@ inline auto http_extract_quality(const std::string_view parameters) noexcept -> float { float quality{1.0f}; http_for_each_parameter( - parameters, [&quality](const std::string_view name, - const std::string_view value) noexcept { + parameters, + [&quality](const std::string_view name, + const std::string_view value) noexcept -> void { if (name.size() == 1 && (name[0] == 'q' || name[0] == 'Q')) { quality = http_parse_qvalue(value); } @@ -231,24 +232,26 @@ inline auto http_extract_quality(const std::string_view parameters) noexcept template inline auto http_for_each_accept_entry(const std::string_view header, Visitor visit) -> void { - http_for_each_list_entry(header, [&visit](const std::string_view entry) { - const auto [value, parameters] = http_split_entry(entry); - if (!value.empty()) { - visit(value, http_extract_quality(parameters)); - } - }); + http_for_each_list_entry( + header, [&visit](const std::string_view entry) -> auto { + const auto [value, parameters] = http_split_entry(entry); + if (!value.empty()) { + visit(value, http_extract_quality(parameters)); + } + }); } template inline auto http_for_each_field_value(const std::string_view header, Visitor visit) -> void { - http_for_each_list_entry(header, [&visit](const std::string_view entry) { - const auto [value, parameters] = http_split_entry(entry); - (void)parameters; - if (!value.empty()) { - visit(value); - } - }); + http_for_each_list_entry( + header, [&visit](const std::string_view entry) -> auto { + const auto [value, parameters] = http_split_entry(entry); + (void)parameters; + if (!value.empty()) { + visit(value); + } + }); } } // namespace sourcemeta::core diff --git a/vendor/core/src/core/http/include/sourcemeta/core/http_message.h b/vendor/core/src/core/http/include/sourcemeta/core/http_message.h index 29f8763a8..c6a62322b 100644 --- a/vendor/core/src/core/http/include/sourcemeta/core/http_message.h +++ b/vendor/core/src/core/http/include/sourcemeta/core/http_message.h @@ -175,24 +175,27 @@ template } inline auto http_parse_headers(const std::string_view input, Container &headers) -> void { - http_parse_headers(input, [&headers](const std::string_view name, - const std::string_view value) { - if (name.empty()) { - // RFC 9112 §5.2 mandates replacing "each received obs-fold with - // one or more SP octets prior to interpreting the field value" - if (!headers.empty()) { - auto &previous_value{headers.back().second}; - previous_value += ' '; - previous_value += value; - } + http_parse_headers(input, + [&headers](const std::string_view name, + const std::string_view value) -> auto { + if (name.empty()) { + // RFC 9112 §5.2 mandates replacing "each received + // obs-fold with one or more SP octets prior to + // interpreting the field value" + if (!headers.empty()) { + auto &previous_value{headers.back().second}; + previous_value += ' '; + previous_value += value; + } - return; - } + return; + } - std::string header_name{name}; - to_lowercase(header_name); - headers.emplace_back(std::move(header_name), std::string{value}); - }); + std::string header_name{name}; + to_lowercase(header_name); + headers.emplace_back(std::move(header_name), + std::string{value}); + }); } /// @ingroup http diff --git a/vendor/core/src/core/http/match_accept.cc b/vendor/core/src/core/http/match_accept.cc index bebedcbe4..c066e9b3b 100644 --- a/vendor/core/src/core/http/match_accept.cc +++ b/vendor/core/src/core/http/match_accept.cc @@ -34,19 +34,21 @@ auto http_match_accept(const std::string_view accept_header, assert(candidate.find_first_of(" \t,;*") == std::string_view::npos); float candidate_quality{0.0f}; std::uint8_t candidate_specificity{0}; - http_for_each_accept_entry(accept_header, [&](const std::string_view value, - const float quality) { - const std::uint8_t specificity{http_media_specificity(value, candidate)}; - if (specificity == 0) { - return; - } - if (specificity > candidate_specificity || - (specificity == candidate_specificity && - quality > candidate_quality)) { - candidate_quality = quality; - candidate_specificity = specificity; - } - }); + http_for_each_accept_entry( + accept_header, + [&](const std::string_view value, const float quality) -> void { + const std::uint8_t specificity{ + http_media_specificity(value, candidate)}; + if (specificity == 0) { + return; + } + if (specificity > candidate_specificity || + (specificity == candidate_specificity && + quality > candidate_quality)) { + candidate_quality = quality; + candidate_specificity = specificity; + } + }); if (candidate_quality > 0.0f && (candidate_quality > best_quality || (candidate_quality == best_quality && diff --git a/vendor/core/src/core/http/match_accept_language.cc b/vendor/core/src/core/http/match_accept_language.cc index 548624621..caece96ce 100644 --- a/vendor/core/src/core/http/match_accept_language.cc +++ b/vendor/core/src/core/http/match_accept_language.cc @@ -57,7 +57,7 @@ auto http_match_accept_language( std::size_t candidate_specificity{0}; http_for_each_accept_entry( accept_language_header, - [&](const std::string_view value, const float quality) { + [&](const std::string_view value, const float quality) -> void { const std::size_t specificity{language_specificity(value, candidate)}; if (specificity == 0) { return; diff --git a/vendor/core/src/core/http/negotiate_encoding.cc b/vendor/core/src/core/http/negotiate_encoding.cc index 12420d5d4..09f89fcc0 100644 --- a/vendor/core/src/core/http/negotiate_encoding.cc +++ b/vendor/core/src/core/http/negotiate_encoding.cc @@ -25,7 +25,7 @@ auto http_negotiate_encoding( http_for_each_accept_entry( accept_encoding_header, - [&](const std::string_view token, const float quality) { + [&](const std::string_view token, const float quality) -> void { if (http_iequals_ascii(token, "gzip") || http_iequals_ascii(token, "x-gzip")) { gzip_listed = true; diff --git a/vendor/core/src/core/http/problem.cc b/vendor/core/src/core/http/problem.cc index 589403757..1aee8c9f1 100644 --- a/vendor/core/src/core/http/problem.cc +++ b/vendor/core/src/core/http/problem.cc @@ -1,15 +1,18 @@ #include -#include // assert -#include // std::int64_t +#include // assert +#include // std::int64_t +#include // std::string_view, std::string_view_literals namespace { - -const auto HTTP_HASH_TYPE{sourcemeta::core::JSON::Object::hash("type")}; -const auto HTTP_HASH_TITLE{sourcemeta::core::JSON::Object::hash("title")}; -const auto HTTP_HASH_STATUS{sourcemeta::core::JSON::Object::hash("status")}; -const auto HTTP_HASH_DETAIL{sourcemeta::core::JSON::Object::hash("detail")}; -const auto HTTP_HASH_INSTANCE{sourcemeta::core::JSON::Object::hash("instance")}; +using namespace std::string_view_literals; + +const auto HTTP_HASH_TYPE{sourcemeta::core::JSON::Object::hash("type"sv)}; +const auto HTTP_HASH_TITLE{sourcemeta::core::JSON::Object::hash("title"sv)}; +const auto HTTP_HASH_STATUS{sourcemeta::core::JSON::Object::hash("status"sv)}; +const auto HTTP_HASH_DETAIL{sourcemeta::core::JSON::Object::hash("detail"sv)}; +const auto HTTP_HASH_INSTANCE{ + sourcemeta::core::JSON::Object::hash("instance"sv)}; } // namespace diff --git a/vendor/core/src/core/jose/include/sourcemeta/core/jose_jwk.h b/vendor/core/src/core/jose/include/sourcemeta/core/jose_jwk.h index 879873b9d..c6b41134f 100644 --- a/vendor/core/src/core/jose/include/sourcemeta/core/jose_jwk.h +++ b/vendor/core/src/core/jose/include/sourcemeta/core/jose_jwk.h @@ -65,7 +65,7 @@ class SOURCEMETA_CORE_JOSE_EXPORT JWK { [[nodiscard]] auto key_id() const noexcept -> std::optional { if (this->key_id_.has_value()) { - return std::string_view{this->key_id_.value()}; + return std::string_view{*this->key_id_}; } return std::nullopt; diff --git a/vendor/core/src/core/jose/jose_jwk.cc b/vendor/core/src/core/jose/jose_jwk.cc index 5e2496034..b34477fda 100644 --- a/vendor/core/src/core/jose/jose_jwk.cc +++ b/vendor/core/src/core/jose/jose_jwk.cc @@ -8,22 +8,23 @@ #include // std::move, std::unreachable namespace { - -const auto HASH_KTY{sourcemeta::core::JSON::Object::hash("kty")}; -const auto HASH_N{sourcemeta::core::JSON::Object::hash("n")}; -const auto HASH_E{sourcemeta::core::JSON::Object::hash("e")}; -const auto HASH_CRV{sourcemeta::core::JSON::Object::hash("crv")}; -const auto HASH_X{sourcemeta::core::JSON::Object::hash("x")}; -const auto HASH_Y{sourcemeta::core::JSON::Object::hash("y")}; -const auto HASH_KID{sourcemeta::core::JSON::Object::hash("kid")}; -const auto HASH_ALG{sourcemeta::core::JSON::Object::hash("alg")}; -const auto HASH_D{sourcemeta::core::JSON::Object::hash("d")}; -const auto HASH_P{sourcemeta::core::JSON::Object::hash("p")}; -const auto HASH_Q{sourcemeta::core::JSON::Object::hash("q")}; -const auto HASH_DP{sourcemeta::core::JSON::Object::hash("dp")}; -const auto HASH_DQ{sourcemeta::core::JSON::Object::hash("dq")}; -const auto HASH_QI{sourcemeta::core::JSON::Object::hash("qi")}; -const auto HASH_OTH{sourcemeta::core::JSON::Object::hash("oth")}; +using namespace std::string_view_literals; + +const auto HASH_KTY{sourcemeta::core::JSON::Object::hash("kty"sv)}; +const auto HASH_N{sourcemeta::core::JSON::Object::hash("n"sv)}; +const auto HASH_E{sourcemeta::core::JSON::Object::hash("e"sv)}; +const auto HASH_CRV{sourcemeta::core::JSON::Object::hash("crv"sv)}; +const auto HASH_X{sourcemeta::core::JSON::Object::hash("x"sv)}; +const auto HASH_Y{sourcemeta::core::JSON::Object::hash("y"sv)}; +const auto HASH_KID{sourcemeta::core::JSON::Object::hash("kid"sv)}; +const auto HASH_ALG{sourcemeta::core::JSON::Object::hash("alg"sv)}; +const auto HASH_D{sourcemeta::core::JSON::Object::hash("d"sv)}; +const auto HASH_P{sourcemeta::core::JSON::Object::hash("p"sv)}; +const auto HASH_Q{sourcemeta::core::JSON::Object::hash("q"sv)}; +const auto HASH_DP{sourcemeta::core::JSON::Object::hash("dp"sv)}; +const auto HASH_DQ{sourcemeta::core::JSON::Object::hash("dq"sv)}; +const auto HASH_QI{sourcemeta::core::JSON::Object::hash("qi"sv)}; +const auto HASH_OTH{sourcemeta::core::JSON::Object::hash("oth"sv)}; // The RSA algorithms only require an RSA key, each ECDSA algorithm is tied to a // specific curve (RFC 7518 Section 3.1), and the Edwards-curve algorithm diff --git a/vendor/core/src/core/jose/jose_jwks.cc b/vendor/core/src/core/jose/jose_jwks.cc index 17afbe79b..83aa68f2f 100644 --- a/vendor/core/src/core/jose/jose_jwks.cc +++ b/vendor/core/src/core/jose/jose_jwks.cc @@ -5,8 +5,9 @@ #include // std::move namespace { +using namespace std::string_view_literals; -const auto HASH_KEYS{sourcemeta::core::JSON::Object::hash("keys")}; +const auto HASH_KEYS{sourcemeta::core::JSON::Object::hash("keys"sv)}; } // namespace @@ -61,7 +62,7 @@ auto JWKS::from(JSON &&value) -> std::optional { return from(value); } auto JWKS::find(const std::string_view key_id) const noexcept -> const JWK * { for (const auto &key : this->keys_) { const auto candidate{key.key_id()}; - if (candidate.has_value() && candidate.value() == key_id) { + if (candidate.has_value() && *candidate == key_id) { return &key; } } diff --git a/vendor/core/src/core/jose/jose_jwt.cc b/vendor/core/src/core/jose/jose_jwt.cc index 20bd6e1d0..9f53f2a11 100644 --- a/vendor/core/src/core/jose/jose_jwt.cc +++ b/vendor/core/src/core/jose/jose_jwt.cc @@ -11,18 +11,19 @@ #include // std::move namespace { - -const auto HASH_ALG{sourcemeta::core::JSON::Object::hash("alg")}; -const auto HASH_CRIT{sourcemeta::core::JSON::Object::hash("crit")}; -const auto HASH_KID{sourcemeta::core::JSON::Object::hash("kid")}; -const auto HASH_TYP{sourcemeta::core::JSON::Object::hash("typ")}; -const auto HASH_ISS{sourcemeta::core::JSON::Object::hash("iss")}; -const auto HASH_SUB{sourcemeta::core::JSON::Object::hash("sub")}; -const auto HASH_AUD{sourcemeta::core::JSON::Object::hash("aud")}; -const auto HASH_EXP{sourcemeta::core::JSON::Object::hash("exp")}; -const auto HASH_NBF{sourcemeta::core::JSON::Object::hash("nbf")}; -const auto HASH_IAT{sourcemeta::core::JSON::Object::hash("iat")}; -const auto HASH_JTI{sourcemeta::core::JSON::Object::hash("jti")}; +using namespace std::string_view_literals; + +const auto HASH_ALG{sourcemeta::core::JSON::Object::hash("alg"sv)}; +const auto HASH_CRIT{sourcemeta::core::JSON::Object::hash("crit"sv)}; +const auto HASH_KID{sourcemeta::core::JSON::Object::hash("kid"sv)}; +const auto HASH_TYP{sourcemeta::core::JSON::Object::hash("typ"sv)}; +const auto HASH_ISS{sourcemeta::core::JSON::Object::hash("iss"sv)}; +const auto HASH_SUB{sourcemeta::core::JSON::Object::hash("sub"sv)}; +const auto HASH_AUD{sourcemeta::core::JSON::Object::hash("aud"sv)}; +const auto HASH_EXP{sourcemeta::core::JSON::Object::hash("exp"sv)}; +const auto HASH_NBF{sourcemeta::core::JSON::Object::hash("nbf"sv)}; +const auto HASH_IAT{sourcemeta::core::JSON::Object::hash("iat"sv)}; +const auto HASH_JTI{sourcemeta::core::JSON::Object::hash("jti"sv)}; auto string_claim(const sourcemeta::core::JSON &object, const sourcemeta::core::JSON::StringView name, @@ -157,7 +158,7 @@ auto JWT::token_id() const noexcept -> std::optional { } auto JWT::has_audience(const std::string_view audience) const noexcept -> bool { - const auto *member{this->payload_.try_at("aud", HASH_AUD)}; + const auto *member{this->payload_.try_at("aud"sv, HASH_AUD)}; if (member == nullptr) { return false; } diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h b/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h index 84edb5992..94fffbec4 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h @@ -599,8 +599,9 @@ auto from_json(const JSON &value) -> std::optional { /// @ingroup json template auto to_json(const T &value) -> JSON { auto tuple = JSON::make_array(); - std::apply([&](const auto &element) { tuple.push_back(to_json(element)); }, - value); + std::apply( + [&](const auto &element) -> auto { tuple.push_back(to_json(element)); }, + value); return tuple; } @@ -623,7 +624,7 @@ auto from_json(const JSON &value) -> std::optional { template auto to_json(const T &value) -> JSON { auto tuple = JSON::make_array(); std::apply( - [&tuple](const auto &...elements) { + [&tuple](const auto &...elements) -> auto { (tuple.push_back(to_json(elements)), ...); }, value); @@ -662,7 +663,7 @@ auto to_json(const T &value) -> JSON { auto result{JSON::make_array()}; result.push_back(JSON{static_cast(value.index())}); std::visit( - [&result](const auto &alternative) { + [&result](const auto &alternative) -> auto { result.push_back(to_json(alternative)); }, value); diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_object.h b/vendor/core/src/core/json/include/sourcemeta/core/json_object.h index 79eb8795c..48dbdda5b 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_object.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_object.h @@ -726,7 +726,7 @@ template class JSONObject { /// Reorder object properties by keys according to a comparator function template auto reorder(const Compare &compare) -> void { std::sort(this->data.begin(), this->data.end(), - [&compare](const auto &left, const auto &right) { + [&compare](const auto &left, const auto &right) -> auto { return compare(left.first, right.first); }); } diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_value.h b/vendor/core/src/core/json/include/sourcemeta/core/json_value.h index 392bae36b..ce5fe9d51 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_value.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_value.h @@ -1521,8 +1521,9 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { template [[nodiscard]] auto defines_any(Iterator begin, Iterator end) const -> bool { assert(this->is_object()); - return std::any_of(begin, end, - [this](const auto &key) { return this->defines(key); }); + return std::any_of(begin, end, [this](const auto &key) -> auto { + return this->defines(key); + }); } /// This method checks whether an input JSON object defines at least one given diff --git a/vendor/core/src/core/json/json_value.cc b/vendor/core/src/core/json/json_value.cc index 26f9dfa02..3fd229376 100644 --- a/vendor/core/src/core/json/json_value.cc +++ b/vendor/core/src/core/json/json_value.cc @@ -159,7 +159,7 @@ JSON::JSON(const JSON &other) { }; std::vector tasks; tasks.reserve(16); - tasks.push_back({&other, this}); + tasks.push_back({.source = &other, .destination = this}); try { while (!tasks.empty()) { @@ -200,7 +200,8 @@ JSON::JSON(const JSON &other) { destination_data.emplace_back(nullptr); } for (std::size_t index = 0; index < source_data.size(); ++index) { - tasks.push_back({&source_data[index], &destination_data[index]}); + tasks.push_back({.source = &source_data[index], + .destination = &destination_data[index]}); } break; } @@ -215,8 +216,8 @@ JSON::JSON(const JSON &other) { entry.hash); } for (std::size_t index = 0; index < source_data.size(); ++index) { - tasks.push_back( - {&source_data[index].second, &destination_data[index].second}); + tasks.push_back({.source = &source_data[index].second, + .destination = &destination_data[index].second}); } break; } @@ -630,18 +631,17 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { // which we are not taking into account here, as its typically // implementation dependent. This function is just a rough estimate. if (this->is_object()) { - return std::ranges::fold_left(this->as_object(), - static_cast(0), - [](const std::uint64_t accumulator, - const typename Object::value_type &pair) { - return accumulator + - (pair.first.size() * sizeof(Char)) + - pair.second.estimated_byte_size(); - }); + return std::ranges::fold_left( + this->as_object(), static_cast(0), + [](const std::uint64_t accumulator, + const typename Object::value_type &pair) -> std::uint64_t { + return accumulator + (pair.first.size() * sizeof(Char)) + + pair.second.estimated_byte_size(); + }); } else if (this->is_array()) { return std::ranges::fold_left( this->as_array(), static_cast(0), - [](const std::uint64_t accumulator, const JSON &item) { + [](const std::uint64_t accumulator, const JSON &item) -> std::uint64_t { return accumulator + item.estimated_byte_size(); }); } else if (this->is_string()) { @@ -673,16 +673,17 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { case Type::String: return 3 + this->byte_size(); case Type::Array: - return std::ranges::fold_left( - this->as_array(), static_cast(6), - [](const std::uint64_t accumulator, const JSON &item) { - return accumulator + 1 + item.fast_hash(); - }); + return std::ranges::fold_left(this->as_array(), + static_cast(6), + [](const std::uint64_t accumulator, + const JSON &item) -> std::uint64_t { + return accumulator + 1 + item.fast_hash(); + }); case Type::Object: return std::ranges::fold_left( this->as_object(), static_cast(7), [](const std::uint64_t accumulator, - const typename Object::value_type &pair) { + const typename Object::value_type &pair) -> std::uint64_t { return accumulator + 1 + pair.first.size() + pair.second.fast_hash(); }); diff --git a/vendor/core/src/core/json/parser.h b/vendor/core/src/core/json/parser.h index e40228020..188c3f2b5 100644 --- a/vendor/core/src/core/json/parser.h +++ b/vendor/core/src/core/json/parser.h @@ -496,15 +496,33 @@ inline auto scan_json(const char *&cursor, const char *end, switch (character) { case internal::token_true: internal::scan_true(line, column, cursor, end); - tape.push_back({TapeType::True, 0, 0, 0, 0, value_line, value_column}); + tape.push_back({.type = TapeType::True, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = value_line, + .column = value_column}); return; case internal::token_false: internal::scan_false(line, column, cursor, end); - tape.push_back({TapeType::False, 0, 0, 0, 0, value_line, value_column}); + tape.push_back({.type = TapeType::False, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = value_line, + .column = value_column}); return; case internal::token_null: internal::scan_null(line, column, cursor, end); - tape.push_back({TapeType::Null, 0, 0, 0, 0, value_line, value_column}); + tape.push_back({.type = TapeType::Null, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = value_line, + .column = value_column}); return; case internal::token_string_quote: { const auto string_start{ @@ -556,7 +574,13 @@ inline auto scan_json(const char *&cursor, const char *end, do_scan_array: { const auto start_index{tape.size()}; - tape.push_back({TapeType::ArrayStart, 0, 0, 0, 0, line, column}); + tape.push_back({.type = TapeType::ArrayStart, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = line, + .column = column}); container_stack.push_back({start_index, 0}); internal::skip_whitespace(cursor, end, line, column); @@ -573,7 +597,13 @@ do_scan_array: { } cursor++; tape[start_index].count = 0; - tape.push_back({TapeType::ArrayEnd, 0, 0, 0, 0, line, column}); + tape.push_back({.type = TapeType::ArrayEnd, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = line, + .column = column}); container_stack.pop_back(); goto do_scan_container_end; } @@ -607,15 +637,33 @@ do_scan_array: { goto do_scan_object; case internal::token_true: internal::scan_true(line, column, cursor, end); - tape.push_back({TapeType::True, 0, 0, 0, 0, value_line, value_column}); + tape.push_back({.type = TapeType::True, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = value_line, + .column = value_column}); goto do_scan_array_item_separator; case internal::token_false: internal::scan_false(line, column, cursor, end); - tape.push_back({TapeType::False, 0, 0, 0, 0, value_line, value_column}); + tape.push_back({.type = TapeType::False, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = value_line, + .column = value_column}); goto do_scan_array_item_separator; case internal::token_null: internal::scan_null(line, column, cursor, end); - tape.push_back({TapeType::Null, 0, 0, 0, 0, value_line, value_column}); + tape.push_back({.type = TapeType::Null, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = value_line, + .column = value_column}); goto do_scan_array_item_separator; case internal::token_string_quote: { const auto string_start{ @@ -676,7 +724,13 @@ do_scan_array: { assert(!container_stack.empty()); auto &frame{container_stack.back()}; tape[frame.tape_index].count = frame.child_count; - tape.push_back({TapeType::ArrayEnd, 0, 0, 0, 0, line, column}); + tape.push_back({.type = TapeType::ArrayEnd, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = line, + .column = column}); container_stack.pop_back(); goto do_scan_container_end; } @@ -690,7 +744,13 @@ do_scan_array: { do_scan_object: { const auto start_index{tape.size()}; - tape.push_back({TapeType::ObjectStart, 0, 0, 0, 0, line, column}); + tape.push_back({.type = TapeType::ObjectStart, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = line, + .column = column}); container_stack.push_back({start_index, 0}); internal::skip_whitespace(cursor, end, line, column); @@ -707,7 +767,13 @@ do_scan_object: { } cursor++; tape[start_index].count = 0; - tape.push_back({TapeType::ObjectEnd, 0, 0, 0, 0, line, column}); + tape.push_back({.type = TapeType::ObjectEnd, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = line, + .column = column}); container_stack.pop_back(); goto do_scan_container_end; } @@ -791,15 +857,33 @@ do_scan_object: { goto do_scan_object; case internal::token_true: internal::scan_true(line, column, cursor, end); - tape.push_back({TapeType::True, 0, 0, 0, 0, value_line, value_column}); + tape.push_back({.type = TapeType::True, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = value_line, + .column = value_column}); goto do_scan_object_property_end; case internal::token_false: internal::scan_false(line, column, cursor, end); - tape.push_back({TapeType::False, 0, 0, 0, 0, value_line, value_column}); + tape.push_back({.type = TapeType::False, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = value_line, + .column = value_column}); goto do_scan_object_property_end; case internal::token_null: internal::scan_null(line, column, cursor, end); - tape.push_back({TapeType::Null, 0, 0, 0, 0, value_line, value_column}); + tape.push_back({.type = TapeType::Null, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = value_line, + .column = value_column}); goto do_scan_object_property_end; case internal::token_string_quote: { const auto string_start{ @@ -860,7 +944,13 @@ do_scan_object: { assert(!container_stack.empty()); auto &frame{container_stack.back()}; tape[frame.tape_index].count = frame.child_count; - tape.push_back({TapeType::ObjectEnd, 0, 0, 0, 0, line, column}); + tape.push_back({.type = TapeType::ObjectEnd, + .flags = 0, + .offset = 0, + .length = 0, + .count = 0, + .line = line, + .column = column}); container_stack.pop_back(); goto do_scan_container_end; } diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h index d28ccd4ce..5fa7e6804 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h @@ -35,7 +35,7 @@ template class GenericPointer { /// const sourcemeta::core::Pointer pointer; /// assert(pointer.empty()); /// ``` - GenericPointer() : data{} {} + GenericPointer() noexcept : data{} {} /// This constructor is the preferred way of creating a pointer. /// For example: diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_token.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_token.h index 01f4d020d..0f97c7bd7 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_token.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_token.h @@ -275,7 +275,7 @@ template class GenericToken { private: // We need this as a member for making WeakPointer work - inline static const Value::String DEFAULT_PROPERTY = ""; + inline static const Value::String DEFAULT_PROPERTY{}; inline static const Hash hasher; bool as_property; diff --git a/vendor/core/src/core/jsonpointer/jsonpointer.cc b/vendor/core/src/core/jsonpointer/jsonpointer.cc index 6677cb508..21eb6a1aa 100644 --- a/vendor/core/src/core/jsonpointer/jsonpointer.cc +++ b/vendor/core/src/core/jsonpointer/jsonpointer.cc @@ -536,7 +536,8 @@ auto is_relative_pointer(const std::string_view input) noexcept -> bool { } if (input[position] == '/') { - return is_pointer(input.substr(position)); + return is_pointer( + std::string_view{input.data() + position, size - position}); } return false; diff --git a/vendor/core/src/core/jsonpointer/position.cc b/vendor/core/src/core/jsonpointer/position.cc index 7b51bf89c..648ee18fb 100644 --- a/vendor/core/src/core/jsonpointer/position.cc +++ b/vendor/core/src/core/jsonpointer/position.cc @@ -124,10 +124,11 @@ auto PointerPositionTracker::get(const Pointer &pointer) const } auto PointerPositionTracker::size() const -> std::size_t { - return static_cast(std::count_if( - this->events.cbegin(), this->events.cend(), [](const Event &event) { - return event.phase == JSON::ParsePhase::Post; - })); + return static_cast( + std::count_if(this->events.cbegin(), this->events.cend(), + [](const Event &event) -> bool { + return event.phase == JSON::ParsePhase::Post; + })); } auto PointerPositionTracker::to_json() const -> JSON { diff --git a/vendor/core/src/core/jsonrpc/jsonrpc.cc b/vendor/core/src/core/jsonrpc/jsonrpc.cc index 89724d34e..e64c0d929 100644 --- a/vendor/core/src/core/jsonrpc/jsonrpc.cc +++ b/vendor/core/src/core/jsonrpc/jsonrpc.cc @@ -2,23 +2,28 @@ #include -#include // std::int64_t -#include // std::optional, std::nullopt -#include // std::move +#include // std::int64_t +#include // std::optional, std::nullopt +#include // std::string_view_literals +#include // std::move namespace { +using namespace std::string_view_literals; -const auto JSONRPC_HASH_ID{sourcemeta::core::JSON::Object::hash("id")}; +const auto JSONRPC_HASH_ID{sourcemeta::core::JSON::Object::hash("id"sv)}; const auto JSONRPC_HASH_JSONRPC{ - sourcemeta::core::JSON::Object::hash("jsonrpc")}; -const auto JSONRPC_HASH_METHOD{sourcemeta::core::JSON::Object::hash("method")}; -const auto JSONRPC_HASH_RESULT{sourcemeta::core::JSON::Object::hash("result")}; -const auto JSONRPC_HASH_ERROR{sourcemeta::core::JSON::Object::hash("error")}; -const auto JSONRPC_HASH_CODE{sourcemeta::core::JSON::Object::hash("code")}; + sourcemeta::core::JSON::Object::hash("jsonrpc"sv)}; +const auto JSONRPC_HASH_METHOD{ + sourcemeta::core::JSON::Object::hash("method"sv)}; +const auto JSONRPC_HASH_RESULT{ + sourcemeta::core::JSON::Object::hash("result"sv)}; +const auto JSONRPC_HASH_ERROR{sourcemeta::core::JSON::Object::hash("error"sv)}; +const auto JSONRPC_HASH_CODE{sourcemeta::core::JSON::Object::hash("code"sv)}; const auto JSONRPC_HASH_MESSAGE{ - sourcemeta::core::JSON::Object::hash("message")}; -const auto JSONRPC_HASH_DATA{sourcemeta::core::JSON::Object::hash("data")}; -const auto JSONRPC_HASH_PARAMS{sourcemeta::core::JSON::Object::hash("params")}; + sourcemeta::core::JSON::Object::hash("message"sv)}; +const auto JSONRPC_HASH_DATA{sourcemeta::core::JSON::Object::hash("data"sv)}; +const auto JSONRPC_HASH_PARAMS{ + sourcemeta::core::JSON::Object::hash("params"sv)}; } // namespace diff --git a/vendor/core/src/core/langtag/CMakeLists.txt b/vendor/core/src/core/langtag/CMakeLists.txt new file mode 100644 index 000000000..9ec88c581 --- /dev/null +++ b/vendor/core/src/core/langtag/CMakeLists.txt @@ -0,0 +1,9 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME langtag + SOURCES langtag.cc) + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME langtag) +endif() + +target_link_libraries(sourcemeta_core_langtag + PRIVATE sourcemeta::core::text) diff --git a/vendor/core/src/core/langtag/include/sourcemeta/core/langtag.h b/vendor/core/src/core/langtag/include/sourcemeta/core/langtag.h new file mode 100644 index 000000000..7019df93e --- /dev/null +++ b/vendor/core/src/core/langtag/include/sourcemeta/core/langtag.h @@ -0,0 +1,47 @@ +#ifndef SOURCEMETA_CORE_LANGTAG_H_ +#define SOURCEMETA_CORE_LANGTAG_H_ + +#ifndef SOURCEMETA_CORE_LANGTAG_EXPORT +#include +#endif + +#include // std::string_view + +/// @defgroup langtag LangTag +/// @brief BCP 47 language tag validation utilities. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +namespace sourcemeta::core { + +/// @ingroup langtag +/// Check whether the given string is a well-formed language tag per RFC 5646 +/// (BCP 47). In addition to the grammar, the two duplication errors that the +/// specification forbids without consulting any registry are rejected: a +/// repeated variant subtag (RFC 5646 Section 2.2.5) and more than one extension +/// for the same singleton (RFC 5646 Section 2.2.6). Validity against the IANA +/// Language Subtag Registry is not checked, so a structurally well-formed tag +/// whose subtags are not registered is still accepted. Comparison is +/// case-insensitive, as language tags are. For example: +/// +/// ```cpp +/// #include +/// +/// #include +/// +/// assert(sourcemeta::core::is_langtag("en")); +/// assert(sourcemeta::core::is_langtag("zh-Hant-HK")); +/// assert(sourcemeta::core::is_langtag("x-private")); +/// assert(!sourcemeta::core::is_langtag("en-")); +/// assert(!sourcemeta::core::is_langtag("de-1996-1996")); +/// ``` +SOURCEMETA_CORE_LANGTAG_EXPORT +auto is_langtag(const std::string_view value) -> bool; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/langtag/langtag.cc b/vendor/core/src/core/langtag/langtag.cc new file mode 100644 index 000000000..ed00282e4 --- /dev/null +++ b/vendor/core/src/core/langtag/langtag.cc @@ -0,0 +1,252 @@ +#include +#include + +#include // std::array +#include // std::size_t +#include // std::uint64_t +#include // std::string_view + +namespace { + +// The irregular grandfathered tags (RFC 5646 Section 2.2.8) do not fit the +// langtag grammar and must be matched literally. The regular grandfathered +// tags are intentionally omitted, as they are already accepted by the langtag +// grammar. +constexpr std::array irregular_grandfathered{ + {"en-GB-oed", "i-ami", "i-bnn", "i-default", "i-enochian", "i-hak", + "i-klingon", "i-lux", "i-mingo", "i-navajo", "i-pwn", "i-tao", "i-tay", + "i-tsu", "sgn-BE-FR", "sgn-BE-NL", "sgn-CH-DE"}}; + +// The subtag starting at the given position, up to the next hyphen or the end. +auto subtag_at(const std::string_view value, const std::size_t position) + -> std::string_view { + auto end{position}; + while (end < value.size() && value[end] != '-') { + end += 1; + } + return value.substr(position, end - position); +} + +// Advance past a subtag of the given length, and the separating hyphen if any. +auto advance(const std::string_view value, std::size_t &position, + const std::size_t length) -> void { + position += length; + if (position < value.size()) { + position += 1; + } +} + +// singleton = DIGIT / a-w / y-z, mapped to the range [0, 35] +auto singleton_index(const char character) -> std::size_t { + const auto lowered{sourcemeta::core::to_lowercase(character)}; + if (sourcemeta::core::is_digit(lowered)) { + return static_cast(lowered - '0'); + } + return static_cast(lowered - 'a') + 10; +} + +// language = 2*3ALPHA ["-" extlang] / 4ALPHA / 5*8ALPHA +auto parse_language(const std::string_view value, std::size_t &position) + -> bool { + const auto primary{subtag_at(value, position)}; + if (!sourcemeta::core::is_alpha(primary)) { + return false; + } + if (primary.size() >= 2 && primary.size() <= 3) { + advance(value, position, primary.size()); + // extlang = 3ALPHA *2("-" 3ALPHA), so up to three subtags of three letters + for (std::size_t count{0}; count < 3; count += 1) { + const auto extlang{subtag_at(value, position)}; + if (extlang.size() != 3 || !sourcemeta::core::is_alpha(extlang)) { + break; + } + advance(value, position, 3); + } + return true; + } + if (primary.size() == 4 || (primary.size() >= 5 && primary.size() <= 8)) { + advance(value, position, primary.size()); + return true; + } + return false; +} + +// script = 4ALPHA +auto parse_script(const std::string_view value, std::size_t &position) -> void { + const auto script{subtag_at(value, position)}; + if (script.size() == 4 && sourcemeta::core::is_alpha(script)) { + advance(value, position, 4); + } +} + +// region = 2ALPHA / 3DIGIT +auto parse_region(const std::string_view value, std::size_t &position) -> void { + const auto region{subtag_at(value, position)}; + if ((region.size() == 2 && sourcemeta::core::is_alpha(region)) || + (region.size() == 3 && sourcemeta::core::is_digit(region))) { + advance(value, position, region.size()); + } +} + +// Whether the candidate variant already appears among the subtags in +// [begin, end). The variant region is contiguous, so it is re-scanned rather +// than stored. +auto seen_variant(const std::string_view value, const std::size_t begin, + const std::size_t end, const std::string_view candidate) + -> bool { + std::size_t cursor{begin}; + while (cursor < end) { + auto stop{cursor}; + while (stop < end && value[stop] != '-') { + stop += 1; + } + if (sourcemeta::core::equals_ignore_case( + value.substr(cursor, stop - cursor), candidate)) { + return true; + } + cursor = stop + 1; + } + return false; +} + +// variant = 5*8alphanum / (DIGIT 3alphanum), with no repeats +auto parse_variants(const std::string_view value, std::size_t &position) + -> bool { + const auto begin{position}; + while (true) { + const auto variant{subtag_at(value, position)}; + const bool matches{(variant.size() >= 5 && variant.size() <= 8 && + sourcemeta::core::is_alphanum(variant)) || + (variant.size() == 4 && + sourcemeta::core::is_digit(variant.front()) && + sourcemeta::core::is_alphanum(variant))}; + if (!matches) { + break; + } + if (seen_variant(value, begin, position, variant)) { + return false; + } + advance(value, position, variant.size()); + } + return true; +} + +// Consume one or more alphanumeric subtags whose length is in +// [minimum_length, 8], requiring at least one. +auto consume_subtags(const std::string_view value, std::size_t &position, + const std::size_t minimum_length) -> bool { + std::size_t count{0}; + while (true) { + const auto subtag{subtag_at(value, position)}; + if (subtag.size() < minimum_length || subtag.size() > 8 || + !sourcemeta::core::is_alphanum(subtag)) { + break; + } + advance(value, position, subtag.size()); + count += 1; + } + return count > 0; +} + +// extension = singleton 1*("-" (2*8alphanum)), with each singleton at most +// once. The singleton "x" is excluded as it introduces the private use. +auto parse_extensions(const std::string_view value, std::size_t &position) + -> bool { + std::uint64_t seen{0}; + while (true) { + const auto singleton{subtag_at(value, position)}; + if (singleton.size() != 1) { + break; + } + const auto character{singleton.front()}; + if (!sourcemeta::core::is_alphanum(character) || + sourcemeta::core::to_lowercase(character) == 'x') { + break; + } + const auto bit{std::uint64_t{1} << singleton_index(character)}; + if ((seen & bit) != 0) { + return false; + } + seen |= bit; + advance(value, position, 1); + if (!consume_subtags(value, position, 2)) { + return false; + } + } + return true; +} + +// privateuse = "x" 1*("-" (1*8alphanum)) +auto parse_privateuse(const std::string_view value, std::size_t &position) + -> bool { + const auto singleton{subtag_at(value, position)}; + if (singleton.size() != 1 || + sourcemeta::core::to_lowercase(singleton.front()) != 'x') { + return true; + } + advance(value, position, 1); + return consume_subtags(value, position, 1); +} + +// Language-Tag = langtag (RFC 5646 Section 2.1) +auto valid_langtag(const std::string_view value) -> bool { + std::size_t position{0}; + if (!parse_language(value, position)) { + return false; + } + parse_script(value, position); + parse_region(value, position); + if (!parse_variants(value, position)) { + return false; + } + if (!parse_extensions(value, position)) { + return false; + } + if (!parse_privateuse(value, position)) { + return false; + } + return position >= value.size(); +} + +// Language-Tag = privateuse (RFC 5646 Section 2.1) +auto valid_privateuse(const std::string_view value) -> bool { + std::size_t position{0}; + const auto singleton{subtag_at(value, position)}; + if (singleton.size() != 1 || + sourcemeta::core::to_lowercase(singleton.front()) != 'x') { + return false; + } + advance(value, position, 1); + return consume_subtags(value, position, 1) && position >= value.size(); +} + +auto is_irregular_grandfathered(const std::string_view value) -> bool { + for (const auto entry : irregular_grandfathered) { + if (sourcemeta::core::equals_ignore_case(value, entry)) { + return true; + } + } + return false; +} + +} // namespace + +namespace sourcemeta::core { + +auto is_langtag(const std::string_view value) -> bool { + if (value.empty() || value.front() == '-' || value.back() == '-') { + return false; + } + // Language-Tag = langtag / privateuse / grandfathered. The regular + // grandfathered tags are covered by the langtag alternative, so only the + // irregular ones need a literal fallback. + if (valid_langtag(value)) { + return true; + } + if (valid_privateuse(value)) { + return true; + } + return is_irregular_grandfathered(value); +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/markdown/markdown.cc b/vendor/core/src/core/markdown/markdown.cc index 25aa4da70..9ee1965f7 100644 --- a/vendor/core/src/core/markdown/markdown.cc +++ b/vendor/core/src/core/markdown/markdown.cc @@ -6,17 +6,22 @@ #include // std::array #include // std::free +#include // std::mutex, std::scoped_lock #include // std::string -namespace { -const bool cmark_initialized = - (cmark_gfm_core_extensions_ensure_registered(), true); -} - namespace sourcemeta::core { auto markdown_to_html(const std::string_view input, const bool safe) -> std::string { + [[maybe_unused]] static const bool cmark_initialized{ + (cmark_gfm_core_extensions_ensure_registered(), true)}; + + // cmark-gfm toggles process-global special-character tables when syntax + // extensions are attached and detached, so parser construction through + // teardown cannot run concurrently + static std::mutex cmark_mutex; + const std::scoped_lock lock{cmark_mutex}; + static constexpr auto base_options{ CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_FOOTNOTES | CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE | CMARK_OPT_GITHUB_PRE_LANG}; diff --git a/vendor/core/src/core/mcp/mcp.cc b/vendor/core/src/core/mcp/mcp.cc index 247e268f5..a3934b964 100644 --- a/vendor/core/src/core/mcp/mcp.cc +++ b/vendor/core/src/core/mcp/mcp.cc @@ -3,68 +3,73 @@ #include #include -#include // std::clamp -#include // assert -#include // std::isnan -#include // std::size_t -#include // std::optional -#include // std::ostringstream -#include // std::string -#include // std::move +#include // std::clamp +#include // assert +#include // std::isnan +#include // std::size_t +#include // std::optional +#include // std::ostringstream +#include // std::string +#include // std::string_view_literals +#include // std::move namespace { +using namespace std::string_view_literals; const auto MCP_HASH_ANNOTATIONS{ - sourcemeta::core::JSON::Object::hash("annotations")}; + sourcemeta::core::JSON::Object::hash("annotations"sv)}; const auto MCP_HASH_ARGUMENTS{ - sourcemeta::core::JSON::Object::hash("arguments")}; + sourcemeta::core::JSON::Object::hash("arguments"sv)}; const auto MCP_HASH_CAPABILITIES{ - sourcemeta::core::JSON::Object::hash("capabilities")}; + sourcemeta::core::JSON::Object::hash("capabilities"sv)}; const auto MCP_HASH_COMPLETIONS{ - sourcemeta::core::JSON::Object::hash("completions")}; -const auto MCP_HASH_CONTENT{sourcemeta::core::JSON::Object::hash("content")}; -const auto MCP_HASH_CONTENTS{sourcemeta::core::JSON::Object::hash("contents")}; + sourcemeta::core::JSON::Object::hash("completions"sv)}; +const auto MCP_HASH_CONTENT{sourcemeta::core::JSON::Object::hash("content"sv)}; +const auto MCP_HASH_CONTENTS{ + sourcemeta::core::JSON::Object::hash("contents"sv)}; const auto MCP_HASH_DESCRIPTION{ - sourcemeta::core::JSON::Object::hash("description")}; + sourcemeta::core::JSON::Object::hash("description"sv)}; const auto MCP_HASH_DESTRUCTIVE_HINT{ - sourcemeta::core::JSON::Object::hash("destructiveHint")}; + sourcemeta::core::JSON::Object::hash("destructiveHint"sv)}; const auto MCP_HASH_IDEMPOTENT_HINT{ - sourcemeta::core::JSON::Object::hash("idempotentHint")}; + sourcemeta::core::JSON::Object::hash("idempotentHint"sv)}; const auto MCP_HASH_INPUT_SCHEMA{ - sourcemeta::core::JSON::Object::hash("inputSchema")}; + sourcemeta::core::JSON::Object::hash("inputSchema"sv)}; const auto MCP_HASH_INSTRUCTIONS{ - sourcemeta::core::JSON::Object::hash("instructions")}; -const auto MCP_HASH_IS_ERROR{sourcemeta::core::JSON::Object::hash("isError")}; -const auto MCP_HASH_LOGGING{sourcemeta::core::JSON::Object::hash("logging")}; -const auto MCP_HASH_MIME_TYPE{sourcemeta::core::JSON::Object::hash("mimeType")}; -const auto MCP_HASH_NAME{sourcemeta::core::JSON::Object::hash("name")}; + sourcemeta::core::JSON::Object::hash("instructions"sv)}; +const auto MCP_HASH_IS_ERROR{sourcemeta::core::JSON::Object::hash("isError"sv)}; +const auto MCP_HASH_LOGGING{sourcemeta::core::JSON::Object::hash("logging"sv)}; +const auto MCP_HASH_MIME_TYPE{ + sourcemeta::core::JSON::Object::hash("mimeType"sv)}; +const auto MCP_HASH_NAME{sourcemeta::core::JSON::Object::hash("name"sv)}; const auto MCP_HASH_OPEN_WORLD_HINT{ - sourcemeta::core::JSON::Object::hash("openWorldHint")}; + sourcemeta::core::JSON::Object::hash("openWorldHint"sv)}; const auto MCP_HASH_OUTPUT_SCHEMA{ - sourcemeta::core::JSON::Object::hash("outputSchema")}; -const auto MCP_HASH_PRIORITY{sourcemeta::core::JSON::Object::hash("priority")}; -const auto MCP_HASH_PROMPTS{sourcemeta::core::JSON::Object::hash("prompts")}; + sourcemeta::core::JSON::Object::hash("outputSchema"sv)}; +const auto MCP_HASH_PRIORITY{ + sourcemeta::core::JSON::Object::hash("priority"sv)}; +const auto MCP_HASH_PROMPTS{sourcemeta::core::JSON::Object::hash("prompts"sv)}; const auto MCP_HASH_PROTOCOL_VERSION{ - sourcemeta::core::JSON::Object::hash("protocolVersion")}; + sourcemeta::core::JSON::Object::hash("protocolVersion"sv)}; const auto MCP_HASH_READ_ONLY_HINT{ - sourcemeta::core::JSON::Object::hash("readOnlyHint")}; + sourcemeta::core::JSON::Object::hash("readOnlyHint"sv)}; const auto MCP_HASH_RESOURCES{ - sourcemeta::core::JSON::Object::hash("resources")}; + sourcemeta::core::JSON::Object::hash("resources"sv)}; const auto MCP_HASH_SERVER_INFO{ - sourcemeta::core::JSON::Object::hash("serverInfo")}; -const auto MCP_HASH_SIZE{sourcemeta::core::JSON::Object::hash("size")}; + sourcemeta::core::JSON::Object::hash("serverInfo"sv)}; +const auto MCP_HASH_SIZE{sourcemeta::core::JSON::Object::hash("size"sv)}; const auto MCP_HASH_STRUCTURED_CONTENT{ - sourcemeta::core::JSON::Object::hash("structuredContent")}; -const auto MCP_HASH_TEXT{sourcemeta::core::JSON::Object::hash("text")}; -const auto MCP_HASH_TITLE{sourcemeta::core::JSON::Object::hash("title")}; -const auto MCP_HASH_TOOLS{sourcemeta::core::JSON::Object::hash("tools")}; -const auto MCP_HASH_TYPE{sourcemeta::core::JSON::Object::hash("type")}; -const auto MCP_HASH_URI{sourcemeta::core::JSON::Object::hash("uri")}; + sourcemeta::core::JSON::Object::hash("structuredContent"sv)}; +const auto MCP_HASH_TEXT{sourcemeta::core::JSON::Object::hash("text"sv)}; +const auto MCP_HASH_TITLE{sourcemeta::core::JSON::Object::hash("title"sv)}; +const auto MCP_HASH_TOOLS{sourcemeta::core::JSON::Object::hash("tools"sv)}; +const auto MCP_HASH_TYPE{sourcemeta::core::JSON::Object::hash("type"sv)}; +const auto MCP_HASH_URI{sourcemeta::core::JSON::Object::hash("uri"sv)}; const auto MCP_HASH_URI_TEMPLATE{ - sourcemeta::core::JSON::Object::hash("uriTemplate")}; -const auto MCP_HASH_VERSION{sourcemeta::core::JSON::Object::hash("version")}; + sourcemeta::core::JSON::Object::hash("uriTemplate"sv)}; +const auto MCP_HASH_VERSION{sourcemeta::core::JSON::Object::hash("version"sv)}; const auto MCP_HASH_WEBSITE_URL{ - sourcemeta::core::JSON::Object::hash("websiteUrl")}; + sourcemeta::core::JSON::Object::hash("websiteUrl"sv)}; } // namespace diff --git a/vendor/core/src/core/semver/CMakeLists.txt b/vendor/core/src/core/semver/CMakeLists.txt index 3141a6695..edf268a24 100644 --- a/vendor/core/src/core/semver/CMakeLists.txt +++ b/vendor/core/src/core/semver/CMakeLists.txt @@ -3,6 +3,8 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME semver SOURCES semver.cc) target_link_libraries(sourcemeta_core_semver PUBLIC sourcemeta::core::preprocessor) +target_link_libraries(sourcemeta_core_semver + PRIVATE sourcemeta::core::text) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME semver) diff --git a/vendor/core/src/core/semver/semver.cc b/vendor/core/src/core/semver/semver.cc index 43d335c78..d40013e6c 100644 --- a/vendor/core/src/core/semver/semver.cc +++ b/vendor/core/src/core/semver/semver.cc @@ -1,4 +1,5 @@ #include +#include #include // std::array #include // std::to_chars @@ -8,17 +9,8 @@ namespace { -auto is_digit(const char character) -> bool { - return character >= '0' && character <= '9'; -} - -auto is_letter(const char character) -> bool { - return (character >= 'A' && character <= 'Z') || - (character >= 'a' && character <= 'z'); -} - auto is_identifier_character(const char character) -> bool { - return is_digit(character) || is_letter(character) || character == '-'; + return sourcemeta::core::is_alphanum(character) || character == '-'; } constexpr auto UINT64_MAX_VALUE = std::numeric_limits::max(); @@ -30,17 +22,19 @@ enum class NumericParseResult : std::uint8_t { success, invalid, overflow }; auto parse_numeric_identifier(const std::string_view input, std::size_t &position, std::uint64_t &result) -> NumericParseResult { - if (position >= input.size() || !is_digit(input[position])) { + if (position >= input.size() || + !sourcemeta::core::is_digit(input[position])) { return NumericParseResult::invalid; } if (input[position] == '0' && position + 1 < input.size() && - is_digit(input[position + 1])) { + sourcemeta::core::is_digit(input[position + 1])) { return NumericParseResult::invalid; } std::uint64_t value = 0; - while (position < input.size() && is_digit(input[position])) { + while (position < input.size() && + sourcemeta::core::is_digit(input[position])) { const auto digit = static_cast(input[position] - '0'); if (value > UINT64_MAX_DIV_10 || (value == UINT64_MAX_DIV_10 && digit > UINT64_MAX_MOD_10)) { @@ -67,7 +61,7 @@ auto validate_pre_release_identifier(const std::string_view identifier) return false; } - if (!is_digit(character)) { + if (!sourcemeta::core::is_digit(character)) { has_non_digit = true; } } @@ -106,7 +100,8 @@ auto validate_dot_separated(const std::string_view input) -> bool { dot_position = input.size(); } - if (!validator(input.substr(start, dot_position - start))) { + if (!validator( + std::string_view{input.data() + start, dot_position - start})) { return false; } @@ -129,7 +124,7 @@ auto classify_identifier(const std::string_view identifier) noexcept -> IdentifierInfo { std::uint64_t value = 0; for (const auto character : identifier) { - if (!is_digit(character)) { + if (!sourcemeta::core::is_digit(character)) { return {.is_numeric = false, .overflowed = false, .numeric_value = 0}; } @@ -342,7 +337,9 @@ auto parse_semver(const std::string_view input, std::uint64_t &major, ++position; } - pre_release = input.substr(start, position - start); + // The scan above keeps the bounds within the input, so build the view + // directly to keep this path non-throwing + pre_release = std::string_view{input.data() + start, position - start}; if (!validate_dot_separated(pre_release)) { if constexpr (should_throw) { throw sourcemeta::core::SemVerParseError(start + 1); @@ -357,7 +354,7 @@ auto parse_semver(const std::string_view input, std::uint64_t &major, const auto start = position; position = input.size(); - build = input.substr(start, position - start); + build = std::string_view{input.data() + start, position - start}; if (!validate_dot_separated(build)) { if constexpr (should_throw) { throw sourcemeta::core::SemVerParseError(start + 1); diff --git a/vendor/core/src/core/unicode/include/sourcemeta/core/unicode.h b/vendor/core/src/core/unicode/include/sourcemeta/core/unicode.h index a2f4f689c..aaedd2848 100644 --- a/vendor/core/src/core/unicode/include/sourcemeta/core/unicode.h +++ b/vendor/core/src/core/unicode/include/sourcemeta/core/unicode.h @@ -14,6 +14,7 @@ #include // std::ostream #include // std::string, std::u32string, std::wstring #include // std::string_view, std::wstring_view +#include // std::pair, std::make_pair /// @defgroup unicode Unicode /// @brief Unicode encoding utilities. @@ -571,6 +572,52 @@ utf8_codepoint_length(const std::string_view input, return size; } +/// @ingroup unicode +/// Decode the single UTF-8 codepoint that begins at the given position within +/// the input, returning the codepoint together with the number of bytes it +/// occupies, or an empty result when the bytes at that position do not start a +/// valid UTF-8 codepoint (RFC 3629 Section 4, excluding overlong encodings, +/// surrogates, and code points above U+10FFFF). For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// const auto result{sourcemeta::core::utf8_decode("\xCE\xB1", 0)}; +/// assert(result.has_value()); +/// assert(result.value().first == 0x03B1); +/// assert(result.value().second == 2); +/// assert(!sourcemeta::core::utf8_decode("\xED\xA0\x80", 0).has_value()); +/// ``` +inline constexpr auto utf8_decode(const std::string_view input, + const std::string_view::size_type position) + -> std::optional> { + const auto size{utf8_codepoint_length(input, position)}; + if (size == 0) { + return std::nullopt; + } + + const auto lead{static_cast(input[position])}; + char32_t codepoint{0}; + if (size == 1) { + codepoint = static_cast(lead); + } else if (size == 2) { + codepoint = static_cast(lead & 0x1FU); + } else if (size == 3) { + codepoint = static_cast(lead & 0x0FU); + } else { + codepoint = static_cast(lead & 0x07U); + } + + for (std::size_t index{1}; index < size; ++index) { + const auto continuation{ + static_cast(input[position + index])}; + codepoint = (codepoint << 6) | static_cast(continuation & 0x3FU); + } + + return std::make_pair(codepoint, size); +} + } // namespace sourcemeta::core #endif diff --git a/vendor/core/src/core/unicode/nfc.cc b/vendor/core/src/core/unicode/nfc.cc index 672da8e67..7e0d70f86 100644 --- a/vendor/core/src/core/unicode/nfc.cc +++ b/vendor/core/src/core/unicode/nfc.cc @@ -61,11 +61,12 @@ auto canonical_reorder(std::u32string &buffer) -> void { while (end < buffer.size() && combining_class(buffer[end]) != 0) { end += 1; } - std::stable_sort(buffer.begin() + static_cast(index), - buffer.begin() + static_cast(end), - [](const char32_t left, const char32_t right) noexcept { - return combining_class(left) < combining_class(right); - }); + std::stable_sort( + buffer.begin() + static_cast(index), + buffer.begin() + static_cast(end), + [](const char32_t left, const char32_t right) noexcept -> bool { + return combining_class(left) < combining_class(right); + }); index = end; } } diff --git a/vendor/core/src/core/uri/accessors.cc b/vendor/core/src/core/uri/accessors.cc index 6ba7d0ca4..a2d5d2f50 100644 --- a/vendor/core/src/core/uri/accessors.cc +++ b/vendor/core/src/core/uri/accessors.cc @@ -13,6 +13,8 @@ auto URI::is_absolute() const noexcept -> bool { return this->scheme_.has_value(); } +auto URI::is_internationalized() const noexcept -> bool { return this->iri_; } + auto URI::is_urn() const -> bool { const auto scheme{this->scheme()}; return scheme.has_value() && scheme.value() == "urn"; diff --git a/vendor/core/src/core/uri/canonicalize.cc b/vendor/core/src/core/uri/canonicalize.cc index f623d4517..0b0c62d66 100644 --- a/vendor/core/src/core/uri/canonicalize.cc +++ b/vendor/core/src/core/uri/canonicalize.cc @@ -1,14 +1,114 @@ #include +#include #include #include "escaping.h" #include "normalize.h" +#include // std::array +#include // std::uint8_t #include // std::optional #include // std::string namespace sourcemeta::core { +namespace { + +// Percent-decode the octets that an IRI may carry unencoded: the URI unreserved +// characters plus the non-ASCII characters permitted by RFC 3987. A +// percent-encoded ASCII unreserved octet is decoded on its own. A run of +// percent-encoded octets that together form a valid non-ASCII UTF-8 character +// is decoded to its literal bytes. Everything else, including private-use and +// reserved characters, is left percent-encoded. +// See https://www.rfc-editor.org/rfc/rfc3987#section-5.3.2.3 +auto unescape_iunreserved_inplace(std::string &input) -> void { + std::string output; + output.reserve(input.size()); + + for (std::string::size_type position{0}; position < input.size();) { + if (!uri_is_percent_encoded(input, position)) { + output += input[position]; + position += 1; + continue; + } + + const auto lead{ + static_cast((uri_hex_to_int(input[position + 1]) << 4) | + uri_hex_to_int(input[position + 2]))}; + const auto length{sourcemeta::core::utf8_lead_byte_size(lead)}; + + if (length == 1) { + if (uri_is_unreserved(static_cast(lead))) { + output += static_cast(lead); + } else { + output += input[position]; + output += input[position + 1]; + output += input[position + 2]; + } + position += 3; + continue; + } + + std::array bytes{}; + bytes[0] = lead; + char32_t codepoint{0}; + if (length == 2) { + codepoint = static_cast(lead & 0x1FU); + } else if (length == 3) { + codepoint = static_cast(lead & 0x0FU); + } else { + codepoint = static_cast(lead & 0x07U); + } + + bool decodable{length >= 2 && length <= 4}; + for (std::uint8_t offset{1}; decodable && offset < length; offset += 1) { + const auto continuation_position{ + position + (static_cast(offset) * 3)}; + if (!uri_is_percent_encoded(input, continuation_position)) { + decodable = false; + break; + } + const auto continuation{static_cast( + (uri_hex_to_int(input[continuation_position + 1]) << 4) | + uri_hex_to_int(input[continuation_position + 2]))}; + if (!sourcemeta::core::is_utf8_continuation(continuation)) { + decodable = false; + break; + } + bytes[offset] = continuation; + codepoint = + (codepoint << 6) | static_cast(continuation & 0x3FU); + } + + if (decodable && sourcemeta::core::is_valid_codepoint(codepoint) && + sourcemeta::core::utf8_codepoint_byte_count(codepoint) == length && + sourcemeta::core::is_ucschar(codepoint)) { + for (std::uint8_t offset{0}; offset < length; offset += 1) { + output += static_cast(bytes[offset]); + } + position += static_cast(length) * 3; + } else { + output += input[position]; + output += input[position + 1]; + output += input[position + 2]; + position += 3; + } + } + + input = std::move(output); +} + +auto normalize_component(std::string &component, const bool iri) -> void { + uri_normalize_percent_encoding_inplace(component); + if (iri) { + unescape_iunreserved_inplace(component); + } else { + uri_unescape_unreserved_inplace(component); + } +} + +} // namespace + auto URI::canonicalize() -> URI & { // Lowercase scheme (schemes are case-insensitive per RFC 3986) if (this->scheme_.has_value()) { @@ -29,33 +129,30 @@ auto URI::canonicalize() -> URI & { this->fragment_ = std::nullopt; } - // Only unreserved characters may be percent-decoded during normalization + // Percent-decode octets that form unreserved characters, or, for an IRI, the + // wider set that also includes the non-ASCII characters permitted by RFC 3987 // See https://www.rfc-editor.org/rfc/rfc3986#section-6.2.2.2 + // See https://www.rfc-editor.org/rfc/rfc3987#section-5.3.2.3 if (this->path_.has_value()) { - uri_normalize_percent_encoding_inplace(this->path_.value()); - uri_unescape_unreserved_inplace(this->path_.value()); + normalize_component(this->path_.value(), this->iri_); } if (this->query_.has_value()) { - uri_normalize_percent_encoding_inplace(this->query_.value()); - uri_unescape_unreserved_inplace(this->query_.value()); + normalize_component(this->query_.value(), this->iri_); } if (this->fragment_.has_value()) { - uri_normalize_percent_encoding_inplace(this->fragment_.value()); - uri_unescape_unreserved_inplace(this->fragment_.value()); + normalize_component(this->fragment_.value(), this->iri_); } if (this->userinfo_.has_value()) { - uri_normalize_percent_encoding_inplace(this->userinfo_.value()); - uri_unescape_unreserved_inplace(this->userinfo_.value()); + normalize_component(this->userinfo_.value(), this->iri_); } // Hostnames are case-insensitive per RFC 3986, and the lowercasing must come // after decoding so that a percent-encoded uppercase letter is also folded if (this->host_.has_value()) { - uri_normalize_percent_encoding_inplace(this->host_.value()); - uri_unescape_unreserved_inplace(this->host_.value()); + normalize_component(this->host_.value(), this->iri_); sourcemeta::core::to_lowercase(this->host_.value()); } diff --git a/vendor/core/src/core/uri/filesystem.cc b/vendor/core/src/core/uri/filesystem.cc index c789fbb5b..636d1a45b 100644 --- a/vendor/core/src/core/uri/filesystem.cc +++ b/vendor/core/src/core/uri/filesystem.cc @@ -15,7 +15,7 @@ namespace { auto is_localhost_host(const std::string_view host) -> bool { constexpr std::string_view localhost{"localhost"}; return std::ranges::equal( - host, localhost, [](const char left, const char right) { + host, localhost, [](const char left, const char right) -> bool { return std::tolower(static_cast(left)) == right; }); } diff --git a/vendor/core/src/core/uri/grammar.h b/vendor/core/src/core/uri/grammar.h index a329cc955..1c36df336 100644 --- a/vendor/core/src/core/uri/grammar.h +++ b/vendor/core/src/core/uri/grammar.h @@ -1,7 +1,7 @@ #ifndef SOURCEMETA_CORE_URI_GRAMMAR_H_ #define SOURCEMETA_CORE_URI_GRAMMAR_H_ -#include // std::isalnum, std::isalpha, std::isdigit +#include namespace sourcemeta::core { @@ -50,7 +50,7 @@ constexpr char URI_PERCENT = '%'; // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" // See https://www.rfc-editor.org/rfc/rfc3986#section-2.3 inline auto uri_is_unreserved(const char character) -> bool { - if (std::isalnum(static_cast(character))) { + if (is_alphanum(character)) { return true; } @@ -89,7 +89,7 @@ inline auto uri_is_sub_delim(const char character) -> bool { // Scheme characters: ALPHA / DIGIT / "+" / "-" / "." // See https://www.rfc-editor.org/rfc/rfc3986#section-3.1 inline auto uri_is_scheme_char(const char character) -> bool { - if (std::isalnum(static_cast(character))) { + if (is_alphanum(character)) { return true; } diff --git a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h index 084a8dbcf..31415cf9d 100644 --- a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h +++ b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h @@ -664,8 +664,9 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// ``` auto userinfo(const std::string_view userinfo) -> URI &; - /// To support equality of URIs - auto operator==(const URI &other) const noexcept -> bool = default; + /// Two URIs are equal when their components match, independent of how the + /// input was parsed + auto operator==(const URI &other) const noexcept -> bool; /// To support ordering of URIs auto operator<(const URI &other) const noexcept -> bool; @@ -695,6 +696,34 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// ``` static auto from_path(const std::filesystem::path &path) -> URI; + /// Create a URI from a string that may be an Internationalized Resource + /// Identifier (IRI) as defined by RFC 3987, accepting the non-ASCII + /// characters that a plain URI does not permit. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const auto + /// uri{sourcemeta::core::URI::from_iri("https://example.com/café")}; + /// assert(uri.recompose() == "https://example.com/café"); + /// ``` + static auto from_iri(std::string_view input) -> URI; + + /// Check whether this object holds an Internationalized Resource Identifier + /// (IRI) as defined by RFC 3987, rather than a plain URI. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const auto + /// iri{sourcemeta::core::URI::from_iri("https://example.com/foo")}; + /// assert(iri.is_internationalized()); + /// assert(!sourcemeta::core::URI{"https://example.com/foo"}.is_internationalized()); + /// ``` + [[nodiscard]] auto is_internationalized() const noexcept -> bool; + /// A convenient method to canonicalize and recompose a URI from a string. For /// example: /// @@ -811,6 +840,9 @@ class SOURCEMETA_CORE_URI_EXPORT URI { std::optional fragment_{}; std::optional query_{}; bool ip_literal_{false}; + // Whether this object was parsed as an IRI (RFC 3987) rather than a URI + // (RFC 3986) + bool iri_{false}; #if defined(_MSC_VER) #pragma warning(default : 4251) #endif diff --git a/vendor/core/src/core/uri/parse.cc b/vendor/core/src/core/uri/parse.cc index 7786bda3b..0f90e9b8a 100644 --- a/vendor/core/src/core/uri/parse.cc +++ b/vendor/core/src/core/uri/parse.cc @@ -46,42 +46,14 @@ auto validate_percent_encoded_utf8(const std::string_view input, decode_utf8_codepoint(const std::string_view input, const std::string_view::size_type position) -> std::pair { - const auto lead = static_cast(input[position]); - const auto length = sourcemeta::core::utf8_lead_byte_size(lead); - if (length == 0 || position + length > input.size()) [[unlikely]] { + const auto decoded{sourcemeta::core::utf8_decode(input, position)}; + if (!decoded.has_value()) [[unlikely]] { throw sourcemeta::core::URIParseError{ static_cast(position + 1)}; } - char32_t codepoint{0}; - if (length == 1) { - codepoint = static_cast(lead); - } else if (length == 2) { - codepoint = static_cast(lead & 0x1FU); - } else if (length == 3) { - codepoint = static_cast(lead & 0x0FU); - } else { - codepoint = static_cast(lead & 0x07U); - } - - for (std::uint8_t offset{1}; offset < length; offset += 1) { - const auto continuation = - static_cast(input[position + offset]); - if (!sourcemeta::core::is_utf8_continuation(continuation)) [[unlikely]] { - throw sourcemeta::core::URIParseError{ - static_cast(position + 1)}; - } - codepoint = (codepoint << 6) | static_cast(continuation & 0x3FU); - } - - if (!sourcemeta::core::is_valid_codepoint(codepoint) || - sourcemeta::core::utf8_codepoint_byte_count(codepoint) != length) - [[unlikely]] { - throw sourcemeta::core::URIParseError{ - static_cast(position + 1)}; - } - - return {codepoint, length}; + return {decoded.value().first, + static_cast(decoded.value().second)}; } template @@ -662,9 +634,15 @@ auto URI::parse(const std::string_view input) -> void { assert(!this->path_.has_value()); assert(!this->query_.has_value()); assert(!this->fragment_.has_value()); - do_parse(input, this->scheme_, this->userinfo_, this->host_, - this->port_, this->path_, this->query_, - this->fragment_, this->ip_literal_); + if (this->iri_) { + do_parse(input, this->scheme_, this->userinfo_, this->host_, + this->port_, this->path_, this->query_, + this->fragment_, this->ip_literal_); + } else { + do_parse(input, this->scheme_, this->userinfo_, this->host_, + this->port_, this->path_, this->query_, + this->fragment_, this->ip_literal_); + } } auto URI::is_uri(const std::string_view input) noexcept -> bool { diff --git a/vendor/core/src/core/uri/path.cc b/vendor/core/src/core/uri/path.cc index eb61bef60..6273277a7 100644 --- a/vendor/core/src/core/uri/path.cc +++ b/vendor/core/src/core/uri/path.cc @@ -1,3 +1,4 @@ +#include #include #include "escaping.h" @@ -31,6 +32,15 @@ auto canonicalize_path(const std::string_view input, std::string &output) } else if (character == sourcemeta::core::URI_SLASH || sourcemeta::core::uri_is_pchar(character)) { ++index; + } else if ((static_cast(character) & 0x80U) != 0U) { + // Accept the non-ASCII characters permitted by RFC 3987, so that IRI + // paths are handled in addition to URI paths + const auto decoded{sourcemeta::core::utf8_decode(output, index)}; + if (!decoded.has_value() || + !sourcemeta::core::is_ucschar(decoded.value().first)) { + return false; + } + index += decoded.value().second; } else { return false; } diff --git a/vendor/core/src/core/uri/recompose.cc b/vendor/core/src/core/uri/recompose.cc index 702e1cf82..9dd8ca2c9 100644 --- a/vendor/core/src/core/uri/recompose.cc +++ b/vendor/core/src/core/uri/recompose.cc @@ -1,3 +1,4 @@ +#include #include #include "escaping.h" @@ -16,7 +17,8 @@ namespace sourcemeta::core { namespace { auto escape_component_to_string(std::string &output, std::string_view input, - const URIEscapeMode mode) -> void { + const URIEscapeMode mode, const bool iri, + const bool allow_iprivate = false) -> void { output.reserve(output.size() + input.size() * 3); for (std::string_view::size_type index = 0; index < input.size(); ++index) { @@ -33,6 +35,21 @@ auto escape_component_to_string(std::string &output, std::string_view input, continue; } + // In IRI mode, the non-ASCII characters permitted by RFC 3987, including + // private-use characters within the query, are preserved literally rather + // than percent-encoded + if (iri && (static_cast(character) & 0x80U) != 0U) { + const auto decoded{sourcemeta::core::utf8_decode(input, index)}; + if (decoded.has_value() && + (sourcemeta::core::is_ucschar(decoded.value().first) || + (allow_iprivate && + sourcemeta::core::is_iprivate(decoded.value().first)))) { + output.append(input.substr(index, decoded.value().second)); + index += decoded.value().second - 1; + continue; + } + } + if (uri_is_unreserved(character)) { output += character; continue; @@ -96,7 +113,7 @@ auto URI::recompose() const -> std::string { result.reserve(result.size() + this->fragment_.value().size() * 3 + 1); result += '#'; escape_component_to_string(result, this->fragment_.value(), - URIEscapeMode::Fragment); + URIEscapeMode::Fragment, this->iri_); return result; } @@ -128,17 +145,20 @@ auto URI::recompose_relative() const -> std::string { encoded += character; } } - escape_component_to_string(result, encoded, URIEscapeMode::Path); + escape_component_to_string(result, encoded, URIEscapeMode::Path, + this->iri_); if (first_slash != std::string::npos) { escape_component_to_string( result, std::string_view{path_value}.substr(first_slash), - URIEscapeMode::Path); + URIEscapeMode::Path, this->iri_); } } else { - escape_component_to_string(result, path_value, URIEscapeMode::Path); + escape_component_to_string(result, path_value, URIEscapeMode::Path, + this->iri_); } } else { - escape_component_to_string(result, path_value, URIEscapeMode::Path); + escape_component_to_string(result, path_value, URIEscapeMode::Path, + this->iri_); } } @@ -146,13 +166,13 @@ auto URI::recompose_relative() const -> std::string { if (result_query.has_value()) { result += '?'; escape_component_to_string(result, result_query.value().raw(), - URIEscapeMode::Fragment); + URIEscapeMode::Fragment, this->iri_, true); } if (this->fragment_.has_value()) { result += '#'; escape_component_to_string(result, this->fragment_.value(), - URIEscapeMode::Fragment); + URIEscapeMode::Fragment, this->iri_); } return result; @@ -183,7 +203,7 @@ auto URI::recompose_without_fragment() const -> std::optional { if (user_info.has_value()) { escape_component_to_string(result, user_info.value(), - URIEscapeMode::UserInfo); + URIEscapeMode::UserInfo, this->iri_); result += '@'; } @@ -195,7 +215,7 @@ auto URI::recompose_without_fragment() const -> std::optional { result += ']'; } else { escape_component_to_string(result, result_host.value(), - URIEscapeMode::SkipSubDelims); + URIEscapeMode::SkipSubDelims, this->iri_); } } @@ -214,7 +234,8 @@ auto URI::recompose_without_fragment() const -> std::optional { if (result_path.has_value()) { const auto &path_value = result_path.value(); - escape_component_to_string(result, path_value, URIEscapeMode::Path); + escape_component_to_string(result, path_value, URIEscapeMode::Path, + this->iri_); } // Query @@ -222,7 +243,7 @@ auto URI::recompose_without_fragment() const -> std::optional { if (result_query.has_value()) { result += '?'; escape_component_to_string(result, result_query.value().raw(), - URIEscapeMode::Fragment); + URIEscapeMode::Fragment, this->iri_, true); } if (result.empty()) { diff --git a/vendor/core/src/core/uri/resolution.cc b/vendor/core/src/core/uri/resolution.cc index d102adfd7..42118b103 100644 --- a/vendor/core/src/core/uri/resolution.cc +++ b/vendor/core/src/core/uri/resolution.cc @@ -35,6 +35,9 @@ auto merge_paths(const std::string &base_path, const std::string &ref_path, namespace sourcemeta::core { auto URI::resolve_from(const URI &base) -> URI & { + // Resolving against an IRI base, or resolving an IRI reference, yields an IRI + this->iri_ = this->iri_ || base.iri_; + // RFC 3986 Section 5.2.2: Transform References // Reference has a scheme - use as-is (already absolute) @@ -329,6 +332,9 @@ auto URI::rebase(const URI &base, const URI &new_base) -> URI & { this->userinfo_ = new_base.userinfo_; this->host_ = new_base.host_; this->port_ = new_base.port_; + // The new components come from the new base, so the result is an IRI if the + // new base is one + this->iri_ = this->iri_ || new_base.iri_; std::optional new_base_path_copy{new_base.path_}; merge_new_base_path(this->path_, std::move(new_base_path_copy), @@ -354,6 +360,9 @@ auto URI::rebase(const URI &base, URI &&new_base) -> URI & { this->userinfo_ = std::move(new_base.userinfo_); this->host_ = std::move(new_base.host_); this->port_ = new_base.port_; + // The new components come from the new base, so the result is an IRI if the + // new base is one + this->iri_ = this->iri_ || new_base.iri_; merge_new_base_path(this->path_, std::move(new_base.path_), std::move(saved_path)); diff --git a/vendor/core/src/core/uri/uri.cc b/vendor/core/src/core/uri/uri.cc index 346d13c31..d096ff3ab 100644 --- a/vendor/core/src/core/uri/uri.cc +++ b/vendor/core/src/core/uri/uri.cc @@ -15,11 +15,29 @@ auto URI::from_fragment(const std::string_view fragment) -> URI { return result; } +auto URI::from_iri(const std::string_view input) -> URI { + URI result; + result.iri_ = true; + result.parse(input); + return result; +} + +auto URI::operator==(const URI &other) const noexcept -> bool { + return std::tie(this->scheme_, this->userinfo_, this->host_, this->port_, + this->path_, this->query_, this->fragment_, + this->ip_literal_) == + std::tie(other.scheme_, other.userinfo_, other.host_, other.port_, + other.path_, other.query_, other.fragment_, + other.ip_literal_); +} + auto URI::operator<(const URI &other) const noexcept -> bool { return std::tie(this->scheme_, this->userinfo_, this->host_, this->port_, - this->path_, this->query_, this->fragment_) < + this->path_, this->query_, this->fragment_, + this->ip_literal_) < std::tie(other.scheme_, other.userinfo_, other.host_, other.port_, - other.path_, other.query_, other.fragment_); + other.path_, other.query_, other.fragment_, + other.ip_literal_); } } // namespace sourcemeta::core diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h index 982a9613a..769a5764e 100644 --- a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h @@ -120,6 +120,17 @@ class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouter { const Callback &callback) const -> std::pair; + /// Determine whether a path lies within the space described by the explicitly + /// registered routes, as a whole-segment prefix of one or more routes, as an + /// exact route, or as a path captured by a route expansion. The fallback + /// registered through the catch-all is never considered. Like matching, the + /// path is not normalized before evaluation. When a base path is given, it is + /// evaluated as if prepended to the path, avoiding a concatenation at the + /// call site + [[nodiscard]] auto + describes(const std::string_view path, + const std::string_view base_path = {}) const noexcept -> bool; + /// Access the root node of the trie [[nodiscard]] auto root() const noexcept -> const Node &; @@ -203,6 +214,17 @@ class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterView { -> std::pair; + /// Determine whether a path lies within the space described by the explicitly + /// registered routes, as a whole-segment prefix of one or more routes, as an + /// exact route, or as a path captured by a route expansion. The fallback + /// registered through the catch-all is never considered. Like matching, the + /// path is not normalized before evaluation. When a base path is given, it is + /// evaluated as if prepended to the path, avoiding a concatenation at the + /// call site + [[nodiscard]] auto + describes(const std::string_view path, + const std::string_view base_path = {}) const noexcept -> bool; + /// Access the stored arguments for a given route identifier auto arguments(const URITemplateRouter::Identifier identifier, const URITemplateRouter::ArgumentCallback &callback) const diff --git a/vendor/core/src/core/uritemplate/uritemplate.cc b/vendor/core/src/core/uritemplate/uritemplate.cc index 0f4373c6c..1605e5f27 100644 --- a/vendor/core/src/core/uritemplate/uritemplate.cc +++ b/vendor/core/src/core/uritemplate/uritemplate.cc @@ -115,7 +115,7 @@ auto URITemplate::expand( for (const auto &token : this->tokens_) { std::visit( - [&result, &callback](const auto &expansion) { + [&result, &callback](const auto &expansion) -> void { using T = std::decay_t; if constexpr (std::is_same_v) { result += expansion.value; diff --git a/vendor/core/src/core/uritemplate/uritemplate_router.cc b/vendor/core/src/core/uritemplate/uritemplate_router.cc index a1aea7367..dbba8cbd6 100644 --- a/vendor/core/src/core/uritemplate/uritemplate_router.cc +++ b/vendor/core/src/core/uritemplate/uritemplate_router.cc @@ -5,6 +5,7 @@ #include // std::ranges::lower_bound, std::ranges::find_if #include // assert +#include // std::uint8_t #include // std::numeric_limits #include // std::get, std::make_tuple @@ -49,6 +50,62 @@ inline auto is_expansion_type(const NodeType type) noexcept -> bool { return type == NodeType::Expansion || type == NodeType::OptionalExpansion; } +enum class DescribeWalk : std::uint8_t { NoMatch, Captured, Reached }; + +// Walk the segments of a rooted path fragment through the trie, starting from +// the given node. A null node means the root. On a successful walk, the node is +// advanced to where the fragment ends +auto walk_describe_fragment(const Node *¤t, const Node &root, + const std::string_view fragment) noexcept + -> DescribeWalk { + if (fragment.empty()) { + return DescribeWalk::Reached; + } + + if (fragment.front() != '/') { + return DescribeWalk::NoMatch; + } + + const char *position = fragment.data() + 1; + const char *const fragment_end = fragment.data() + fragment.size(); + + if (position >= fragment_end) { + return DescribeWalk::Reached; + } + + while (true) { + const char *segment_start = position; + while (position < fragment_end && *position != '/') { + ++position; + } + const std::string_view segment{ + segment_start, static_cast(position - segment_start)}; + + const auto &literal_children = current ? current->literals : root.literals; + const auto &variable_child = current ? current->variable : root.variable; + + const Node *next = find_literal_child(literal_children, segment); + if (next == nullptr) { + if (segment.empty() || !variable_child) { + return DescribeWalk::NoMatch; + } + if (is_expansion_type(variable_child->type)) { + return DescribeWalk::Captured; + } + next = variable_child.get(); + } + + current = next; + + if (position >= fragment_end) { + break; + } + ++position; + } + + return DescribeWalk::Reached; +} + auto find_or_create_variable_child(std::unique_ptr &variable, const std::string_view name, const NodeType type, @@ -160,7 +217,7 @@ auto URITemplateRouter::context(const Identifier identifier) const -> Identifier { assert(identifier > 0); const auto entry = std::ranges::find_if( - this->entries_, [&identifier](const auto &candidate) { + this->entries_, [&identifier](const auto &candidate) -> bool { return std::get<0>(candidate) == identifier; }); assert(entry != this->entries_.end()); @@ -170,7 +227,7 @@ auto URITemplateRouter::context(const Identifier identifier) const auto URITemplateRouter::path(const Identifier identifier) const -> std::string { assert(identifier > 0); const auto entry = std::ranges::find_if( - this->entries_, [&identifier](const auto &candidate) { + this->entries_, [&identifier](const auto &candidate) -> bool { return std::get<0>(candidate) == identifier; }); assert(entry != this->entries_.end()); @@ -191,8 +248,8 @@ auto URITemplateRouter::operation_id(const Identifier identifier) const if (identifier == 0) { return {}; } - const auto entry = - std::ranges::find_if(this->operations_, [&identifier](const auto &item) { + const auto entry = std::ranges::find_if( + this->operations_, [&identifier](const auto &item) -> bool { return item.second.first == identifier; }); if (entry == this->operations_.end()) { @@ -206,8 +263,10 @@ auto URITemplateRouter::otherwise(const Identifier context, -> void { this->otherwise_.context = context; - const auto existing = std::ranges::find_if( - this->arguments_, [](const auto &entry) { return entry.first == 0; }); + const auto existing = + std::ranges::find_if(this->arguments_, [](const auto &entry) -> bool { + return entry.first == 0; + }); if (existing == this->arguments_.end()) { if (!arguments.empty()) { this->arguments_.emplace_back( @@ -277,14 +336,15 @@ auto URITemplateRouter::add(const std::string_view uri_template, this->entries_.emplace_back(identifier, context, uri_template); } else { const auto existing = std::ranges::find_if( - this->entries_, [&previous_identifier](const auto &candidate) { + this->entries_, + [&previous_identifier](const auto &candidate) -> bool { return std::get<0>(candidate) == previous_identifier; }); if (existing != this->entries_.end()) { *existing = std::make_tuple(identifier, context, uri_template); } std::erase_if(this->operations_, - [&previous_identifier](const auto &entry) { + [&previous_identifier](const auto &entry) -> bool { return entry.second.first == previous_identifier; }); } @@ -294,7 +354,7 @@ auto URITemplateRouter::add(const std::string_view uri_template, operation_id, std::pair{identifier, context}); if (!arguments.empty()) { assert(std::ranges::none_of(this->arguments_, - [&identifier](const auto &entry) { + [&identifier](const auto &entry) -> bool { return entry.first == identifier; })); this->arguments_.emplace_back( @@ -516,14 +576,15 @@ auto URITemplateRouter::add(const std::string_view uri_template, this->entries_.emplace_back(identifier, context, uri_template); } else { const auto existing = std::ranges::find_if( - this->entries_, [&previous_identifier](const auto &candidate) { + this->entries_, + [&previous_identifier](const auto &candidate) -> bool { return std::get<0>(candidate) == previous_identifier; }); if (existing != this->entries_.end()) { *existing = std::make_tuple(identifier, context, uri_template); } std::erase_if(this->operations_, - [&previous_identifier](const auto &entry) { + [&previous_identifier](const auto &entry) -> bool { return entry.second.first == previous_identifier; }); } @@ -533,7 +594,7 @@ auto URITemplateRouter::add(const std::string_view uri_template, operation_id, std::pair{identifier, context}); if (!arguments.empty()) { assert(std::ranges::none_of(this->arguments_, - [&identifier](const auto &entry) { + [&identifier](const auto &entry) -> bool { return entry.first == identifier; })); this->arguments_.emplace_back( @@ -547,6 +608,39 @@ auto URITemplateRouter::root() const noexcept -> const Node & { return this->root_; } +auto URITemplateRouter::describes( + const std::string_view path, + const std::string_view base_path) const noexcept -> bool { + const Node *current = nullptr; + + if (!base_path.empty()) { + switch (walk_describe_fragment(current, this->root_, base_path)) { + case DescribeWalk::NoMatch: + return false; + case DescribeWalk::Captured: + return true; + case DescribeWalk::Reached: + break; + } + } + + switch (walk_describe_fragment(current, this->root_, path)) { + case DescribeWalk::NoMatch: + return false; + case DescribeWalk::Captured: + return true; + case DescribeWalk::Reached: + break; + } + + if (current == nullptr) { + return this->root_.identifier != 0 || !this->root_.literals.empty() || + this->root_.variable != nullptr; + } + + return true; +} + auto URITemplateRouter::arguments(const Identifier identifier, const ArgumentCallback &callback) const -> void { diff --git a/vendor/core/src/core/uritemplate/uritemplate_router_view.cc b/vendor/core/src/core/uritemplate/uritemplate_router_view.cc index 4020dec4f..7b9badaec 100644 --- a/vendor/core/src/core/uritemplate/uritemplate_router_view.cc +++ b/vendor/core/src/core/uritemplate/uritemplate_router_view.cc @@ -162,6 +162,83 @@ inline auto binary_search_literal_children( return NO_CHILD; } +enum class DescribeWalk : std::uint8_t { NoMatch, Captured, Reached }; + +// Walk the segments of a rooted path fragment through the serialized trie, +// starting from the given node index. On a successful walk, the node index is +// advanced to where the fragment ends +inline auto walk_describe_fragment(const SerializedNode *nodes, + const std::uint32_t node_count, + const char *string_table, + const std::size_t string_table_size, + std::uint32_t ¤t_node, + const std::string_view fragment) noexcept + -> DescribeWalk { + if (fragment.empty()) { + return DescribeWalk::Reached; + } + + if (fragment.front() != '/') { + return DescribeWalk::NoMatch; + } + + const char *position = fragment.data() + 1; + const char *const fragment_end = fragment.data() + fragment.size(); + + if (position >= fragment_end) { + return DescribeWalk::Reached; + } + + while (true) { + const char *segment_start = position; + while (position < fragment_end && *position != '/') { + ++position; + } + const auto segment_length = + static_cast(position - segment_start); + + const auto &node = nodes[current_node]; + + std::uint32_t literal_match = NO_CHILD; + if (node.first_literal_child != NO_CHILD) { + if (node.first_literal_child >= node_count || + node.literal_child_count > node_count - node.first_literal_child) { + return DescribeWalk::NoMatch; + } + literal_match = binary_search_literal_children( + nodes, string_table, string_table_size, node.first_literal_child, + node.literal_child_count, segment_start, segment_length); + } + + if (literal_match != NO_CHILD) { + current_node = literal_match; + } else if (segment_length > 0 && node.variable_child != NO_CHILD) { + if (node.variable_child >= node_count) { + return DescribeWalk::NoMatch; + } + const auto &variable_node = nodes[node.variable_child]; + if (variable_node.string_offset > string_table_size || + variable_node.string_length > + string_table_size - variable_node.string_offset) { + return DescribeWalk::NoMatch; + } + if (is_expansion_type(variable_node.type)) { + return DescribeWalk::Captured; + } + current_node = node.variable_child; + } else { + return DescribeWalk::NoMatch; + } + + if (position >= fragment_end) { + break; + } + ++position; + } + + return DescribeWalk::Reached; +} + } // namespace auto URITemplateRouterView::save(const URITemplateRouter &router, @@ -323,7 +400,8 @@ auto URITemplateRouterView::save(const URITemplateRouter &router, } std::ranges::sort( - operation_entries, {}, [&string_table](const OperationEntry &entry) { + operation_entries, {}, + [&string_table](const OperationEntry &entry) -> std::string_view { return std::string_view{string_table.data() + entry.string_offset, entry.string_length}; }); @@ -604,6 +682,78 @@ auto URITemplateRouterView::match( final_node.context); } +auto URITemplateRouterView::describes( + const std::string_view path, + const std::string_view base_path) const noexcept -> bool { + if (this->size_ < sizeof(RouterHeader)) { + return false; + } + + const auto *header = reinterpret_cast(this->data_); + if (header->magic != ROUTER_MAGIC || header->version != ROUTER_VERSION) { + return false; + } + + const auto node_count = header->node_count; + if (node_count == 0 || node_count > (this->size_ - sizeof(RouterHeader)) / + sizeof(SerializedNode)) { + return false; + } + + const auto *nodes = reinterpret_cast( + this->data_ + sizeof(RouterHeader)); + const auto nodes_size = + static_cast(node_count) * sizeof(SerializedNode); + const auto expected_string_table_offset = sizeof(RouterHeader) + nodes_size; + if (header->string_table_offset < expected_string_table_offset || + header->string_table_offset > this->size_) { + return false; + } + + if (header->arguments_offset < header->string_table_offset || + header->arguments_offset > this->size_) { + return false; + } + + const auto *string_table = + reinterpret_cast(this->data_ + header->string_table_offset); + const auto string_table_size = + header->arguments_offset - header->string_table_offset; + + std::uint32_t current_node = 0; + + if (!base_path.empty()) { + switch (walk_describe_fragment(nodes, node_count, string_table, + string_table_size, current_node, + base_path)) { + case DescribeWalk::NoMatch: + return false; + case DescribeWalk::Captured: + return true; + case DescribeWalk::Reached: + break; + } + } + + switch (walk_describe_fragment(nodes, node_count, string_table, + string_table_size, current_node, path)) { + case DescribeWalk::NoMatch: + return false; + case DescribeWalk::Captured: + return true; + case DescribeWalk::Reached: + break; + } + + if (current_node == 0) { + const auto &root = nodes[0]; + return root.identifier != 0 || root.first_literal_child != NO_CHILD || + root.variable_child != NO_CHILD; + } + + return true; +} + auto URITemplateRouterView::arguments( const URITemplateRouter::Identifier identifier, const URITemplateRouter::ArgumentCallback &callback) const -> void { diff --git a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h index 5c7a76fe3..d25178923 100644 --- a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h +++ b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h @@ -7,8 +7,10 @@ #include // std::uint64_t #include // std::exception +#include // std::filesystem::path #include // std::string #include // std::string_view +#include // std::move namespace sourcemeta::core { @@ -72,6 +74,34 @@ class SOURCEMETA_CORE_YAML_EXPORT YAMLParseError : public std::exception { const char *message_; }; +/// @ingroup yaml +/// An error that represents a YAML parse error occurring from parsing a file +class SOURCEMETA_CORE_YAML_EXPORT YAMLFileParseError : public YAMLParseError { +public: + YAMLFileParseError(std::filesystem::path path, const std::uint64_t line, + const std::uint64_t column, const char *message) + : YAMLParseError{line, column, message}, path_{std::move(path)} {} + YAMLFileParseError(std::filesystem::path path, const std::uint64_t line, + const std::uint64_t column, std::string message) = delete; + YAMLFileParseError(std::filesystem::path path, const std::uint64_t line, + const std::uint64_t column, + std::string &&message) = delete; + YAMLFileParseError(std::filesystem::path path, const std::uint64_t line, + const std::uint64_t column, + std::string_view message) = delete; + + YAMLFileParseError(std::filesystem::path path, const YAMLParseError &parent) + : YAMLParseError{parent.line(), parent.column(), parent.what()}, + path_{std::move(path)} {} + + [[nodiscard]] auto path() const noexcept -> const std::filesystem::path & { + return this->path_; + } + +private: + std::filesystem::path path_; +}; + /// @ingroup yaml /// An error that represents an unknown anchor reference in YAML class SOURCEMETA_CORE_YAML_EXPORT YAMLUnknownAnchorError diff --git a/vendor/core/src/core/yaml/parser.h b/vendor/core/src/core/yaml/parser.h index 0686de1da..2fcafd6d4 100644 --- a/vendor/core/src/core/yaml/parser.h +++ b/vendor/core/src/core/yaml/parser.h @@ -347,7 +347,13 @@ class Parser { if (this->recording_anchor_) { this->current_anchor_callbacks_.push_back( - {phase, type, line, column, context, index, std::string{property}}); + {.phase = phase, + .type = type, + .line = line, + .column = column, + .context = context, + .index = index, + .property = std::string{property}}); } } diff --git a/vendor/core/src/core/yaml/yaml.cc b/vendor/core/src/core/yaml/yaml.cc index 5ea5369f9..4eabe5e01 100644 --- a/vendor/core/src/core/yaml/yaml.cc +++ b/vendor/core/src/core/yaml/yaml.cc @@ -33,13 +33,18 @@ auto parse_yaml(const JSON::String &input) -> JSON { auto read_yaml(const std::filesystem::path &path) -> JSON { const auto input{read_file_to_string(path)}; - yaml::Lexer lexer{input}; - yaml::Parser parser{&lexer, nullptr}; - auto result{parser.parse()}; + try { + yaml::Lexer lexer{input}; + yaml::Parser parser{&lexer, nullptr}; + auto result{parser.parse()}; - parser.validate_end_of_stream(); + parser.validate_end_of_stream(); - return result; + return result; + } catch (const YAMLParseError &error) { + // For producing better error messages + throw YAMLFileParseError(path, error); + } } auto parse_yaml(std::basic_istream &stream, @@ -67,11 +72,16 @@ auto read_yaml(const std::filesystem::path &path, JSON &output, const JSON::ParseCallback &callback) -> void { const auto input{read_file_to_string(path)}; - yaml::Lexer lexer{input}; - yaml::Parser parser{&lexer, &callback}; - output = parser.parse(); - - parser.validate_end_of_stream(); + try { + yaml::Lexer lexer{input}; + yaml::Parser parser{&lexer, &callback}; + output = parser.parse(); + + parser.validate_end_of_stream(); + } catch (const YAMLParseError &error) { + // For producing better error messages + throw YAMLFileParseError(path, error); + } } auto read_yaml_or_json(const std::filesystem::path &path) -> JSON { diff --git a/vendor/core/src/lang/numeric/include/sourcemeta/core/numeric_util.h b/vendor/core/src/lang/numeric/include/sourcemeta/core/numeric_util.h index 8e5ca6a9d..f45f393f1 100644 --- a/vendor/core/src/lang/numeric/include/sourcemeta/core/numeric_util.h +++ b/vendor/core/src/lang/numeric/include/sourcemeta/core/numeric_util.h @@ -248,7 +248,7 @@ auto count_multiples(const Minimum &minimum, const Maximum &maximum, /// @ingroup numeric /// The maximum value representable by an unsigned integer of T bits template -constexpr auto uint_max = [] { +constexpr auto uint_max = []() -> std::uint64_t { static_assert(T > 0 && T < 64, "uint_max requires 0 < T < 64"); return (std::uint64_t{1} << T) - 1; }(); diff --git a/vendor/core/src/lang/process/spawn.cc b/vendor/core/src/lang/process/spawn.cc index 849128337..43cb1aaa1 100644 --- a/vendor/core/src/lang/process/spawn.cc +++ b/vendor/core/src/lang/process/spawn.cc @@ -168,8 +168,14 @@ auto spawn(const std::string &program, std::filesystem::current_path()}; std::filesystem::current_path(directory); #else + // The standardized child-directory file action is not yet provided by every + // system and toolchain this builds on, so we keep using the long-standing + // platform extension and silence the deprecation that newer SDKs attach to it +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" const int addchdir_result{ posix_spawn_file_actions_addchdir_np(&file_actions, directory.c_str())}; +#pragma GCC diagnostic pop if (addchdir_result != 0) { posix_spawn_file_actions_destroy(&file_actions); posix_spawnattr_destroy(&attributes); diff --git a/vendor/core/src/lang/text/include/sourcemeta/core/text.h b/vendor/core/src/lang/text/include/sourcemeta/core/text.h index a5dab4ff8..93e316804 100644 --- a/vendor/core/src/lang/text/include/sourcemeta/core/text.h +++ b/vendor/core/src/lang/text/include/sourcemeta/core/text.h @@ -169,6 +169,145 @@ inline auto is_lowercase(const String &value) noexcept -> bool { SOURCEMETA_CORE_TEXT_EXPORT auto is_lowercase(const std::filesystem::path &value) noexcept -> bool; +/// @ingroup text +/// +/// Return whether a character is an ASCII letter (A-Z or a-z). For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// assert(sourcemeta::core::is_alpha('a')); +/// assert(sourcemeta::core::is_alpha('Z')); +/// assert(!sourcemeta::core::is_alpha('5')); +/// ``` +template + requires std::same_as || + std::same_as || + std::same_as || + std::same_as +inline constexpr auto is_alpha(const Character character) noexcept -> bool { + return (character >= 'a' && character <= 'z') || + (character >= 'A' && character <= 'Z'); +} + +/// @ingroup text +/// +/// Return whether a string is non-empty and consists entirely of ASCII letters +/// (A-Z or a-z). An empty string is not considered a match. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// assert(sourcemeta::core::is_alpha("abc")); +/// assert(!sourcemeta::core::is_alpha("ab1")); +/// assert(!sourcemeta::core::is_alpha("")); +/// ``` +inline constexpr auto is_alpha(const std::string_view value) noexcept -> bool { + if (value.empty()) { + return false; + } + for (const auto character : value) { + if (!is_alpha(character)) { + return false; + } + } + return true; +} + +/// @ingroup text +/// +/// Return whether a character is an ASCII digit (0-9). For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// assert(sourcemeta::core::is_digit('5')); +/// assert(!sourcemeta::core::is_digit('a')); +/// ``` +template + requires std::same_as || + std::same_as || + std::same_as || + std::same_as +inline constexpr auto is_digit(const Character character) noexcept -> bool { + return character >= '0' && character <= '9'; +} + +/// @ingroup text +/// +/// Return whether a string is non-empty and consists entirely of ASCII digits +/// (0-9). An empty string is not considered a match. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// assert(sourcemeta::core::is_digit("123")); +/// assert(!sourcemeta::core::is_digit("12a")); +/// assert(!sourcemeta::core::is_digit("")); +/// ``` +inline constexpr auto is_digit(const std::string_view value) noexcept -> bool { + if (value.empty()) { + return false; + } + for (const auto character : value) { + if (!is_digit(character)) { + return false; + } + } + return true; +} + +/// @ingroup text +/// +/// Return whether a character is an ASCII letter or digit. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// assert(sourcemeta::core::is_alphanum('a')); +/// assert(sourcemeta::core::is_alphanum('5')); +/// assert(!sourcemeta::core::is_alphanum('-')); +/// ``` +template + requires std::same_as || + std::same_as || + std::same_as || + std::same_as +inline constexpr auto is_alphanum(const Character character) noexcept -> bool { + return is_alpha(character) || is_digit(character); +} + +/// @ingroup text +/// +/// Return whether a string is non-empty and consists entirely of ASCII letters +/// or digits. An empty string is not considered a match. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// assert(sourcemeta::core::is_alphanum("abc123")); +/// assert(!sourcemeta::core::is_alphanum("abc-123")); +/// assert(!sourcemeta::core::is_alphanum("")); +/// ``` +inline constexpr auto is_alphanum(const std::string_view value) noexcept + -> bool { + if (value.empty()) { + return false; + } + for (const auto character : value) { + if (!is_alphanum(character)) { + return false; + } + } + return true; +} + /// @ingroup text /// /// Truncate a string in place to at most `maximum_length` bytes, appending