Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/core/jsonschema/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ include(./official_resolver.cmake)

sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME jsonschema
PRIVATE_HEADERS bundle.h walker.h frame.h error.h
types.h transform.h
SOURCES jsonschema.cc official_walker.cc frame.cc
walker.cc bundle.cc transformer.cc format.cc
types.h transform.h vocabularies.h
SOURCES jsonschema.cc vocabularies.cc official_walker.cc
frame.cc walker.cc bundle.cc transformer.cc format.cc
"${CMAKE_CURRENT_BINARY_DIR}/official_resolver.cc")

if(SOURCEMETA_CORE_INSTALL)
Expand Down
18 changes: 7 additions & 11 deletions src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,17 @@

#include <sourcemeta/core/json.h>
#include <sourcemeta/core/jsonpointer.h>
#include <sourcemeta/core/jsonschema_vocabularies.h>

#include <cstdint> // std::uint8_t
#include <functional> // std::function, std::reference_wrapper
#include <optional> // std::optional
#include <set> // std::set
#include <string> // std::string
#include <string_view> // std::string_view
#include <unordered_map> // std::unordered_map
#include <cstdint> // std::uint8_t
#include <functional> // std::function, std::reference_wrapper
#include <optional> // std::optional
#include <set> // std::set
#include <string> // std::string
#include <string_view> // std::string_view

namespace sourcemeta::core {

/// @ingroup jsonschema
/// A set of vocabularies
using Vocabularies = std::unordered_map<JSON::String, bool>;

// Take a URI and get back a schema
/// @ingroup jsonschema
///
Expand Down
125 changes: 125 additions & 0 deletions src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#ifndef SOURCEMETA_CORE_JSONSCHEMA_VOCABULARIES_H_
#define SOURCEMETA_CORE_JSONSCHEMA_VOCABULARIES_H_

#ifndef SOURCEMETA_CORE_JSONSCHEMA_EXPORT
#include <sourcemeta/core/jsonschema_export.h>
#endif

#include <sourcemeta/core/json.h>

#include <bitset> // std::bitset
#include <cassert> // assert
#include <cstdint> // std::uint32_t, std::size_t
#include <optional> // std::optional
#include <stdexcept> // std::out_of_range
#include <string> // std::string
#include <string_view> // std::string_view
#include <unordered_map> // std::unordered_map
#include <utility> // std::pair
#include <vector> // std::vector

namespace sourcemeta::core {

/// @ingroup jsonschema
/// A set of JSON Schema vocabularies, each one flagged as either required
/// (`true`) or optional (`false`). Vocabularies listed in the `Known`
/// enumeration are tracked with a pair of bitsets, while any other vocabulary
/// URI is kept in a fallback `std::unordered_map`.
///
/// TODO: To maximize performance gains, convert string-based vocabulary checks
/// throughout the codebase to use enum-based methods.
struct SOURCEMETA_CORE_JSONSCHEMA_EXPORT Vocabularies {
/// The vocabularies that this class knows about ahead of time. The members
/// are numbered contiguously from zero and stored in a single byte.
// NOTE(review): presumably each value doubles as the bit position within the
// bitsets declared at the bottom of this class. Confirm against the
// out-of-line definitions before renumbering anything
enum class Known : std::uint8_t {
// Pre-vocabulary dialects (treated as vocabularies)
JSON_Schema_Draft_0 = 0,
JSON_Schema_Draft_0_Hyper = 1,
JSON_Schema_Draft_1 = 2,
JSON_Schema_Draft_1_Hyper = 3,
JSON_Schema_Draft_2 = 4,
JSON_Schema_Draft_2_Hyper = 5,
JSON_Schema_Draft_3 = 6,
JSON_Schema_Draft_3_Hyper = 7,
JSON_Schema_Draft_4 = 8,
JSON_Schema_Draft_4_Hyper = 9,
JSON_Schema_Draft_6 = 10,
JSON_Schema_Draft_6_Hyper = 11,
JSON_Schema_Draft_7 = 12,
JSON_Schema_Draft_7_Hyper = 13,
// 2019-09 vocabularies
JSON_Schema_2019_09_Core = 14,
JSON_Schema_2019_09_Applicator = 15,
JSON_Schema_2019_09_Validation = 16,
JSON_Schema_2019_09_Meta_Data = 17,
JSON_Schema_2019_09_Format = 18,
JSON_Schema_2019_09_Content = 19,
JSON_Schema_2019_09_Hyper_Schema = 20,
// 2020-12 vocabularies
JSON_Schema_2020_12_Core = 21,
JSON_Schema_2020_12_Applicator = 22,
JSON_Schema_2020_12_Unevaluated = 23,
JSON_Schema_2020_12_Validation = 24,
JSON_Schema_2020_12_Meta_Data = 25,
JSON_Schema_2020_12_Format_Annotation = 26,
JSON_Schema_2020_12_Format_Assertion = 27,
JSON_Schema_2020_12_Content = 28
};

// NOTE: Must be kept in sync with the Known enum above. This is the size of
// both bitsets below, so it has to be exactly one more than the largest
// enumerator value (currently 28)
static constexpr std::size_t KNOWN_VOCABULARY_COUNT = 29;

// NOTE(review): the `public:` label is redundant, as members of a `struct`
// are public by default. It is kept to mirror the `private:` label below
public:
// All the special member functions are defaulted: an instance starts out with
// both bitsets cleared and no custom vocabularies
Vocabularies() = default;
Vocabularies(const Vocabularies &) = default;
Vocabularies(Vocabularies &&) noexcept = default;
auto operator=(const Vocabularies &) -> Vocabularies & = default;
auto operator=(Vocabularies &&) noexcept -> Vocabularies & = default;
~Vocabularies() = default;

// NOTE(review): `std::initializer_list` is used by the two constructors below
// but `<initializer_list>` is not among the headers this file includes
// directly. It presumably arrives transitively; consider including it

/// Construct from an initializer list of vocabulary URIs, each paired with
/// its required (`true`) or optional (`false`) status
Vocabularies(std::initializer_list<std::pair<JSON::String, bool>> init);

/// Construct from an initializer list of known vocabulary enumeration
/// members, each paired with its required (`true`) or optional (`false`)
/// status
Vocabularies(std::initializer_list<std::pair<Known, bool>> init);

/// Check if a vocabulary, given by its URI, is enabled
[[nodiscard]] auto contains(const JSON::String &uri) const noexcept -> bool;

/// Check if a known vocabulary is enabled
[[nodiscard]] auto contains(Known vocabulary) const noexcept -> bool;

/// Insert a vocabulary, given by its URI, with its required/optional status
// NOTE(review): this is declared `noexcept`, yet storing a URI in the
// `custom` map may need to allocate. If the definition does so and the
// allocation throws, the program terminates. Confirm this is intended
auto insert(const JSON::String &uri, bool required) noexcept -> void;

/// Insert a known vocabulary with its required/optional status
auto insert(Known vocabulary, bool required) noexcept -> void;

/// Get the required (`true`) or optional (`false`) status of a vocabulary
/// given by its URI
// NOTE(review): presumably the result holds no value when the vocabulary is
// not part of the set. Verify against the out-of-line definition
[[nodiscard]] auto get(const JSON::String &uri) const noexcept
-> std::optional<bool>;

/// Get the required (`true`) or optional (`false`) status of a known
/// vocabulary
// NOTE(review): presumably the result holds no value when the vocabulary is
// not part of the set. Verify against the out-of-line definition
[[nodiscard]] auto get(Known vocabulary) const noexcept
-> std::optional<bool>;

/// Get the number of vocabularies (required + optional + custom)
[[nodiscard]] auto size() const noexcept -> std::size_t;

/// Check if there are no vocabularies
[[nodiscard]] auto empty() const noexcept -> bool;

private:
// Invariant: required_known and optional_known must be mutually exclusive
// A vocabulary can be either required (true) OR optional (false), never both
//
// MSVC warning C4251 complains about standard library data members of an
// exported class not having a DLL interface. It is silenced here given that
// these members are private
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4251)
#endif
// Known vocabularies flagged as required
std::bitset<KNOWN_VOCABULARY_COUNT> required_known{};
// Known vocabularies flagged as optional
std::bitset<KNOWN_VOCABULARY_COUNT> optional_known{};
// Fallback storage that maps a vocabulary URI to its required/optional status
std::unordered_map<JSON::String, bool> custom;
#ifdef _MSC_VER
#pragma warning(pop)
#endif
};

} // namespace sourcemeta::core

#endif
111 changes: 86 additions & 25 deletions src/core/jsonschema/jsonschema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,18 +259,67 @@ auto sourcemeta::core::base_dialect(
}

namespace {
// Determine the core vocabulary that corresponds to a base dialect. Only the
// 2020-12 and 2019-09 base dialects (in their schema and hyper-schema
// variants) are recognised, as those are the only ones matched below
auto core_vocabulary(std::string_view base_dialect) -> std::string {
auto core_vocabulary_known(std::string_view base_dialect)
-> sourcemeta::core::Vocabularies::Known {
if (base_dialect == "https://json-schema.org/draft/2020-12/schema" ||
base_dialect == "https://json-schema.org/draft/2020-12/hyper-schema") {
return "https://json-schema.org/draft/2020-12/vocab/core";
return sourcemeta::core::Vocabularies::Known::JSON_Schema_2020_12_Core;
} else if (base_dialect == "https://json-schema.org/draft/2019-09/schema" ||
base_dialect ==
"https://json-schema.org/draft/2019-09/hyper-schema") {
return "https://json-schema.org/draft/2019-09/vocab/core";
return sourcemeta::core::Vocabularies::Known::JSON_Schema_2019_09_Core;
} else {
// Any other base dialect is reported to the caller as an error that carries
// the offending base dialect URI
throw sourcemeta::core::SchemaBaseDialectError(std::string{base_dialect});
}
}

// Translate the URI of a pre-vocabulary dialect (Draft 0 through Draft 7, in
// both the plain and hyper-schema flavours) into its known vocabulary
// enumeration member. The comparison is exact, so the trailing `#` is
// significant. Any other input results in no value
auto dialect_to_known(std::string_view dialect)
    -> std::optional<sourcemeta::core::Vocabularies::Known> {
  using Known = sourcemeta::core::Vocabularies::Known;

  // One row per supported dialect URI
  struct Mapping {
    std::string_view uri;
    Known vocabulary;
  };

  static constexpr Mapping mappings[] = {
      {"http://json-schema.org/draft-07/schema#", Known::JSON_Schema_Draft_7},
      {"http://json-schema.org/draft-07/hyper-schema#",
       Known::JSON_Schema_Draft_7_Hyper},
      {"http://json-schema.org/draft-06/schema#", Known::JSON_Schema_Draft_6},
      {"http://json-schema.org/draft-06/hyper-schema#",
       Known::JSON_Schema_Draft_6_Hyper},
      {"http://json-schema.org/draft-04/schema#", Known::JSON_Schema_Draft_4},
      {"http://json-schema.org/draft-04/hyper-schema#",
       Known::JSON_Schema_Draft_4_Hyper},
      {"http://json-schema.org/draft-03/schema#", Known::JSON_Schema_Draft_3},
      {"http://json-schema.org/draft-03/hyper-schema#",
       Known::JSON_Schema_Draft_3_Hyper},
      {"http://json-schema.org/draft-02/schema#", Known::JSON_Schema_Draft_2},
      {"http://json-schema.org/draft-02/hyper-schema#",
       Known::JSON_Schema_Draft_2_Hyper},
      {"http://json-schema.org/draft-01/schema#", Known::JSON_Schema_Draft_1},
      {"http://json-schema.org/draft-01/hyper-schema#",
       Known::JSON_Schema_Draft_1_Hyper},
      {"http://json-schema.org/draft-00/schema#", Known::JSON_Schema_Draft_0},
      {"http://json-schema.org/draft-00/hyper-schema#",
       Known::JSON_Schema_Draft_0_Hyper}};

  // The URIs are all distinct, so at most one row can match
  for (const auto &mapping : mappings) {
    if (mapping.uri == dialect) {
      return mapping.vocabulary;
    }
  }

  return std::nullopt;
}
} // namespace

auto sourcemeta::core::vocabularies(
Expand Down Expand Up @@ -304,21 +353,22 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver,
// As a performance optimization shortcut
if (base_dialect == dialect) {
if (dialect == "https://json-schema.org/draft/2020-12/schema") {
return {{"https://json-schema.org/draft/2020-12/vocab/core", true},
{"https://json-schema.org/draft/2020-12/vocab/applicator", true},
{"https://json-schema.org/draft/2020-12/vocab/unevaluated", true},
{"https://json-schema.org/draft/2020-12/vocab/validation", true},
{"https://json-schema.org/draft/2020-12/vocab/meta-data", true},
{"https://json-schema.org/draft/2020-12/vocab/format-annotation",
true},
{"https://json-schema.org/draft/2020-12/vocab/content", true}};
return Vocabularies{
{Vocabularies::Known::JSON_Schema_2020_12_Core, true},
{Vocabularies::Known::JSON_Schema_2020_12_Applicator, true},
{Vocabularies::Known::JSON_Schema_2020_12_Unevaluated, true},
{Vocabularies::Known::JSON_Schema_2020_12_Validation, true},
{Vocabularies::Known::JSON_Schema_2020_12_Meta_Data, true},
{Vocabularies::Known::JSON_Schema_2020_12_Format_Annotation, true},
{Vocabularies::Known::JSON_Schema_2020_12_Content, true}};
} else if (dialect == "https://json-schema.org/draft/2019-09/schema") {
return {{"https://json-schema.org/draft/2019-09/vocab/core", true},
{"https://json-schema.org/draft/2019-09/vocab/applicator", true},
{"https://json-schema.org/draft/2019-09/vocab/validation", true},
{"https://json-schema.org/draft/2019-09/vocab/meta-data", true},
{"https://json-schema.org/draft/2019-09/vocab/format", false},
{"https://json-schema.org/draft/2019-09/vocab/content", true}};
return Vocabularies{
{Vocabularies::Known::JSON_Schema_2019_09_Core, true},
{Vocabularies::Known::JSON_Schema_2019_09_Applicator, true},
{Vocabularies::Known::JSON_Schema_2019_09_Validation, true},
{Vocabularies::Known::JSON_Schema_2019_09_Meta_Data, true},
{Vocabularies::Known::JSON_Schema_2019_09_Format, false},
{Vocabularies::Known::JSON_Schema_2019_09_Content, true}};
}
}

Expand All @@ -336,7 +386,11 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver,
dialect == "http://json-schema.org/draft-02/schema#" ||
dialect == "http://json-schema.org/draft-01/schema#" ||
dialect == "http://json-schema.org/draft-00/schema#") {
return {{dialect, true}};
const auto known = dialect_to_known(dialect);
if (known.has_value()) {
return Vocabularies{{known.value(), true}};
}
return Vocabularies{{dialect, true}};
}

/*
Expand All @@ -356,7 +410,11 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver,
base_dialect == "http://json-schema.org/draft-02/hyper-schema#" ||
base_dialect == "http://json-schema.org/draft-01/hyper-schema#" ||
base_dialect == "http://json-schema.org/draft-00/hyper-schema#") {
return {{base_dialect, true}};
const auto known = dialect_to_known(base_dialect);
if (known.has_value()) {
return Vocabularies{{known.value(), true}};
}
return Vocabularies{{base_dialect, true}};
}

/*
Expand Down Expand Up @@ -384,25 +442,28 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver,
*/

Vocabularies result;
const std::string core{core_vocabulary(base_dialect)};
const auto core{core_vocabulary_known(base_dialect)};
if (schema_dialect.defines("$vocabulary")) {
const sourcemeta::core::JSON &vocabularies{
schema_dialect.at("$vocabulary")};
assert(vocabularies.is_object());
for (const auto &entry : vocabularies.as_object()) {
result.insert({entry.first, entry.second.to_boolean()});
result.insert(entry.first, entry.second.to_boolean());
}
} else {
result.insert({core, true});
result.insert(core, true);
}

// The specification recommends these checks
if (!result.contains(core)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we take advantage of your bitsets here? Note that we get the core vocabulary for a dialect using `core_vocabulary()`. That function could return a known vocabulary enum member instead, so that we can check containment directly against the bitsets.

throw sourcemeta::core::SchemaError(
"The core vocabulary must always be present");
} else if (!result.at(core)) {
throw sourcemeta::core::SchemaError(
"The core vocabulary must always be required");
} else {
const auto core_status{result.get(core)};
if (core_status.has_value() && !core_status.value()) {
throw sourcemeta::core::SchemaError(
"The core vocabulary must always be required");
}
}

return result;
Expand Down
Loading