From 45da0bc3f8512ff1136f72cc670227fdbc0a8997 Mon Sep 17 00:00:00 2001 From: Chris Fretz Date: Thu, 26 Mar 2020 13:16:41 -0400 Subject: [PATCH] Added a fuzz-tested buffer validation API Intended use case for dart::is_valid and dart::validate is to enable interacting with buffers from untrusted sources --- .travis.yml | 48 ++- benchmark/CMakeLists.txt | 1 + benchmark/configurable.cc | 298 ++++++++++------ include/dart.h | 470 +++++++++++++++++++++++++- include/dart/abi.h | 18 + include/dart/api.tcc | 12 +- include/dart/array.tcc | 90 +++++ include/dart/common.h | 64 +++- include/dart/connector/json.tcc | 36 ++ include/dart/object.tcc | 120 +++++++ include/dart/primitive.tcc | 29 ++ include/dart/shim.h | 2 +- include/dart/string.tcc | 50 +++ src/generic.cc | 4 + test/arr_unit_tests.cc | 6 +- test/dcm_unit_tests.cc | 2 +- test/fuzz_tests.cc | 95 ++++++ test/fuzzer_input/cyclic_case_1.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_10.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_11.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_12.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_13.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_14.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_15.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_16.dart | Bin 0 -> 1481 bytes test/fuzzer_input/cyclic_case_17.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_2.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_3.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_4.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_5.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_6.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_7.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_8.dart | Bin 0 -> 1464 bytes test/fuzzer_input/cyclic_case_9.dart | Bin 0 -> 1464 bytes test/fuzzer_input/test_case_0.dart | Bin 0 -> 1640 bytes test/fuzzer_input/test_case_1.dart | Bin 0 -> 1632 bytes 
test/fuzzer_input/test_case_2.dart | Bin 0 -> 1656 bytes test/fuzzer_input/test_case_3.dart | Bin 0 -> 1536 bytes test/fuzzer_input/test_case_4.dart | Bin 0 -> 1664 bytes test/fuzzer_input/test_case_5.dart | Bin 0 -> 1560 bytes test/fuzzer_input/test_case_6.dart | Bin 0 -> 1664 bytes test/fuzzer_input/test_case_7.dart | Bin 0 -> 1464 bytes test/fuzzer_input/test_case_8.dart | Bin 0 -> 1456 bytes test/fuzzer_input/test_case_9.dart | Bin 0 -> 1568 bytes test/generic_abi_unit_tests.cc | 33 ++ test/int_unit_tests.cc | 4 +- test/json_test.cc | 58 +++- test/obj_unit_tests.cc | 80 ++++- test/str_unit_tests.cc | 4 +- 49 files changed, 1376 insertions(+), 148 deletions(-) create mode 100644 test/fuzz_tests.cc create mode 100644 test/fuzzer_input/cyclic_case_1.dart create mode 100644 test/fuzzer_input/cyclic_case_10.dart create mode 100644 test/fuzzer_input/cyclic_case_11.dart create mode 100644 test/fuzzer_input/cyclic_case_12.dart create mode 100644 test/fuzzer_input/cyclic_case_13.dart create mode 100644 test/fuzzer_input/cyclic_case_14.dart create mode 100644 test/fuzzer_input/cyclic_case_15.dart create mode 100644 test/fuzzer_input/cyclic_case_16.dart create mode 100644 test/fuzzer_input/cyclic_case_17.dart create mode 100644 test/fuzzer_input/cyclic_case_2.dart create mode 100644 test/fuzzer_input/cyclic_case_3.dart create mode 100644 test/fuzzer_input/cyclic_case_4.dart create mode 100644 test/fuzzer_input/cyclic_case_5.dart create mode 100644 test/fuzzer_input/cyclic_case_6.dart create mode 100644 test/fuzzer_input/cyclic_case_7.dart create mode 100644 test/fuzzer_input/cyclic_case_8.dart create mode 100644 test/fuzzer_input/cyclic_case_9.dart create mode 100644 test/fuzzer_input/test_case_0.dart create mode 100644 test/fuzzer_input/test_case_1.dart create mode 100644 test/fuzzer_input/test_case_2.dart create mode 100644 test/fuzzer_input/test_case_3.dart create mode 100644 test/fuzzer_input/test_case_4.dart create mode 100644 test/fuzzer_input/test_case_5.dart 
create mode 100644 test/fuzzer_input/test_case_6.dart create mode 100644 test/fuzzer_input/test_case_7.dart create mode 100644 test/fuzzer_input/test_case_8.dart create mode 100644 test/fuzzer_input/test_case_9.dart diff --git a/.travis.yml b/.travis.yml index b6d5f8d..f8b1093 100644 --- a/.travis.yml +++ b/.travis.yml @@ -64,6 +64,20 @@ install: # Build matrix matrix: include: + - env: + VERSION: 7 + FUZZ: true + os: linux + compiler: clang + addons: + apt: + sources: + - llvm-toolchain-xenial-7 + - ubuntu-toolchain-r-test + packages: + - rapidjson-dev + - clang-7 + - g++-7 - env: VERSION: 5 USE_GSL_LITE: true @@ -215,26 +229,38 @@ matrix: env: USE_GSL_LITE: true IS_SLOW_AS_MOLASSES: true + - os: osx + compiler: clang + osx_image: xcode11.3 + env: + IS_SLOW_AS_MOLASSES: true # Build and run. script: - - mkdir build - - cd build - - while true; do sleep 60; echo 'still here!'; done & - - KEEPALIVE_PID=$! - | - if [ "${IS_SLOW_AS_MOLASSES}" = "true" ]; then - cmake .. -Dbenchmark=ON -Duse_sajson=ON -Dbuild_abi=ON -Duse_asan=ON + if [ "${FUZZ}" = "true" ]; then + cd test + clang++ -std=c++14 -g -O3 -fsanitize=fuzzer,address -o fuzz fuzz_tests.cc + mkdir discoveries + timeout --preserve-status 600 ./fuzz fuzzer_input discoveries else - cmake .. -Dbenchmark=ON -Duse_sajson=ON -Dextended_test=ON -Dbuild_abi=ON -Dgen_coverage=ON -Duse_asan=ON + mkdir build + cd build + while true; do sleep 60; echo 'still here!'; done & + KEEPALIVE_PID=$! + if [ "${IS_SLOW_AS_MOLASSES}" = "true" ]; then + cmake .. -Dbenchmark=ON -Duse_sajson=ON -Dbuild_abi=ON -Duse_asan=ON + else + cmake .. -Dbenchmark=ON -Duse_sajson=ON -Dextended_test=ON -Dbuild_abi=ON -Dgen_coverage=ON -Duse_asan=ON + fi + make VERBOSE=1 + kill -9 $KEEPALIVE_PID + ctest --output-on-failure fi - - make VERBOSE=1 - - kill -9 $KEEPALIVE_PID - - ctest --output-on-failure - - cd .. after_success: - | + cd .. if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "${GEN_COVERAGE}" = "true" ]; then coveralls --gcov gcov-7 -r . 
-b build/ -i include/ -i src/ -E '.*(catch|variant).*' --gcov-options '\-lp' fi diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 06922d1..bf4fc1d 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -38,6 +38,7 @@ endif (libyaml) # Link against flatbuffers if we have it if (libflatbuffers) target_compile_options(static_bench PUBLIC -DDART_HAS_FLEXBUFFERS) + target_compile_options(configurable_bench PUBLIC -DDART_HAS_FLEXBUFFERS) endif () # Handle YAJL diff --git a/benchmark/configurable.cc b/benchmark/configurable.cc index c5391ba..76681f2 100644 --- a/benchmark/configurable.cc +++ b/benchmark/configurable.cc @@ -22,114 +22,20 @@ #include #endif +#if DART_HAS_FLEXBUFFERS +#include +#endif + /*----- Local Includes -----*/ #include "../include/dart.h" -/*----- Helpers -----*/ - -template -constexpr bool all_equal(Idx) { - return true; -} -template -constexpr bool all_equal(Lhs lidx, Rhs ridx, Idxs... lens) { - return lidx == ridx && all_equal(ridx, lens...); -} +/*----- Globals -----*/ auto extract_directory(dart::shim::string_view path) noexcept { return path.substr(0, path.find_last_of("/")); } -template -constexpr void for_multi(Func&& cb, Containers&&... cs) { - // Setup ADL. - using std::end; - using std::begin; - - // Validation. - if (!all_equal(cs.size()...)) { - throw std::invalid_argument("All containers must be of the same size"); - } - - // Get a tuple of iterators. - [&] (auto first, auto... rest) { - // Get an end iterator to use as a sentinel. - auto& end = std::get<1>(first); - auto& curr = std::get<0>(first); - - // Walk each iterator along together. - while (curr != end) { - // Dereference. - cb(*curr, *std::get<0>(rest)...); - - // Increment our iterators. - ++curr; - (void) std::initializer_list {(++std::get<0>(rest), 0)...}; - } - }(std::make_tuple(begin(cs), end(cs))...); -} - -// This benchmark is getting out of hand. 
-#ifdef DART_HAS_YAJL -void yajl_serialize(yajl_val curr, yajl_gen handle) { - yajl_gen_status ret; - switch (curr->type) { - case yajl_t_object: - { - ret = yajl_gen_map_open(handle); - assert(ret == yajl_gen_status_ok); - auto* obj = YAJL_GET_OBJECT(curr); - for (auto i = 0U; i < obj->len; ++i) { - auto* key = obj->keys[i]; - ret = yajl_gen_string(handle, reinterpret_cast(key), strlen(key)); - assert(ret == yajl_gen_status_ok); - yajl_serialize(obj->values[i], handle); - } - ret = yajl_gen_map_close(handle); - assert(ret == yajl_gen_status_ok); - } - break; - case yajl_t_array: - { - ret = yajl_gen_array_open(handle); - assert(ret == yajl_gen_status_ok); - auto* arr = YAJL_GET_ARRAY(curr); - for (auto i = 0U; i < arr->len; ++i) yajl_serialize(arr->values[i], handle); - ret = yajl_gen_array_close(handle); - assert(ret == yajl_gen_status_ok); - } - break; - case yajl_t_string: - { - auto* str = YAJL_GET_STRING(curr); - ret = yajl_gen_string(handle, reinterpret_cast(str), strlen(str)); - } - break; - case yajl_t_number: - if (YAJL_IS_INTEGER(curr)) { - ret = yajl_gen_integer(handle, YAJL_GET_INTEGER(curr)); - } else { - ret = yajl_gen_double(handle, YAJL_GET_DOUBLE(curr)); - } - break; - case yajl_t_true: - ret = yajl_gen_bool(handle, false); - break; - case yajl_t_false: - ret = yajl_gen_bool(handle, false); - break; - default: - assert(curr->type == yajl_t_null); - ret = yajl_gen_null(handle); - break; - } - assert(ret == yajl_gen_status_ok); -} -#endif - -/*----- Globals -----*/ - static auto base_dir = extract_directory(__FILE__); static auto byte_counter = [] (auto a, auto& s) { return a + s.size(); }; static std::string const json_input = std::string {base_dir} + "/input.json"; @@ -214,6 +120,7 @@ struct benchmark_helper : benchmark::Fixture { std::vector load_input(dart::shim::string_view path) const; std::vector parse_input_dart(gsl::span packets) const; + std::vector parse_mutable_dart(gsl::span packets) const; std::vector parse_input_rj(gsl::span packets) 
const; #ifdef DART_USE_SAJSON std::vector parse_input_sajson(gsl::span packets) const; @@ -233,6 +140,7 @@ struct benchmark_helper : benchmark::Fixture { std::vector input; std::vector parsed_dart; + std::vector mutable_dart; std::vector parsed_rj; #ifdef DART_USE_SAJSON std::vector parsed_sajson; @@ -252,6 +160,162 @@ struct benchmark_helper : benchmark::Fixture { }; +/*----- Helpers -----*/ + +template +constexpr bool all_equal(Idx) { + return true; +} +template +constexpr bool all_equal(Lhs lidx, Rhs ridx, Idxs... lens) { + return lidx == ridx && all_equal(ridx, lens...); +} + +template +constexpr void for_multi(Func&& cb, Containers&&... cs) { + // Setup ADL. + using std::end; + using std::begin; + + // Validation. + if (!all_equal(cs.size()...)) { + throw std::invalid_argument("All containers must be of the same size"); + } + + // Get a tuple of iterators. + [&] (auto first, auto... rest) { + // Get an end iterator to use as a sentinel. + auto& end = std::get<1>(first); + auto& curr = std::get<0>(first); + + // Walk each iterator along together. + while (curr != end) { + // Dereference. + cb(*curr, *std::get<0>(rest)...); + + // Increment our iterators. + ++curr; + (void) std::initializer_list {(++std::get<0>(rest), 0)...}; + } + }(std::make_tuple(begin(cs), end(cs))...); +} + +// This benchmark is getting out of hand. 
+#ifdef DART_HAS_YAJL +void yajl_serialize(yajl_val curr, yajl_gen handle) { + yajl_gen_status ret; + switch (curr->type) { + case yajl_t_object: + { + ret = yajl_gen_map_open(handle); + assert(ret == yajl_gen_status_ok); + auto* obj = YAJL_GET_OBJECT(curr); + for (auto i = 0U; i < obj->len; ++i) { + auto* key = obj->keys[i]; + ret = yajl_gen_string(handle, reinterpret_cast(key), strlen(key)); + assert(ret == yajl_gen_status_ok); + yajl_serialize(obj->values[i], handle); + } + ret = yajl_gen_map_close(handle); + assert(ret == yajl_gen_status_ok); + } + break; + case yajl_t_array: + { + ret = yajl_gen_array_open(handle); + assert(ret == yajl_gen_status_ok); + auto* arr = YAJL_GET_ARRAY(curr); + for (auto i = 0U; i < arr->len; ++i) yajl_serialize(arr->values[i], handle); + ret = yajl_gen_array_close(handle); + assert(ret == yajl_gen_status_ok); + } + break; + case yajl_t_string: + { + auto* str = YAJL_GET_STRING(curr); + ret = yajl_gen_string(handle, reinterpret_cast(str), strlen(str)); + } + break; + case yajl_t_number: + if (YAJL_IS_INTEGER(curr)) { + ret = yajl_gen_integer(handle, YAJL_GET_INTEGER(curr)); + } else { + ret = yajl_gen_double(handle, YAJL_GET_DOUBLE(curr)); + } + break; + case yajl_t_true: + ret = yajl_gen_bool(handle, true); // yajl_t_true must be emitted as JSON true + break; + case yajl_t_false: + ret = yajl_gen_bool(handle, false); + break; + default: + assert(curr->type == yajl_t_null); + ret = yajl_gen_null(handle); + break; + } + assert(ret == yajl_gen_status_ok); +} +#endif + +#if DART_HAS_FLEXBUFFERS +void convert_dart_to_fb(unsafe_buffer pkt, flexbuffers::Builder& fbb, char const* currkey = nullptr) { + switch (pkt.get_type()) { + case unsafe_buffer::type::object: + { + unsafe_buffer::iterator k, v; + std::tie(k, v) = pkt.kvbegin(); + + // Flexbuffer builder API is kind of awkward to use + // in a recursive function like this, but we can get it done + auto work = [&] { + while (v != pkt.end()) { + convert_dart_to_fb(*v, fbb, k->str()); + ++k, ++v; + } + }; + + // Need to call 
differently depending on if we're already + // in the process of building an object + if (currkey) fbb.Map(currkey, work); + else fbb.Map(work); + } + break; + case unsafe_buffer::type::array: + { + // Flexbuffer builder API is kind of awkward to use + // in a recursive function like this, but we can get it done + auto work = [&] { + for (auto v : pkt) { + convert_dart_to_fb(v, fbb); + } + }; + + // Need to call differently depending on if we're already + // in the process of building an object + if (currkey) fbb.Vector(currkey, work); + else fbb.Vector(work); + } + break; + case unsafe_buffer::type::string: + fbb.String(pkt.str()); + break; + case unsafe_buffer::type::integer: + fbb.Int(pkt.integer()); + break; + case unsafe_buffer::type::decimal: + fbb.Float(pkt.decimal()); + break; + case unsafe_buffer::type::boolean: + fbb.Bool(pkt.boolean()); + break; + default: + fbb.Null(); + break; + } +} +#endif + /*----- Benchmark Definitions -----*/ BENCHMARK_F(benchmark_helper, dart_nontrivial_finalized_json_test) (benchmark::State& state) { @@ -318,9 +382,9 @@ BENCHMARK_F(benchmark_helper, dart_nontrivial_json_key_lookups) (benchmark::Stat BENCHMARK_F(benchmark_helper, dart_nontrivial_json_finalizing) (benchmark::State& state) { int64_t bytes = 0; for (auto _ : state) { - for (auto const& pkt : input) { - auto parsed = unsafe_buffer::from_json(pkt); - auto buf = parsed.get_bytes(); + for (auto const& pkt : mutable_dart) { + auto finalized = pkt.lower(); + auto buf = finalized.get_bytes(); benchmark::DoNotOptimize(buf.data()); rate_counter++; bytes += buf.size(); @@ -331,6 +395,27 @@ BENCHMARK_F(benchmark_helper, dart_nontrivial_json_finalizing) (benchmark::State state.counters["parsed packets"] = rate_counter; } +#if DART_HAS_FLEXBUFFERS +BENCHMARK_F(benchmark_helper, flexbuffer_nontrivial_json_finalizing) (benchmark::State& state) { + int64_t bytes = 0; + for (auto _ : state) { + for (auto const& pkt : parsed_dart) { + // Lay out the flexbuffer + flexbuffers::Builder 
fbb; + convert_dart_to_fb(pkt, fbb); + + // Finish it. + fbb.Finish(); + rate_counter++; + bytes += fbb.GetBuffer().size(); + } + } + + state.SetBytesProcessed(bytes); + state.counters["parsed packets"] = rate_counter; +} +#endif + BENCHMARK_F(benchmark_helper, rapidjson_nontrivial_insitu_json_test) (benchmark::State& state) { auto chunk = input.size(); for (auto _ : state) { @@ -566,6 +651,7 @@ BENCHMARK_MAIN(); void benchmark_helper::SetUp(benchmark::State const&) { input = load_input(json_input); parsed_dart = parse_input_dart(input); + mutable_dart = parse_mutable_dart(input); parsed_rj = parse_input_rj(input); #ifdef DART_USE_SAJSON parsed_sajson = parse_input_sajson(input); @@ -600,6 +686,14 @@ std::vector benchmark_helper::parse_input_dart(gsl::span benchmark_helper::parse_mutable_dart(gsl::span packets) const { + std::vector parsed(packets.size()); + std::transform(packets.begin(), packets.end(), parsed.begin(), [] (auto& pkt) { + return unsafe_heap::from_json(pkt); + }); + return parsed; +} + std::vector benchmark_helper::parse_input_rj(gsl::span packets) const { std::vector parsed(packets.size()); std::transform(packets.begin(), packets.end(), parsed.begin(), [] (auto& pkt) { diff --git a/include/dart.h b/include/dart.h index 599351a..832563c 100644 --- a/include/dart.h +++ b/include/dart.h @@ -55,7 +55,7 @@ static_assert(false, "libdart requires a c++14 enabled compiler."); // Version macros for conditional compilation/feature checks. #define DART_MAJOR_VERSION 1 -#define DART_MINOR_VERSION 1 +#define DART_MINOR_VERSION 2 #define DART_PATCH_VERSION 0 /*----- Type Declarations -----*/ @@ -3579,6 +3579,16 @@ namespace dart { > basic_string& operator +=(shim::string_view str); + /** + * @brief + * Subscript operator. + * + * @details + * Returns the character at the given index. + * Operator returns a temporary character, and does not allow writes + */ + char operator [](size_type idx) const noexcept; + /** * @brief * Dereference operator. 
@@ -6924,6 +6934,31 @@ namespace dart { > > static basic_heap from_json(shim::string_view json); + + /** + * @brief + * Function constructs an optionally finalized packet to represent the given JSON string. + * + * @details + * Parsing is based on RapidJSON, and so exposes the same parsing customization points as + * RapidJSON. + * If your JSON has embedded comments in it, NaN or +/-Infinity values, or trailing commas, + * you can parse in the following ways: + * ``` + * auto json = input.read(); + * auto comments = dart::heap::from_json(json); + * auto nan_inf = dart::heap::from_json(json); + * auto commas = dart::heap::from_json(json); + * auto all_of_it = dart::heap::from_json(json); + * ``` + */ + template ::value, class EnableIf = + std::enable_if_t< + enabled + > + > + static basic_heap parse(shim::string_view json); #elif DART_HAS_RAPIDJSON /** * @brief @@ -6949,6 +6984,31 @@ namespace dart { > > static basic_heap from_json(shim::string_view json); + + /** + * @brief + * Function constructs an optionally finalized packet to represent the given JSON string. + * + * @details + * Parsing is based on RapidJSON, and so exposes the same parsing customization points as + * RapidJSON. + * If your JSON has embedded comments in it, NaN or +/-Infinity values, or trailing commas, + * you can parse in the following ways: + * ``` + * auto json = input.read(); + * auto comments = dart::heap::from_json(json); + * auto nan_inf = dart::heap::from_json(json); + * auto commas = dart::heap::from_json(json); + * auto all_of_it = dart::heap::from_json(json); + * ``` + */ + template ::value, class EnableIf = + std::enable_if_t< + enabled + > + > + static basic_heap parse(shim::string_view json); #endif #if DART_HAS_RAPIDJSON @@ -9180,6 +9240,31 @@ namespace dart { > > static basic_buffer from_json(shim::string_view json); + + /** + * @brief + * Function constructs an optionally finalized packet to represent the given JSON string. 
+ * + * @details + * Parsing is based on RapidJSON, and so exposes the same parsing customization points as + * RapidJSON. + * If your JSON has embedded comments in it, NaN or +/-Infinity values, or trailing commas, + * you can parse in the following ways: + * ``` + * auto json = input.read(); + * auto comments = dart::buffer::from_json(json); + * auto nan_inf = dart::buffer::from_json(json); + * auto commas = dart::buffer::from_json(json); + * auto all_of_it = dart::buffer::from_json(json); + * ``` + */ + template ::value, class EnableIf = + std::enable_if_t< + enabled + > + > + static basic_buffer parse(shim::string_view json); #elif DART_HAS_RAPIDJSON /** * @brief @@ -9205,6 +9290,31 @@ namespace dart { > > static basic_buffer from_json(shim::string_view json); + + /** + * @brief + * Function constructs an optionally finalized packet to represent the given JSON string. + * + * @details + * Parsing is based on RapidJSON, and so exposes the same parsing customization points as + * RapidJSON. + * If your JSON has embedded comments in it, NaN or +/-Infinity values, or trailing commas, + * you can parse in the following ways: + * ``` + * auto json = input.read(); + * auto comments = dart::buffer::from_json(json); + * auto nan_inf = dart::buffer::from_json(json); + * auto commas = dart::buffer::from_json(json); + * auto all_of_it = dart::buffer::from_json(json); + * ``` + */ + template ::value, class EnableIf = + std::enable_if_t< + enabled + > + > + static basic_buffer parse(shim::string_view json); #endif #if DART_HAS_RAPIDJSON @@ -12286,6 +12396,31 @@ namespace dart { > > static basic_packet from_json(shim::string_view json, bool finalized = true); + + /** + * @brief + * Function constructs an optionally finalized packet to represent the given JSON string. + * + * @details + * Parsing is based on RapidJSON, and so exposes the same parsing customization points as + * RapidJSON. 
+ * If your JSON has embedded comments in it, NaN or +/-Infinity values, or trailing commas, + * you can parse in the following ways: + * ``` + * auto json = input.read(); + * auto comments = dart::packet::from_json(json); + * auto nan_inf = dart::packet::from_json(json); + * auto commas = dart::packet::from_json(json); + * auto all_of_it = dart::packet::from_json(json); + * ``` + */ + template ::value, class EnableIf = + std::enable_if_t< + enabled + > + > + static basic_packet parse(shim::string_view json, bool finalized = true); #elif DART_HAS_RAPIDJSON /** * @brief @@ -12311,6 +12446,31 @@ namespace dart { > > static basic_packet from_json(shim::string_view json, bool finalize = false); + + /** + * @brief + * Function constructs an optionally finalized packet to represent the given JSON string. + * + * @details + * Parsing is based on RapidJSON, and so exposes the same parsing customization points as + * RapidJSON. + * If your JSON has embedded comments in it, NaN or +/-Infinity values, or trailing commas, + * you can parse in the following ways: + * ``` + * auto json = input.read(); + * auto comments = dart::packet::from_json(json); + * auto nan_inf = dart::packet::from_json(json); + * auto commas = dart::packet::from_json(json); + * auto all_of_it = dart::packet::from_json(json); + * ``` + */ + template ::value, class EnableIf = + std::enable_if_t< + enabled + > + > + static basic_packet parse(shim::string_view json, bool finalize = false); #endif #if DART_HAS_RAPIDJSON @@ -13730,14 +13890,316 @@ namespace dart { inline namespace literals { - inline packet operator ""_dart(char const* val, size_t len); - inline packet operator ""_dart(unsigned long long val); - inline packet operator ""_dart(long double val); + inline string operator ""_dart(char const* val, size_t len); + inline number operator ""_dart(unsigned long long val); + inline number operator ""_dart(long double val); } /*----- Global Free Functions -----*/ + /** + * @brief + * Function provides a way to 
check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + inline bool is_valid(gsl::span buffer) noexcept { + detail::raw_element raw {detail::raw_type::object, buffer.data()}; + return detail::valid_buffer(raw, buffer.size()); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + inline bool is_valid(gsl::byte const* buffer, size_t len) noexcept { + detail::raw_element raw {detail::raw_type::object, buffer}; + return detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. 
+ * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + template + bool is_valid(std::unique_ptr const& buffer, size_t len) noexcept { + detail::raw_element raw {detail::raw_type::object, buffer.get()}; + return detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + template + bool is_valid(std::unique_ptr const& buffer, size_t len) noexcept { + detail::raw_element raw {detail::raw_type::object, buffer.get()}; + return detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. 
+ * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + template + bool is_valid(std::unique_ptr const& buffer, size_t len) noexcept { + detail::raw_element raw {detail::raw_type::object, buffer.get()}; + return detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + template + bool is_valid(std::unique_ptr const& buffer, size_t len) noexcept { + detail::raw_element raw {detail::raw_type::object, buffer.get()}; + return detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. 
+ */ + inline bool is_valid(std::shared_ptr const& buffer, size_t len) noexcept { + detail::raw_element raw {detail::raw_type::object, buffer.get()}; + return detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + inline void validate(gsl::span buffer) { + detail::raw_element raw {detail::raw_type::object, buffer.data()}; + detail::valid_buffer(raw, buffer.size()); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + inline void validate(gsl::byte const* buffer, size_t len) { + detail::raw_element raw {detail::raw_type::object, buffer}; + detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. 
+ * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + template + void validate(std::unique_ptr const& buffer, size_t len) { + detail::raw_element raw {detail::raw_type::object, buffer.get()}; + detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + template + void validate(std::unique_ptr const& buffer, size_t len) { + detail::raw_element raw {detail::raw_type::object, buffer.get()}; + detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. 
+ */ + template + void validate(std::unique_ptr const& buffer, size_t len) { + detail::raw_element raw {detail::raw_type::object, buffer.get()}; + detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns normally (it is void and not noexcept, so a malformed buffer is presumably reported by throwing; TODO confirm in detail::valid_buffer) it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + template + void validate(std::unique_ptr const& buffer, size_t len) { + detail::raw_element raw {detail::raw_type::object, buffer.get()}; + detail::valid_buffer(raw, len); + } + + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns normally (it is void and not noexcept, so a malformed buffer is presumably reported by throwing; TODO confirm in detail::valid_buffer) it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source.
+ */ + inline void validate(std::shared_ptr const& buffer, size_t len) { + detail::raw_element raw {detail::raw_type::object, buffer.get()}; + detail::valid_buffer(raw, len); + } + #ifdef DART_USE_SAJSON /** * @brief diff --git a/include/dart/abi.h b/include/dart/abi.h index 2b6d639..2cac295 100644 --- a/include/dart/abi.h +++ b/include/dart/abi.h @@ -7912,6 +7912,24 @@ extern "C" { */ DART_ABI_EXPORT dart_err_t dart_take_bytes_rc_err(dart_packet_t* dst, dart_rc_type_t rc, void* bytes); + /** + * @brief + * Function provides a way to check if an arbitrary buffer of bytes + * can be successfully interpreted as a Dart buffer. + * + * @details + * Function validates whether the given network buffer is well formed. + * If the function returns true (the declared return type is int, so presumably non-zero; TODO confirm) it does NOT mean that the given buffer + * definitely wasn't corrupted in any way, just that the whole buffer + * is safely traversable, all necessary invariants hold, and it can be + * used without worry of undefined behavior. + * + * @remarks + * Function is largely intended to be used when the buffer in question + * came from an untrusted source. + */ + DART_ABI_EXPORT int dart_buffer_is_valid(void const* bytes, size_t len); + /** * @brief * Function default-initializes a Dart iterator.
diff --git a/include/dart/api.tcc b/include/dart/api.tcc index 3c5248f..0d7c9b7 100644 --- a/include/dart/api.tcc +++ b/include/dart/api.tcc @@ -1172,16 +1172,16 @@ namespace dart { inline namespace literals { - inline packet operator ""_dart(char const* val, size_t len) { - return packet::make_string({val, len}); + inline string operator ""_dart(char const* val, size_t len) { /* wraps val/len in a shim::string_view and builds a string from it */ + return string {shim::string_view {val, len}}; } - inline packet operator ""_dart(unsigned long long val) { - return packet::make_integer(val); + inline number operator ""_dart(unsigned long long val) { /* builds a number from the integer literal */ + return number {val}; } - inline packet operator ""_dart(long double val) { - return packet::make_decimal(val); + inline number operator ""_dart(long double val) { /* builds a number from the floating-point literal */ + return number {val}; } } diff --git a/include/dart/array.tcc b/include/dart/array.tcc index 503e90f..d7e96e5 100644 --- a/include/dart/array.tcc +++ b/include/dart/array.tcc @@ -299,6 +299,86 @@ namespace dart { bytes = static_cast(offset); } +// Unfortunately some versions of GCC and MSVC aren't smart enough to figure +// out that if this function is declared noexcept the throwing cases are dead code; the pragmas below therefore silence -Wterminate (GCC) and warning 4297 (MSVC) +#if DART_USING_GCC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wterminate" +#elif DART_USING_MSVC +#pragma warning(push) +#pragma warning(disable: 4297) +#endif + + template