diff --git a/deps/simdjson/simdjson.cpp b/deps/simdjson/simdjson.cpp index 7a8d24ab0d09e4..c0fdf3afad5c5b 100644 --- a/deps/simdjson/simdjson.cpp +++ b/deps/simdjson/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-05-07 18:04:59 -0400. Do not edit! */ +/* auto-generated on 2024-05-30 10:52:38 -0400. Do not edit! */ /* including simdjson.cpp: */ /* begin file simdjson.cpp */ #define SIMDJSON_SRC_SIMDJSON_CPP @@ -2359,7 +2359,7 @@ enum error_code { INDEX_OUT_OF_BOUNDS, ///< JSON array index too large NO_SUCH_FIELD, ///< JSON field not found in object IO_ERROR, ///< Error reading a file - INVALID_JSON_POINTER, ///< Invalid JSON pointer reference + INVALID_JSON_POINTER, ///< Invalid JSON pointer syntax INVALID_URI_FRAGMENT, ///< Invalid URI fragment UNEXPECTED_ERROR, ///< indicative of a bug in simdjson PARSER_IN_USE, ///< parser is already in use. @@ -6896,6 +6896,7 @@ static inline uint32_t detect_supported_architectures() { /* end file internal/isadetection.h */ #include +#include namespace simdjson { diff --git a/deps/simdjson/simdjson.h b/deps/simdjson/simdjson.h index 985ffe1144e57f..56f08de43d8fe4 100644 --- a/deps/simdjson/simdjson.h +++ b/deps/simdjson/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-05-07 18:04:59 -0400. Do not edit! */ +/* auto-generated on 2024-05-30 10:52:38 -0400. Do not edit! */ /* including simdjson.h: */ /* begin file simdjson.h */ #ifndef SIMDJSON_H @@ -2346,7 +2346,7 @@ namespace std { #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION "3.9.2" +#define SIMDJSON_VERSION "3.9.3" namespace simdjson { enum { @@ -2361,7 +2361,7 @@ enum { /** * The revision (major.minor.REVISION) of simdjson being used. */ - SIMDJSON_VERSION_REVISION = 2 + SIMDJSON_VERSION_REVISION = 3 }; } // namespace simdjson @@ -2422,7 +2422,7 @@ enum error_code { INDEX_OUT_OF_BOUNDS, ///< JSON array index too large NO_SUCH_FIELD, ///< JSON field not found in object IO_ERROR, ///< Error reading a file - INVALID_JSON_POINTER, ///< Invalid JSON pointer reference + INVALID_JSON_POINTER, ///< Invalid JSON pointer syntax INVALID_URI_FRAGMENT, ///< Invalid URI fragment UNEXPECTED_ERROR, ///< indicative of a bug in simdjson PARSER_IN_USE, ///< parser is already in use. @@ -6279,15 +6279,15 @@ class base_formatter { simdjson_inline void one_char(char c); simdjson_inline void call_print_newline() { - this->print_newline(); + static_cast(this)->print_newline(); } simdjson_inline void call_print_indents(size_t depth) { - this->print_indents(depth); + static_cast(this)->print_indents(depth); } simdjson_inline void call_print_space() { - this->print_space(); + static_cast(this)->print_space(); } protected: @@ -7440,6 +7440,23 @@ inline simdjson_result element::operator[](const char *key) const noexc return at_key(key); } +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + inline simdjson_result element::at_pointer(std::string_view json_pointer) const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { @@ -7448,7 +7465,10 @@ inline simdjson_result element::at_pointer(std::string_view json_pointe case internal::tape_type::START_ARRAY: return array(tape).at_pointer(json_pointer); default: { - if(!json_pointer.empty()) { // a non-empty string is invalid on an atom + if (!json_pointer.empty()) { // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } return INVALID_JSON_POINTER; } // an empty string means that we return the current node @@ -8017,10 +8037,10 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc } else { size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index]; size_t svlen = next_doc_index - current_index(); - if(svlen > 1) { + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { svlen--; } - return std::string_view(reinterpret_cast(stream->buf) + current_index(), svlen); + return std::string_view(start, svlen); } } @@ -33195,6 +33215,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -33710,6 +33733,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -37732,10 +37758,11 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; // normally the length would be next_index - current_index() - 1, except for the last document size_t svlen = next_index - current_index(); - if(svlen > 1) { + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { svlen--; } - return std::string_view(reinterpret_cast(stream->buf) + current_index(), svlen); + return std::string_view(start, svlen); } } cur_struct_index++; @@ -37991,6 +38018,23 @@ simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parse #endif } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -39181,6 +39225,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -40083,6 +40148,26 @@ simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); @@ -40093,6 +40178,10 @@ simdjson_inline simdjson_result value::at_pointer(std::string_view json_p case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } return INVALID_JSON_POINTER; } } @@ -43532,6 +43621,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -44047,6 +44139,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -48069,10 +48164,11 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; // normally the length would be next_index - current_index() - 1, except for the last document size_t svlen = next_index - current_index(); - if(svlen > 1) { + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { svlen--; } - return std::string_view(reinterpret_cast(stream->buf) + current_index(), svlen); + return std::string_view(start, svlen); } } cur_struct_index++; @@ -48328,6 +48424,23 @@ simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parse #endif } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -49518,6 +49631,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -50420,6 +50554,26 @@ simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); @@ -50430,6 +50584,10 @@ simdjson_inline simdjson_result value::at_pointer(std::string_view json_p case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } return INVALID_JSON_POINTER; } } @@ -54361,6 +54519,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -54876,6 +55037,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -58898,10 +59062,11 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; // normally the length would be next_index - current_index() - 1, except for the last document size_t svlen = next_index - current_index(); - if(svlen > 1) { + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { svlen--; } - return std::string_view(reinterpret_cast(stream->buf) + current_index(), svlen); + return std::string_view(start, svlen); } } cur_struct_index++; @@ -59157,6 +59322,23 @@ simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parse #endif } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -60347,6 +60529,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -61249,6 +61452,26 @@ simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); @@ -61259,6 +61482,10 @@ simdjson_inline simdjson_result value::at_pointer(std::string_view json_p case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } return INVALID_JSON_POINTER; } } @@ -65189,6 +65416,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -65704,6 +65934,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -69726,10 +69959,11 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; // normally the length would be next_index - current_index() - 1, except for the last document size_t svlen = next_index - current_index(); - if(svlen > 1) { + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { svlen--; } - return std::string_view(reinterpret_cast(stream->buf) + current_index(), svlen); + return std::string_view(start, svlen); } } cur_struct_index++; @@ -69985,6 +70219,23 @@ simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parse #endif } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -71175,6 +71426,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -72077,6 +72349,26 @@ simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); @@ -72087,6 +72379,10 @@ simdjson_inline simdjson_result value::at_pointer(std::string_view json_p case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } return INVALID_JSON_POINTER; } } @@ -76132,6 +76428,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -76647,6 +76946,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -80669,10 +80971,11 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; // normally the length would be next_index - current_index() - 1, except for the last document size_t svlen = next_index - current_index(); - if(svlen > 1) { + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { svlen--; } - return std::string_view(reinterpret_cast(stream->buf) + current_index(), svlen); + return std::string_view(start, svlen); } } cur_struct_index++; @@ -80928,6 +81231,23 @@ simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parse #endif } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -82118,6 +82438,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -83020,6 +83361,26 @@ simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); @@ -83030,6 +83391,10 @@ simdjson_inline simdjson_result value::at_pointer(std::string_view json_p case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } return INVALID_JSON_POINTER; } } @@ -87398,6 +87763,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -87913,6 +88281,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -91935,10 +92306,11 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; // normally the length would be next_index - current_index() - 1, except for the last document size_t svlen = next_index - current_index(); - if(svlen > 1) { + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { svlen--; } - return std::string_view(reinterpret_cast(stream->buf) + current_index(), svlen); + return std::string_view(start, svlen); } } cur_struct_index++; @@ -92194,6 +92566,23 @@ simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parse #endif } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -93384,6 +93773,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -94286,6 +94696,26 @@ simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); @@ -94296,6 +94726,10 @@ simdjson_inline simdjson_result value::at_pointer(std::string_view json_p case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } return INVALID_JSON_POINTER; } } @@ -98135,6 +98569,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -98650,6 +99087,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -102672,10 +103112,11 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; // normally the length would be next_index - current_index() - 1, except for the last document size_t svlen = next_index - current_index(); - if(svlen > 1) { + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { svlen--; } - return std::string_view(reinterpret_cast(stream->buf) + current_index(), svlen); + return std::string_view(start, svlen); } } cur_struct_index++; @@ -102931,6 +103372,23 @@ simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parse #endif } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -104121,6 +104579,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -105023,6 +105502,26 @@ simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); @@ -105033,6 +105532,10 @@ simdjson_inline simdjson_result value::at_pointer(std::string_view json_p case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } return INVALID_JSON_POINTER; } } @@ -108885,6 +109388,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -109400,6 +109906,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -113422,10 +113931,11 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; // normally the length would be next_index - current_index() - 1, except for the last document size_t svlen = next_index - current_index(); - if(svlen > 1) { + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { svlen--; } - return std::string_view(reinterpret_cast(stream->buf) + current_index(), svlen); + return std::string_view(start, svlen); } } cur_struct_index++; @@ -113681,6 +114191,23 @@ simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parse #endif } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -114871,6 +115398,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -115773,6 +116321,26 @@ simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); @@ -115783,6 +116351,10 @@ simdjson_inline simdjson_result value::at_pointer(std::string_view json_p case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } return INVALID_JSON_POINTER; } }