diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index dac15376b..d0f05e42a 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -46,7 +46,8 @@ namespace rapidjson { enum ParseFlag { kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer. kParseInsituFlag = 1, //!< In-situ(destructive) parsing. - kParseValidateEncodingFlag = 2 //!< Validate encoding of JSON strings. + kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings. + kParseNonRecursiveFlag = 4 //!< Non-recursive(constant complexity in terms of function call stack size) parsing. }; //! Error code of parsing. @@ -69,7 +70,7 @@ enum ParseErrorCode { kParseErrorStringUnicodeSurrogateInvalid, //!< The surrogate pair in string is invalid. kParseErrorStringEscapeInvalid, //!< Invalid escape character in string. kParseErrorStringMissQuotationMark, //!< Missing a closing quotation mark in string. - kParseErrorStringInvalidEncoding, //!< Invalid encoidng in string. + kParseErrorStringInvalidEncoding, //!< Invalid encoding in string. kParseErrorNumberTooBig, //!< Number too big to be stored in double. kParseErrorNumberMissFraction, //!< Miss fraction part in number. @@ -134,7 +135,7 @@ namespace internal { template::copyOptimization> class StreamLocalCopy; -//! Do copy optimziation. +//! Do copy optimization. template class StreamLocalCopy { public: @@ -297,6 +298,9 @@ class GenericReader { parseErrorCode_ = kParseErrorNone; errorOffset_ = 0; + if (parseFlags & kParseNonRecursiveFlag) + return NonRecursiveParse(is, handler); + SkipWhitespace(is); if (is.Peek() == '\0') @@ -748,6 +752,220 @@ class GenericReader { } } + // Non-recursive parsing + enum NonRecursiveParsingState { + NonRecursiveParsingStartState, + NonRecursiveParsingFinishState, + NonRecursiveParsingErrorState, + // Object states + NonRecursiveParsingObjectInitialState, + NonRecursiveParsingObjectContentState, + // Array states + NonRecursiveParsingArrayInitialState, + NonRecursiveParsingArrayContentState + }; + + template + NonRecursiveParsingState TransitToCompoundValueTypeState(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + // For compound value type(object and array), we should push the current state and start a new stack frame for this type. + NonRecursiveParsingState r = NonRecursiveParsingErrorState; + + switch (is.Take()) { + case '{': + handler.StartObject(); + r = NonRecursiveParsingObjectInitialState; + // Push current state. + *stack_.template Push(1) = state; + // Initialize and push member count. + *stack_.template Push(1) = 0; + break; + case '[': + handler.StartArray(); + r = NonRecursiveParsingArrayInitialState; + // Push current state. + *stack_.template Push(1) = state; + // Initialize and push element count. + *stack_.template Push(1) = 0; + break; + } + return r; + } + + // Inner transition of object or array states(ObjectInitial->ObjectContent, ArrayInitial->ArrayContent). + template + NonRecursiveParsingState TransitByValue(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT( + state == NonRecursiveParsingObjectInitialState || + state == NonRecursiveParsingArrayInitialState || + state == NonRecursiveParsingObjectContentState || + state == NonRecursiveParsingArrayContentState); + + NonRecursiveParsingState t; + if (state == NonRecursiveParsingObjectInitialState) + t = NonRecursiveParsingObjectContentState; + else if (state == NonRecursiveParsingArrayInitialState) + t = NonRecursiveParsingArrayContentState; + else + t = state; + + NonRecursiveParsingState r = NonRecursiveParsingErrorState; + + switch (is.Peek()) { + // For plain value state is not changed. + case 'n': ParseNull (is, handler); r = t; break; + case 't': ParseTrue (is, handler); r = t; break; + case 'f': ParseFalse (is, handler); r = t; break; + case '"': ParseString(is, handler); r = t; break; + // Transit when value is object or array. + case '{': + case '[': + r = TransitToCompoundValueTypeState(state, is, handler); break; + default: ParseNumber(is, handler); r = t; break; + } + + if (HasParseError()) + r = NonRecursiveParsingErrorState; + + return r; + } + + // Transit from object related states(ObjectInitial, ObjectContent). + template + NonRecursiveParsingState TransitFromObjectStates(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + NonRecursiveParsingState r = NonRecursiveParsingErrorState; + + switch (is.Peek()) { + case '}': { + is.Take(); + // Get member count(include an extra one for non-empty object). + int memberCount = *stack_.template Pop(1); + if (state == NonRecursiveParsingObjectContentState) + ++memberCount; + // Restore the parent stack frame. + r = *stack_.template Pop(1); + // Transit to ContentState since a member/an element was just parsed. + if (r == NonRecursiveParsingArrayInitialState) + r = NonRecursiveParsingArrayContentState; + else if (r == NonRecursiveParsingObjectInitialState) + r = NonRecursiveParsingObjectContentState; + // If we return to the topmost frame mark it finished. + if (r == NonRecursiveParsingStartState) + r = NonRecursiveParsingFinishState; + handler.EndObject(memberCount); + break; + } + case ',': + is.Take(); + r = NonRecursiveParsingObjectContentState; + // Update member count. + *stack_.template Top() = *stack_.template Top() + 1; + break; + case '"': + // Should be a key-value pair. + ParseString(is, handler); + if (HasParseError()) { + r = NonRecursiveParsingErrorState; + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); + break; + } + + SkipWhitespace(is); + + if (is.Take() != ':') { + r = NonRecursiveParsingErrorState; + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissColon, is.Tell()); + break; + } + + SkipWhitespace(is); + + r = TransitByValue(state, is, handler); + + break; + default: + r = NonRecursiveParsingErrorState; + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); + break; + } + + return r; + } + + // Transit from array related states(ArrayInitial, ArrayContent). + template + NonRecursiveParsingState TransitFromArrayStates(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + NonRecursiveParsingState r = NonRecursiveParsingErrorState; + + switch (is.Peek()) { + case ']': { + is.Take(); + // Get element count(include an extra one for non-empty array). + int elementCount = *stack_.template Pop(1); + if (state == NonRecursiveParsingArrayContentState) + ++elementCount; + // Restore the parent stack frame. + r = *stack_.template Pop(1); + // Transit to ContentState since a member/an element was just parsed. + if (r == NonRecursiveParsingArrayInitialState) + r = NonRecursiveParsingArrayContentState; + else if (r == NonRecursiveParsingObjectInitialState) + r = NonRecursiveParsingObjectContentState; + // If we return to the topmost frame mark it finished. + if (r == NonRecursiveParsingStartState) + r = NonRecursiveParsingFinishState; + handler.EndArray(elementCount); + break; + } + case ',': + is.Take(); + r = NonRecursiveParsingArrayContentState; + // Update element count. + *stack_.template Top() = *stack_.template Top() + 1; + break; + default: + // Should be a single value. + r = TransitByValue(state, is, handler); + break; + } + + return r; + } + + template + NonRecursiveParsingState Transit(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + NonRecursiveParsingState r = NonRecursiveParsingErrorState; + + switch (state) { + case NonRecursiveParsingStartState: + r = TransitToCompoundValueTypeState(state, is, handler); + break; + case NonRecursiveParsingObjectInitialState: + case NonRecursiveParsingObjectContentState: + r = TransitFromObjectStates(state, is, handler); + break; + case NonRecursiveParsingArrayInitialState: + case NonRecursiveParsingArrayContentState: + r = TransitFromArrayStates(state, is, handler); + break; + } + + return r; + } + + template + bool NonRecursiveParse(InputStream& is, Handler& handler) { + NonRecursiveParsingState state = NonRecursiveParsingStartState; + + SkipWhitespace(is); + while (is.Peek() != '\0' && state != NonRecursiveParsingErrorState) { + state = Transit(state, is, handler); + SkipWhitespace(is); + } + + stack_.Clear(); + return state == NonRecursiveParsingFinishState && !HasParseError(); + } + static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. internal::Stack stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. ParseErrorCode parseErrorCode_; diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 62a0b42c4..028f93459 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -650,7 +650,7 @@ struct StreamTraits > { enum { copyOptimization = 1 }; }; -} // namespace rapdijson +} // namespace rapidjson #endif TEST(Reader, CustomStringStream) { @@ -706,6 +706,200 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { EXPECT_FALSE(reader.HasParseError()); } +TEST(Reader, NonRecursiveParsing) { + StringStream json("[1,true,false,null,\"string\",{\"array\":[1]}]"); + Reader reader; + BaseReaderHandler<> handler; + + Reader::NonRecursiveParsingState r; + + // [ + r = reader.Transit( + Reader::NonRecursiveParsingStartState, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayInitialState, r); + + // 1 + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(0, *reader.stack_.template Top()); // element count + + // , + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(1, *reader.stack_.template Top()); // element count + + // true + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(1, *reader.stack_.template Top()); // element count + + // , + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(2, *reader.stack_.template Top()); // element count + + // false + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(2, *reader.stack_.template Top()); // element count + + // , + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(3, *reader.stack_.template Top()); // element count + + // null + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(3, *reader.stack_.template Top()); // element count + + // , + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(4, *reader.stack_.template Top()); // element count + + // "string" + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(4, *reader.stack_.template Top()); // element count + + // , + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(5, *reader.stack_.template Top()); // element count + + // { + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingObjectInitialState, r); + EXPECT_EQ(0, *reader.stack_.template Top()); // member count + + // "array":[ + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayInitialState, r); + EXPECT_EQ(0, *reader.stack_.template Top()); // element count + + // 1 + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(0, *reader.stack_.template Top()); // element count + + // ] + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingObjectContentState, r); + EXPECT_EQ(0, *reader.stack_.template Top()); // member count + + // } + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(5, *reader.stack_.template Top()); // element count + + // ] + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingFinishState, r); +} + +struct CountHandler : BaseReaderHandler<> { + void EndObject(SizeType memberCount) { + MemberCount = memberCount; + } + + void EndArray(SizeType elementCount) { + ElementCount = elementCount; + } + + SizeType MemberCount; + SizeType ElementCount; +}; + +TEST(Reader, NonRecursiveParsing_MemberCounting) { + StringStream json("{\"array\": []}"); + Reader reader; + CountHandler handler; + + reader.NonRecursiveParse(json, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(1, handler.MemberCount); +} + +TEST(Reader, NonRecursiveParsing_ElementCounting) { + StringStream json("[{}]"); + Reader reader; + CountHandler handler; + + reader.NonRecursiveParse(json, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(1, handler.ElementCount); +} + #ifdef __GNUC__ #pragma GCC diagnostic pop #endif