Skip to content

Commit

Permalink
Add pretty print for DOM
Browse files Browse the repository at this point in the history
Add pretty print for DOM with documentation.
Currently, the indentation is fixed to four spaces.

Close #1329.
  • Loading branch information
Cuda-Chen committed Jul 8, 2023
1 parent 92174c9 commit 441364b
Show file tree
Hide file tree
Showing 5 changed files with 258 additions and 63 deletions.
2 changes: 1 addition & 1 deletion doc/dom.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ Once you have an element, you can navigate it with idiomatic C++ iterators, oper
with the `size()` method.
* **Checking an Element Type:** You can check an element's type with `element.type()`. It
returns an `element_type` with values such as `simdjson::dom::element_type::ARRAY`, `simdjson::dom::element_type::OBJECT`, `simdjson::dom::element_type::INT64`, `simdjson::dom::element_type::UINT64`, `simdjson::dom::element_type::DOUBLE`, `simdjson::dom::element_type::STRING`, `simdjson::dom::element_type::BOOL`, or `simdjson::dom::element_type::NULL_VALUE`.
* **Output to streams and strings:** Given a document or an element (or node) out of a JSON document, you can output a minified string version using the C++ stream idiom (`out << element`). You can also request the construction of a minified string version (`simdjson::minify(element)`). Numbers are serialized as 64-bit floating-point numbers (`double`).
* **Output to streams and strings:** Given a document or an element (or node) out of a JSON document, you can output a minified string version using the C++ stream idiom (`out << element`). You can also request the construction of a minified string version (`simdjson::minify(element)`) or a prettified string version (`simdjson::prettify(element)`). Numbers are serialized as 64-bit floating-point numbers (`double`).
### Examples
Expand Down
100 changes: 83 additions & 17 deletions include/simdjson/dom/serialization-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,26 +100,31 @@ char *fast_itoa(char *output, uint64_t value) noexcept {
std::memcpy(output, write_pointer, len);
return output + len;
}


} // anonymous namespace
namespace internal {

/***
* Minifier/formatter code.
**/

simdjson_inline void mini_formatter::number(uint64_t x) {
/**
 * Append the decimal text of an unsigned 64-bit integer to the buffer.
 * The largest uint64_t has 20 decimal digits, so 24 bytes of scratch is enough.
 */
template<class formatter>
simdjson_inline void base_formatter<formatter>::number(uint64_t x) {
  char digits[24];
  // fast_itoa returns the position just past the last character it wrote.
  char * const digits_end = fast_itoa(digits, x);
  buffer.insert(buffer.end(), digits, digits_end);
}

simdjson_inline void mini_formatter::number(int64_t x) {
/**
 * Append the decimal text of a signed 64-bit integer to the buffer.
 * A sign plus at most 19 digits fits comfortably in the 24-byte scratch area.
 */
template<class formatter>
simdjson_inline void base_formatter<formatter>::number(int64_t x) {
  char digits[24];
  // fast_itoa returns the position just past the last character it wrote.
  char * const digits_end = fast_itoa(digits, x);
  buffer.insert(buffer.end(), digits, digits_end);
}

simdjson_inline void mini_formatter::number(double x) {
template<class formatter>
simdjson_inline void base_formatter<formatter>::number(double x) {
char number_buffer[24];
// Currently, passing the nullptr to the second argument is
// safe because our implementation does not check the second
Expand All @@ -128,31 +133,51 @@ simdjson_inline void mini_formatter::number(double x) {
buffer.insert(buffer.end(), number_buffer, newp);
}

simdjson_inline void mini_formatter::start_array() { one_char('['); }
simdjson_inline void mini_formatter::end_array() { one_char(']'); }
simdjson_inline void mini_formatter::start_object() { one_char('{'); }
simdjson_inline void mini_formatter::end_object() { one_char('}'); }
simdjson_inline void mini_formatter::comma() { one_char(','); }
/** Open an array: appends '[' to the output. */
template<class formatter>
simdjson_inline void base_formatter<formatter>::start_array() {
  this->one_char('[');
}


/** Close an array: appends ']' to the output. */
template<class formatter>
simdjson_inline void base_formatter<formatter>::end_array() {
  this->one_char(']');
}

/** Open an object: appends '{' to the output. */
template<class formatter>
simdjson_inline void base_formatter<formatter>::start_object() {
  this->one_char('{');
}

simdjson_inline void mini_formatter::true_atom() {
/** Close an object: appends '}' to the output. */
template<class formatter>
simdjson_inline void base_formatter<formatter>::end_object() {
  this->one_char('}');
}

/** Separate two values: appends ',' to the output. */
template<class formatter>
simdjson_inline void base_formatter<formatter>::comma() {
  this->one_char(',');
}

/** Append the JSON literal `true` to the output. */
template<class formatter>
simdjson_inline void base_formatter<formatter>::true_atom() {
  const char literal[] = "true";
  // sizeof counts the terminating NUL, which must not be emitted.
  buffer.insert(buffer.end(), literal, literal + (sizeof(literal) - 1));
}
simdjson_inline void mini_formatter::false_atom() {

/** Append the JSON literal `false` to the output. */
template<class formatter>
simdjson_inline void base_formatter<formatter>::false_atom() {
  const char literal[] = "false";
  // sizeof counts the terminating NUL, which must not be emitted.
  buffer.insert(buffer.end(), literal, literal + (sizeof(literal) - 1));
}
simdjson_inline void mini_formatter::null_atom() {

/** Append the JSON literal `null` to the output. */
template<class formatter>
simdjson_inline void base_formatter<formatter>::null_atom() {
  const char literal[] = "null";
  // sizeof counts the terminating NUL, which must not be emitted.
  buffer.insert(buffer.end(), literal, literal + (sizeof(literal) - 1));
}
simdjson_inline void mini_formatter::one_char(char c) { buffer.push_back(c); }
simdjson_inline void mini_formatter::key(std::string_view unescaped) {

/** Append a single character to the backing buffer. */
template<class formatter>
simdjson_inline void base_formatter<formatter>::one_char(char c) {
  this->buffer.push_back(c);
}

/**
 * Append an object key: the key is written through string() and is
 * immediately followed by ':'.
 */
template<class formatter>
simdjson_inline void base_formatter<formatter>::key(std::string_view unescaped) {
  this->string(unescaped);
  this->one_char(':');
}
simdjson_inline void mini_formatter::string(std::string_view unescaped) {

template<class formatter>
simdjson_inline void base_formatter<formatter>::string(std::string_view unescaped) {
one_char('\"');
size_t i = 0;
// Fast path for the case where we have no control character, no ", and no backslash.
Expand Down Expand Up @@ -231,14 +256,44 @@ simdjson_inline void mini_formatter::string(std::string_view unescaped) {
one_char('\"');
}

inline void mini_formatter::clear() {

/** Discard everything written so far by emptying the backing buffer. */
template<class formatter>
inline void base_formatter<formatter>::clear() {
  this->buffer.clear();
}

simdjson_inline std::string_view mini_formatter::str() const {
/**
 * View of the text written so far. The view points into the backing
 * buffer and does not own the characters.
 */
template<class formatter>
simdjson_inline std::string_view base_formatter<formatter>::str() const {
  const char * const first = buffer.data();
  const size_t length = buffer.size();
  return std::string_view(first, length);
}

/** Minified output has no line breaks: intentionally emits nothing. */
simdjson_inline void mini_formatter::print_newline() {
}

/** Minified output has no indentation: intentionally emits nothing. */
simdjson_inline void mini_formatter::print_indents(size_t depth) {
  // The nesting depth is irrelevant here; reference it to silence unused-parameter warnings.
  static_cast<void>(depth);
}

/** Minified output has no padding spaces: intentionally emits nothing. */
simdjson_inline void mini_formatter::print_space() {
}

/** End the current output line by appending '\n'. */
simdjson_inline void pretty_formatter::print_newline() {
  this->one_char('\n');
}

/**
 * Append the leading whitespace for a line nested `depth` levels deep:
 * indent_step spaces per level. Emits nothing when indent_step is not
 * positive.
 *
 * @param depth current nesting level (0 for the outermost value)
 */
simdjson_inline void pretty_formatter::print_indents(size_t depth) {
  if(this->indent_step <= 0) {
    return;
  }
  // indent_step is a signed int that is known to be positive at this point.
  // Convert it explicitly and compute the width once, rather than evaluating
  // a mixed signed/unsigned product on every loop iteration.
  const size_t width = static_cast<size_t>(this->indent_step) * depth;
  // Same effect as calling one_char(' ') `width` times, in a single insertion.
  this->buffer.insert(this->buffer.end(), width, ' ');
}

/** Append a single space (used after an object key by the string builder). */
simdjson_inline void pretty_formatter::print_space() {
  this->one_char(' ');
}

/***
* String building code.
Expand All @@ -258,11 +313,16 @@ inline void string_builder<serializer>::append(simdjson::dom::element value) {
// print commas after each value
if (after_value) {
format.comma();
format.print_newline();
}

format.print_indents(depth);

// If we are in an object, print the next key and :, and skip to the next
// value.
if (is_object[depth]) {
format.key(iter.get_string_view());
format.print_space();
iter.json_index++;
}
switch (iter.tape_ref_type()) {
Expand Down Expand Up @@ -291,6 +351,7 @@ inline void string_builder<serializer>::append(simdjson::dom::element value) {

is_object[depth] = false;
after_value = false;
format.print_newline();
continue;
}

Expand Down Expand Up @@ -318,6 +379,7 @@ inline void string_builder<serializer>::append(simdjson::dom::element value) {

is_object[depth] = true;
after_value = false;
format.print_newline();
continue;
}

Expand Down Expand Up @@ -362,17 +424,21 @@ inline void string_builder<serializer>::append(simdjson::dom::element value) {
// Handle multiple ends in a row
while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY ||
iter.tape_ref_type() == tape_type::END_OBJECT)) {
format.print_newline();
depth--;
format.print_indents(depth);
if (iter.tape_ref_type() == tape_type::END_ARRAY) {
format.end_array();
} else {
format.end_object();
}
depth--;
iter.json_index++;
}

// Stop when we're at depth 0
} while (depth != 0);

format.print_newline();
}

template <class serializer>
Expand Down
155 changes: 110 additions & 45 deletions include/simdjson/dom/serialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,50 +19,9 @@ namespace simdjson {
*/
namespace internal {

class mini_formatter;

/**
* @private The string_builder template allows us to construct
* a string from a document element. It is parametrized
* by a "formatter" which handles the details. Thus
* the string_builder template could support both minification
* and prettification, and various other tradeoffs.
*/
template <class formatter = mini_formatter>
class string_builder {
template<class formatter>
class base_formatter {
public:
/** Construct an initially empty builder, would print the empty string **/
string_builder() = default;
/** Append an element to the builder (to be printed) **/
inline void append(simdjson::dom::element value);
/** Append an array to the builder (to be printed) **/
inline void append(simdjson::dom::array value);
/** Append an object to the builder (to be printed) **/
inline void append(simdjson::dom::object value);
/** Reset the builder (so that it would print the empty string) **/
simdjson_inline void clear();
/**
* Get access to the string. The string_view is owned by the builder
* and it is invalid to use it after the string_builder has been
* destroyed.
* However you can make a copy of the string_view on memory that you
* own.
*/
simdjson_inline std::string_view str() const;
/** Append a key_value_pair to the builder (to be printed) **/
simdjson_inline void append(simdjson::dom::key_value_pair value);
private:
formatter format{};
};

/**
* @private This is the class that we expect to use with the string_builder
* template. It tries to produce a compact version of the JSON element
* as quickly as possible.
*/
class mini_formatter {
public:
mini_formatter() = default;
/** Add a comma **/
simdjson_inline void comma();
/** Start an array, prints [ **/
Expand Down Expand Up @@ -97,14 +56,88 @@ class mini_formatter {
**/
simdjson_inline std::string_view str() const;

private:
// implementation details (subject to change)
/** Prints one character **/
simdjson_inline void one_char(char c);

/**
 * Forward to the concrete formatter's print_newline().
 * print_newline() is declared by the derived formatter classes (the
 * `formatter` template parameter), so the call must be dispatched through
 * the derived type; calling it on the base `this` would not find the member.
 */
simdjson_inline void call_print_newline() {
  static_cast<formatter*>(this)->print_newline();
}

/**
 * Forward to the concrete formatter's print_indents().
 * print_indents() is declared by the derived formatter classes (the
 * `formatter` template parameter), so the call must be dispatched through
 * the derived type; calling it on the base `this` would not find the member.
 *
 * @param depth nesting level passed through unchanged to the formatter
 */
simdjson_inline void call_print_indents(size_t depth) {
  static_cast<formatter*>(this)->print_indents(depth);
}

/**
 * Forward to the concrete formatter's print_space().
 * print_space() is declared by the derived formatter classes (the
 * `formatter` template parameter), so the call must be dispatched through
 * the derived type; calling it on the base `this` would not find the member.
 */
simdjson_inline void call_print_space() {
  static_cast<formatter*>(this)->print_space();
}

protected:
// implementation details (subject to change)
/** Backing buffer **/
std::vector<char> buffer{}; // not ideal!
};


/**
* @private This is the class that we expect to use with the string_builder
* template. It tries to produce a compact version of the JSON element
* as quickly as possible.
*/
// Formatter producing minified JSON: it inherits all token output from
// base_formatter and supplies whitespace hooks that emit nothing.
class mini_formatter : public base_formatter<mini_formatter> {
public:
/** Line-break hook; emits nothing for minified output. **/
simdjson_inline void print_newline();

/** Indentation hook; the depth is ignored and nothing is emitted. **/
simdjson_inline void print_indents(size_t depth);

/** Spacing hook; emits nothing for minified output. **/
simdjson_inline void print_space();
};

/**
 * @private Formatter producing indented, human-readable JSON. It inherits
 * all token output from base_formatter and supplies whitespace hooks that
 * emit newlines, indentation and spaces.
 */
class pretty_formatter : public base_formatter<pretty_formatter> {
public:
/** Appends a newline character. **/
simdjson_inline void print_newline();

/** Appends indent_step spaces for each level of depth. **/
simdjson_inline void print_indents(size_t depth);

/** Appends a single space. **/
simdjson_inline void print_space();

protected:
/** Number of spaces per nesting level (currently fixed to four). **/
int indent_step = 4;
};

/**
* @private The string_builder template allows us to construct
* a string from a document element. It is parametrized
* by a "formatter" which handles the details. Thus
* the string_builder template supports both minification
* (mini_formatter, the default) and prettification
* (pretty_formatter), and could support various other tradeoffs.
*/
template <class formatter = mini_formatter>
class string_builder {
public:
/** Construct an initially empty builder, would print the empty string **/
string_builder() = default;
/** Append an element to the builder (to be printed) **/
inline void append(simdjson::dom::element value);
/** Append an array to the builder (to be printed) **/
inline void append(simdjson::dom::array value);
/** Append an object to the builder (to be printed) **/
inline void append(simdjson::dom::object value);
/** Reset the builder (so that it would print the empty string) **/
simdjson_inline void clear();
/**
* Get access to the string. The string_view is owned by the builder
* and it is invalid to use it after the string_builder has been
* destroyed.
* However you can make a copy of the string_view on memory that you
* own.
*/
simdjson_inline std::string_view str() const;
/** Append a key_value_pair to the builder (to be printed) **/
simdjson_inline void append(simdjson::dom::key_value_pair value);
private:
/** The formatter instance that receives the output **/
formatter format{};
};

} // internal

namespace dom {
Expand Down Expand Up @@ -212,6 +245,38 @@ std::string minify(simdjson_result<T> x) {
}
#endif

/**
* Prettifies a JSON element or document, printing the valid JSON with indentation.
*
* dom::parser parser;
* element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded);
*
* // Prints:
* // [
* //     1,
* //     2,
* //     3
* // ]
* cout << prettify(doc) << endl;
*
*/
template <class T>
std::string prettify(T x) {
simdjson::internal::string_builder<simdjson::internal::pretty_formatter> sb;
sb.append(x);
std::string_view answer = sb.str();
return std::string(answer.data(), answer.size());
}

#if SIMDJSON_EXCEPTIONS
/**
 * Prettifies the value held by a simdjson_result.
 *
 * @param x result wrapping a JSON element, array, object or document
 * @return the indented JSON text of the wrapped value
 * @throws simdjson_error if the result holds an error
 */
template <class T>
std::string prettify(simdjson_result<T> x) {
  if (x.error()) { throw simdjson_error(x.error()); }
  // Delegate to prettify(T) so the pretty_formatter is actually used;
  // returning to_string(x.value()) here would bypass it.
  return prettify(x.value());
}
#endif

} // namespace simdjson

Expand Down

0 comments on commit 441364b

Please sign in to comment.