Skip to content

Commit

Permalink
Add pretty print for DOM
Browse files Browse the repository at this point in the history
Add pretty print for DOM with documentation.
Currently, the indentation is fixed to four spaces.
  • Loading branch information
Cuda-Chen committed Jul 3, 2023
1 parent 92174c9 commit 9039010
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 4 deletions.
2 changes: 1 addition & 1 deletion doc/dom.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ Once you have an element, you can navigate it with idiomatic C++ iterators, oper
with the `size()` method.
* **Checking an Element Type:** You can check an element's type with `element.type()`. It
returns an `element_type` with values such as `simdjson::dom::element_type::ARRAY`, `simdjson::dom::element_type::OBJECT`, `simdjson::dom::element_type::INT64`, `simdjson::dom::element_type::UINT64`, `simdjson::dom::element_type::DOUBLE`, `simdjson::dom::element_type::STRING`, `simdjson::dom::element_type::BOOL`, or `simdjson::dom::element_type::NULL_VALUE`.
* **Output to streams and strings:** Given a document or an element (or node) out of a JSON document, you can output a minified string version using the C++ stream idiom (`out << element`). You can also request the construction of a minified string version (`simdjson::minify(element)`). Numbers are serialized as 64-bit floating-point numbers (`double`).
* **Output to streams and strings:** Given a document or an element (or node) out of a JSON document, you can output a minified string version using the C++ stream idiom (`out << element`). You can also request the construction of a minified string version (`simdjson::minify(element)`) or a prettified string version (`simdjson::prettify(element)`). Numbers are serialized as 64-bit floating-point numbers (`double`).
### Examples
Expand Down
42 changes: 41 additions & 1 deletion include/simdjson/dom/serialization-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,35 @@ simdjson_inline std::string_view mini_formatter::str() const {
return std::string_view(buffer.data(), buffer.size());
}

/** Line-break hook: intentionally emits nothing in this formatter. **/
simdjson_inline void mini_formatter::print_newline() {
  // Deliberately empty.
}

/** Indentation hook: intentionally emits nothing; the depth is ignored. **/
simdjson_inline void mini_formatter::print_indents(size_t depth) {
  // Mark the parameter as used to silence unused-parameter warnings.
  static_cast<void>(depth);
}

/** Space hook: intentionally emits nothing in this formatter. **/
simdjson_inline void mini_formatter::print_space() {
  // Deliberately empty.
}

/** Line-break hook: appends a single '\n' to the output. **/
simdjson_inline void pretty_formatter::print_newline() {
  const char line_break = '\n';
  one_char(line_break);
}

/**
 * Indentation hook: appends indent_step spaces for every nesting level.
 * Nothing is written when indent_step is zero or negative.
 **/
simdjson_inline void pretty_formatter::print_indents(size_t depth) {
  if (indent_step > 0) {
    // Total number of spaces to emit, computed once up front.
    const size_t total_spaces = indent_step * depth;
    for (size_t remaining = total_spaces; remaining != 0; --remaining) {
      one_char(' ');
    }
  }
}

/** Space hook: appends a single ' ' to the output. **/
simdjson_inline void pretty_formatter::print_space() {
  const char blank = ' ';
  one_char(blank);
}

/***
* String building code.
Expand All @@ -258,11 +287,16 @@ inline void string_builder<serializer>::append(simdjson::dom::element value) {
// print commas after each value
if (after_value) {
format.comma();
format.print_newline();
}

format.print_indents(depth);

// If we are in an object, print the next key and :, and skip to the next
// value.
if (is_object[depth]) {
format.key(iter.get_string_view());
format.print_space();
iter.json_index++;
}
switch (iter.tape_ref_type()) {
Expand Down Expand Up @@ -291,6 +325,7 @@ inline void string_builder<serializer>::append(simdjson::dom::element value) {

is_object[depth] = false;
after_value = false;
format.print_newline();
continue;
}

Expand Down Expand Up @@ -318,6 +353,7 @@ inline void string_builder<serializer>::append(simdjson::dom::element value) {

is_object[depth] = true;
after_value = false;
format.print_newline();
continue;
}

Expand Down Expand Up @@ -362,17 +398,21 @@ inline void string_builder<serializer>::append(simdjson::dom::element value) {
// Handle multiple ends in a row
while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY ||
iter.tape_ref_type() == tape_type::END_OBJECT)) {
format.print_newline();
depth--;
format.print_indents(depth);
if (iter.tape_ref_type() == tape_type::END_ARRAY) {
format.end_array();
} else {
format.end_object();
}
depth--;
iter.json_index++;
}

// Stop when we're at depth 0
} while (depth != 0);

format.print_newline();
}

template <class serializer>
Expand Down
64 changes: 62 additions & 2 deletions include/simdjson/dom/serialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace simdjson {
namespace internal {

class mini_formatter;
class pretty_formatter;

/**
* @private The string_builder template allows us to construct
Expand Down Expand Up @@ -63,6 +64,9 @@ class string_builder {
class mini_formatter {
public:
mini_formatter() = default;

virtual ~mini_formatter() = default;

/** Add a comma **/
simdjson_inline void comma();
/** Start an array, prints [ **/
Expand Down Expand Up @@ -97,14 +101,38 @@ class mini_formatter {
**/
simdjson_inline std::string_view str() const;

private:
// implementation details (subject to change)
simdjson_inline virtual void print_newline();

simdjson_inline virtual void print_indents(size_t depth);

simdjson_inline virtual void print_space();

/** Prints one character **/
simdjson_inline void one_char(char c);

private:
// implementation details (subject to change)
/** Backing buffer **/
std::vector<char> buffer{}; // not ideal!
};

/**
 * @private Formatter producing indented, multi-line JSON.
 *
 * It derives from mini_formatter and overrides only the three whitespace
 * hooks (newline, indentation, space).
 */
class pretty_formatter : public mini_formatter {
public:
  pretty_formatter() : mini_formatter() {}

  /** Prints a line break **/
  simdjson_inline void print_newline() override;

  /** Prints the indentation matching the given nesting depth **/
  simdjson_inline void print_indents(size_t depth) override;

  /** Prints a single space **/
  simdjson_inline void print_space() override;

private:
  /** Number of spaces emitted per nesting level **/
  int indent_step = 4;
};


} // internal

namespace dom {
Expand Down Expand Up @@ -212,6 +240,38 @@ std::string minify(simdjson_result<T> x) {
}
#endif

/**
* Prettifies a JSON element or document, printing the valid JSON with indentation.
*
* dom::parser parser;
* element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded);
*
* // Prints:
* // [
* //     1,
* //     2,
* //     3
* // ]
* cout << prettify(doc) << endl;
*
*/
template <class T>
std::string prettify(T x) {
simdjson::internal::string_builder<simdjson::internal::pretty_formatter> sb;
sb.append(x);
std::string_view answer = sb.str();
return std::string(answer.data(), answer.size());
}

#if SIMDJSON_EXCEPTIONS
/**
 * Prettifies the value held by a simdjson_result.
 *
 * @throws simdjson_error if the result carries an error.
 */
template <class T>
std::string prettify(simdjson_result<T> x) {
  if (x.error()) { throw simdjson_error(x.error()); }
  // Delegate to prettify(T) so both overloads produce the same output.
  return prettify(x.value());
}
#endif

} // namespace simdjson

Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ link_libraries(simdjson)
add_cpp_test(unicode_tests LABELS dom acceptance per_implementation)
add_cpp_test(minify_tests LABELS other acceptance per_implementation)
add_cpp_test(padded_string_tests LABELS other acceptance )
add_cpp_test(prettify_tests LABELS other acceptance per_implementation)

if(MSVC AND BUILD_SHARED_LIBS)
# Copy the simdjson dll into the tests directory
Expand Down
63 changes: 63 additions & 0 deletions tests/prettify_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include <cinttypes>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <unistd.h>
#include <utility>
#include <vector>

#include "cast_tester.h"
#include "simdjson.h"
#include "test_macros.h"

// Inputs for the prettify round-trip test; each entry is passed to
// load_prettify() as a file name.
// NOTE(review): the *_JSON names are not defined in this file — presumably
// path macros supplied by the build system; confirm.
const char *test_files[] = {
TWITTER_JSON, TWITTER_TIMELINE_JSON, REPEAT_JSON, CANADA_JSON,
MESH_JSON, APACHE_JSON, GSOC_JSON};

/**
* The general idea of these tests is that if you take a JSON file,
* load it, then convert it into a string, then parse that, and
* convert it again into a second string, then the two strings should
* be identical. If not, then something was lost or added in the
* process.
*/

/**
 * Round-trip check for a single file: load it, prettify it, parse the
 * prettified text, prettify again, and compare the two strings.
 *
 * @param filename file to load.
 * @return true when both prettified strings are equal; false on a load or
 *         parse error, or when the strings differ.
 */
bool load_prettify(const char *filename) {
  std::cout << "Loading " << filename << std::endl;

  simdjson::dom::parser parser;
  simdjson::dom::element doc;

  // First pass: file -> DOM -> prettified string.
  const auto load_error = parser.load(filename).get(doc);
  if (load_error) {
    std::cerr << load_error << std::endl;
    return false;
  }
  auto first_pass = simdjson::prettify(doc);

  // Second pass: prettified string -> DOM -> prettified string.
  const auto parse_error = parser.parse(first_pass).get(doc);
  if (parse_error) {
    std::cerr << parse_error << std::endl;
    return false;
  }
  auto second_pass = simdjson::prettify(doc);

  if (!(first_pass == second_pass)) {
    std::cout << "The content differs!" << std::endl;
    return false;
  }
  std::cout << "Parsing prettify and calling prettify again results in the same "
               "content."
            << std::endl;
  return true;
}

/**
 * Runs load_prettify() on every entry of test_files, in order.
 *
 * @return false as soon as one file fails; true when all of them pass.
 */
bool prettify_test() {
  std::cout << "Running " << __func__ << std::endl;

  for (const char *path : test_files) {
    if (!load_prettify(path)) {
      return false;
    }
  }
  return true;
}

/** Runs the prettify tests and maps the outcome to the process exit status. */
int main() {
  if (prettify_test()) {
    return EXIT_SUCCESS;
  }
  return EXIT_FAILURE;
}

0 comments on commit 9039010

Please sign in to comment.