Skip to content

Commit

Permalink
[Picojson] Let the key of objects in json be ordered by default (#16863)
Browse files Browse the repository at this point in the history
Previously picojson defined `object` as an alias of `std::unordered_map`. That means when parsing json, the order of keys in objects is unspecified and implementation-dependent. This is inconvenient for certain applications, e.g. in LLM generation output, where we want the order of keys to be the same as the order in the json file.

This PR implements an ordered hashmap `ordered_hashmap` (named `object_with_ordered_keys` in the code) that 1) maintains the order in which the elements are inserted, and 2) has the same interface as `std::unordered_map`. Picojson will define object as an alias of this ordered hashmap, so the order of the input json is maintained when parsing.

Macro `PICOJSON_USE_ORDERED_OBJECT` controls whether object uses the ordered version or the unordered version. It is set by default.
  • Loading branch information
Ubospica authored and MasterJH5574 committed Apr 10, 2024
1 parent c7bdcab commit a12e0ce
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 0 deletions.
102 changes: 102 additions & 0 deletions 3rdparty/picojson/picojson.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,21 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

// Turn PICOJSON_USE_INT64 on unless the includer has already defined it. When it is turned on
// here, __STDC_FORMAT_MACROS is defined as well (presumably so that <cinttypes> exposes the
// PRId64-style format macros on older toolchains — confirm against the includes below).
#ifndef PICOJSON_USE_INT64
#define PICOJSON_USE_INT64
#define __STDC_FORMAT_MACROS 1
#endif

// PICOJSON_USE_ORDERED_OBJECT selects object_with_ordered_keys as the JSON object type. That
// class records the insertion order of keys, i.e. the order of keys in the json string.
// The macro is defined here (to 1) whenever the includer has not defined it already.
// NOTE(review): value::object tests this macro with #ifdef, so after this default-define the
// macro is always defined and pre-defining it to 0 still selects the ordered object — confirm
// whether a value test (#if) was intended.
#ifndef PICOJSON_USE_ORDERED_OBJECT
#define PICOJSON_USE_ORDERED_OBJECT 1
#endif

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
Expand Down Expand Up @@ -137,10 +146,17 @@ enum { INDENT_WIDTH = 2 };

// Empty tag type with no members (presumably stands for the JSON null value — confirm against
// its uses in value).
struct null {};

// Forward declaration: value::object may be typedef'd to this class, which is only defined
// after value itself.
class object_with_ordered_keys;

class value {
public:
typedef std::vector<value> array;
#ifdef PICOJSON_USE_ORDERED_OBJECT
typedef object_with_ordered_keys object;
#else
typedef std::unordered_map<std::string, value> object;
#endif

union _storage {
bool boolean_;
double number_;
Expand Down Expand Up @@ -220,6 +236,92 @@ class value {
void clear();
};

// The ordered version of hashmap. It has the same interface as std::unordered_map, but provides
// ordered_keys() to return the keys in the order they were inserted.
class object_with_ordered_keys : private std::unordered_map<std::string, value> {
public:
using typename std::unordered_map<std::string, value>::value_type;
using typename std::unordered_map<std::string, value>::iterator;
using typename std::unordered_map<std::string, value>::const_iterator;

object_with_ordered_keys() = default;
object_with_ordered_keys(const object_with_ordered_keys&) = default;
object_with_ordered_keys(object_with_ordered_keys&&) = default;
object_with_ordered_keys(std::initializer_list<value_type> init)
: std::unordered_map<std::string, value>(init) {
for (const auto& pair : init) {
ordered_keys_.push_back(pair.first);
}
}
object_with_ordered_keys& operator=(const object_with_ordered_keys&) = default;
object_with_ordered_keys& operator=(object_with_ordered_keys&&) = default;

using std::unordered_map<std::string, value>::begin;
using std::unordered_map<std::string, value>::end;
using std::unordered_map<std::string, value>::cbegin;
using std::unordered_map<std::string, value>::cend;
using std::unordered_map<std::string, value>::empty;
using std::unordered_map<std::string, value>::size;
using std::unordered_map<std::string, value>::at;
using std::unordered_map<std::string, value>::count;
using std::unordered_map<std::string, value>::find;

value& operator[](const std::string& key) {
if (count(key) == 0) {
ordered_keys_.push_back(key);
}
return std::unordered_map<std::string, value>::operator[](key);
}

void clear() {
std::unordered_map<std::string, value>::clear();
ordered_keys_.clear();
}

std::pair<iterator, bool> insert(const value_type& kv) {
if (!count(kv.first)) {
ordered_keys_.push_back(kv.first);
}
return std::unordered_map<std::string, value>::insert(kv);
}

template <class... Args>
std::pair<iterator, bool> emplace(Args&&... args) {
return insert(value_type(std::forward<Args>(args)...));
}

iterator erase(const_iterator it) {
ordered_keys_.erase(std::find(ordered_keys_.begin(), ordered_keys_.end(), it->first));
return std::unordered_map<std::string, value>::erase(it);
}

iterator erase(iterator it) {
ordered_keys_.erase(std::find(ordered_keys_.begin(), ordered_keys_.end(), it->first));
return std::unordered_map<std::string, value>::erase(it);
}

size_t erase(const std::string& key) {
if (std::unordered_map<std::string, value>::erase(key)) {
ordered_keys_.erase(std::find(ordered_keys_.begin(), ordered_keys_.end(), key));
return 1;
} else {
return 0;
}
}

const std::vector<std::string>& ordered_keys() const { return ordered_keys_; }

friend bool operator==(const object_with_ordered_keys& lhs, const object_with_ordered_keys& rhs);

private:
std::vector<std::string> ordered_keys_;
};

// Two objects are equal when their underlying hash maps are equal; the recorded insertion order
// of the keys does not take part in the comparison.
inline bool operator==(const object_with_ordered_keys& lhs, const object_with_ordered_keys& rhs) {
  typedef std::unordered_map<std::string, value> map_type;
  const map_type& lhs_map = static_cast<const map_type&>(lhs);
  const map_type& rhs_map = static_cast<const map_type&>(rhs);
  return lhs_map == rhs_map;
}

// Shorthands for the container types declared inside value.
using array = value::array;
using object = value::object;

Expand Down
65 changes: 65 additions & 0 deletions 3rdparty/picojson/test_picojson.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <cassert>
#include <sstream>

#include "picojson.h"

using picojson::object_with_ordered_keys;

void test_constructor() {
object_with_ordered_keys obj;
obj["foo"] = picojson::value(true);
assert((obj.ordered_keys() == std::vector<std::string>{"foo"}));

object_with_ordered_keys obj1{{"foo", picojson::value(true)}, {"bar", picojson::value(false)}};
assert((obj1.ordered_keys() == std::vector<std::string>{"foo", "bar"}));

object_with_ordered_keys obj2(obj1);
assert((obj2.ordered_keys() == std::vector<std::string>{"foo", "bar"}));

object_with_ordered_keys obj3(std::move(obj2));
assert((obj3.ordered_keys() == std::vector<std::string>{"foo", "bar"}));

obj = obj3;
assert((obj.ordered_keys() == std::vector<std::string>{"foo", "bar"}));
}

void test_modifier() {
object_with_ordered_keys obj{{"foo", picojson::value(true)}, {"bar", picojson::value(false)}};
obj.insert({"abc", picojson::value(false)});
assert((obj.ordered_keys() == std::vector<std::string>{"foo", "bar", "abc"}));
obj.emplace("def", picojson::value(true));
assert((obj.ordered_keys() == std::vector<std::string>{"foo", "bar", "abc", "def"}));
obj.insert({"abc", picojson::value(true)});
assert((obj.ordered_keys() == std::vector<std::string>{"foo", "bar", "abc", "def"}));
auto it = obj.find("abc");
it = obj.erase(it);
assert((obj.ordered_keys() == std::vector<std::string>{"foo", "bar", "def"}));
obj.erase("foo");
assert((obj.ordered_keys() == std::vector<std::string>{"bar", "def"}));
obj.clear();
assert((obj.ordered_keys() == std::vector<std::string>{}));
}

// Test driver: runs both test functions in order. The checks inside them are plain assert()
// calls; reaching the return means none of them aborted.
int main() {
test_constructor();
test_modifier();
return 0;
}

0 comments on commit a12e0ce

Please sign in to comment.