From 293d5708833d00e378f3da2937d88268a88e71e2 Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Tue, 12 Jan 2016 12:03:01 -0500 Subject: [PATCH] Code port to GitHub, added initial set of benchmarks --- .gitignore | 3 + LICENSE | 22 + Makefile | 1175 +++++++++++++++++++++++++++ src/binn/binn-file.c | 181 +++++ src/binn/binn-load.c | 175 ++++ src/binn/binn-write.c | 179 ++++ src/cmp/cmp-read.c | 180 ++++ src/cmp/cmp-write.c | 118 +++ src/common/benchmark.c | 277 +++++++ src/common/benchmark.h | 115 +++ src/common/buffer.h | 60 ++ src/common/generator.c | 380 +++++++++ src/common/generator.h | 98 +++ src/common/hash.h | 139 ++++ src/common/platform.h | 41 + src/hash/hash-data.c | 74 ++ src/hash/hash-object.c | 134 +++ src/jansson/jansson-dump.c | 127 +++ src/jansson/jansson-load.c | 176 ++++ src/libbson/libbson-append.c | 143 ++++ src/libbson/libbson-file.c | 136 ++++ src/libbson/libbson-iter.c | 152 ++++ src/mongo-cxx/mongo-cxx-builder.cpp | 165 ++++ src/mongo-cxx/mongo-cxx-obj.cpp | 131 +++ src/mpack/mpack-file.c | 107 +++ src/mpack/mpack-node.c | 123 +++ src/mpack/mpack-read.c | 137 ++++ src/mpack/mpack-write.c | 109 +++ src/msgpack/msgpack-c-pack.c | 116 +++ src/msgpack/msgpack-c-unpack.c | 123 +++ src/msgpack/msgpack-cpp-pack.cpp | 110 +++ src/msgpack/msgpack-cpp-unpack.cpp | 110 +++ src/rapidjson/rapidjson-dom.cpp | 130 +++ src/rapidjson/rapidjson-file.cpp | 131 +++ src/rapidjson/rapidjson-sax.cpp | 129 +++ src/rapidjson/rapidjson-write.cpp | 99 +++ src/ubj/ubj-file.c | 186 +++++ src/ubj/ubj-read.c | 189 +++++ src/ubj/ubj-write.c | 212 +++++ src/udp-json/json-builder.c | 136 ++++ src/udp-json/json-parser.c | 116 +++ src/yajl/yajl-gen.c | 118 +++ src/yajl/yajl-parse.c | 163 ++++ src/yajl/yajl-tree.c | 137 ++++ tools/results.py | 356 ++++++++ 45 files changed, 7418 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 src/binn/binn-file.c create mode 100644 src/binn/binn-load.c create mode 100644 
src/binn/binn-write.c create mode 100644 src/cmp/cmp-read.c create mode 100644 src/cmp/cmp-write.c create mode 100644 src/common/benchmark.c create mode 100644 src/common/benchmark.h create mode 100644 src/common/buffer.h create mode 100644 src/common/generator.c create mode 100644 src/common/generator.h create mode 100644 src/common/hash.h create mode 100644 src/common/platform.h create mode 100644 src/hash/hash-data.c create mode 100644 src/hash/hash-object.c create mode 100644 src/jansson/jansson-dump.c create mode 100644 src/jansson/jansson-load.c create mode 100644 src/libbson/libbson-append.c create mode 100644 src/libbson/libbson-file.c create mode 100644 src/libbson/libbson-iter.c create mode 100644 src/mongo-cxx/mongo-cxx-builder.cpp create mode 100644 src/mongo-cxx/mongo-cxx-obj.cpp create mode 100644 src/mpack/mpack-file.c create mode 100644 src/mpack/mpack-node.c create mode 100644 src/mpack/mpack-read.c create mode 100644 src/mpack/mpack-write.c create mode 100644 src/msgpack/msgpack-c-pack.c create mode 100644 src/msgpack/msgpack-c-unpack.c create mode 100644 src/msgpack/msgpack-cpp-pack.cpp create mode 100644 src/msgpack/msgpack-cpp-unpack.cpp create mode 100644 src/rapidjson/rapidjson-dom.cpp create mode 100644 src/rapidjson/rapidjson-file.cpp create mode 100644 src/rapidjson/rapidjson-sax.cpp create mode 100644 src/rapidjson/rapidjson-write.cpp create mode 100644 src/ubj/ubj-file.c create mode 100644 src/ubj/ubj-read.c create mode 100644 src/ubj/ubj-write.c create mode 100644 src/udp-json/json-builder.c create mode 100644 src/udp-json/json-parser.c create mode 100644 src/yajl/yajl-gen.c create mode 100644 src/yajl/yajl-parse.c create mode 100644 src/yajl/yajl-tree.c create mode 100755 tools/results.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e18bf64 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +build/ +contrib/ +results* diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..24654b2 --- /dev/null +++ 
b/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015-2016 Nicholas Fraser + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7a70d64 --- /dev/null +++ b/Makefile @@ -0,0 +1,1175 @@ + +# common configuration + +CC ?= gcc +CXX ?= g++ + +# Pass SIZE=1 to perform a size test instead of a speed test. +# We want to use either -O3 or -Os, but several libraries annoyingly +# override everything with -O2. We are in some cases manually +# editing configure and CMakeLists.txt files to get rid of them. +ifeq ($(SIZE), 1) + OPTCONFIG := -Os -DBENCHMARK_SIZE_OPTIMIZED=1 + OBJECT_SIZES := 2 + RESULTS_ARG := size + RESULTS_DOC := results-size + RESULTS_EXTENDED_DOC := results-size-extended +else + OPTCONFIG := -O3 + OBJECT_SIZES := 2 4 + RESULTS_ARG := speed + RESULTS_DOC := results + RESULTS_EXTENDED_DOC := results-extended +endif + +# Compiler optimization flags (both are added to LDFLAGS.) 
+OPTFLAGS := $(OPTCONFIG) -flto -fno-fat-lto-objects -DNDEBUG
+LDOPTFLAGS := -s -fuse-linker-plugin -fuse-ld=gold
+
+# Resolved compiler flags
+# We specify defaults of the most recent language standards. The individual
+# libraries may override them (e.g. YAJL which specifies -std=c99)
+CPPFLAGS := -Wall -fPIC -DPIC $(OPTFLAGS) -Isrc/common
+CFLAGS := $(CPPFLAGS) -std=c11
+CXXFLAGS := $(CPPFLAGS) -std=gnu++14 # we use anonymous structs
+LDFLAGS := $(CPPFLAGS) $(LDOPTFLAGS)
+
+# Other run configurations
+FILE_OBJECT_SIZES = 1 2 3 4 5
+ITERATIONS = 7
+
+
+# the default "all" target recurses back into the makefile (via $(MAKE))
+# to run everything twice, once optimized for speed and once optimized
+# for size. the run-iterations target again recurses back into the
+# makefile to run all tests repeatedly.
+
+.PHONY: all
+all: speed size
+
+.PHONY: speed
+speed:
+	$(MAKE) fetch
+	$(MAKE) clean-builds
+	$(MAKE) build data run-iterations results
+
+.PHONY: size
+size:
+	$(MAKE) fetch
+	$(MAKE) clean-builds
+	$(MAKE) SIZE=1 build data run-iterations results
+
+.PHONY: run-iterations
+run-iterations:
+	bash -c "for i in {1..${ITERATIONS}}; do $(MAKE) run; done"
+
+
+# global targets
+
+.PHONY: fetch
+fetch: fetch-mpack fetch-cmp fetch-msgpack fetch-rapidjson fetch-yajl fetch-jansson fetch-libbson fetch-binn fetch-udp-json fetch-ubj fetch-mongo-cxx
+
+.PHONY: build
+build: build-common build-hash build-mpack build-cmp build-msgpack build-rapidjson build-yajl build-jansson build-libbson build-binn build-udp-json build-ubj build-mongo-cxx
+
+.PHONY: run
+run: run-hash run-mpack run-cmp run-msgpack run-rapidjson run-yajl run-jansson run-libbson run-binn run-udp-json run-ubj run-mongo-cxx
+
+.PHONY: data
+data: data-mp data-json data-bson data-binn data-ubjson
+
+.PHONY: results
+results: $(RESULTS_DOC).md $(RESULTS_EXTENDED_DOC).md
+
+results.csv: results
+
+$(RESULTS_DOC).md: results.csv tools/results.py
+	echo "" > $(RESULTS_DOC).md
+	grep "^model name" /proc/cpuinfo | head -n 1 | sed 's/^.*\s*:/CPU model:/' | sed 's/$$/ /' >> $(RESULTS_DOC).md
+	grep "^bogomips" /proc/cpuinfo | head -n 1 | sed 's/^.*\s*:/Bogomips:/' >> $(RESULTS_DOC).md
+	date >> $(RESULTS_DOC).md
+	git rev-parse HEAD >> $(RESULTS_DOC).md
+	tools/results.py $(RESULTS_ARG) >> $(RESULTS_DOC).md
+
+$(RESULTS_EXTENDED_DOC).md: results.csv tools/results.py
+	echo "" > $(RESULTS_EXTENDED_DOC).md
+	grep "^model name" /proc/cpuinfo | head -n 1 | sed 's/^.*\s*:/CPU model:/' | sed 's/$$/ /' >> $(RESULTS_EXTENDED_DOC).md
+	grep "^bogomips" /proc/cpuinfo | head -n 1 | sed 's/^.*\s*:/Bogomips:/' >> $(RESULTS_EXTENDED_DOC).md
+	date >> $(RESULTS_EXTENDED_DOC).md
+	git rev-parse HEAD >> $(RESULTS_EXTENDED_DOC).md
+	tools/results.py $(RESULTS_ARG) extended >> $(RESULTS_EXTENDED_DOC).md
+
+# the html generators below use pandoc, but pandoc isn't
+# available on ARM, so we don't generate them by default
+
+$(RESULTS_DOC).html: $(RESULTS_DOC).md
+	touch $(RESULTS_DOC).html
+	pandoc --from markdown_github --to html -o $(RESULTS_DOC).html $(RESULTS_DOC).md
+	sed -i '1s@^@\n@' $(RESULTS_DOC).html
+	sed -i '1s@^@\n@' $(RESULTS_DOC).html
+	sed -i '1s@^@\n@' $(RESULTS_DOC).html
+	sed -i '1s@^@\n\n@' $(RESULTS_DOC).html
+
+$(RESULTS_EXTENDED_DOC).html: $(RESULTS_EXTENDED_DOC).md
+	touch $(RESULTS_EXTENDED_DOC).html
+	pandoc --from markdown_github --to html -o $(RESULTS_EXTENDED_DOC).html $(RESULTS_EXTENDED_DOC).md
+	sed -i '1s@^@\n@' $(RESULTS_EXTENDED_DOC).html
+	sed -i '1s@^@\n@' $(RESULTS_EXTENDED_DOC).html
+	sed -i '1s@^@\n@' $(RESULTS_EXTENDED_DOC).html
+	sed -i '1s@^@\n\n@' $(RESULTS_EXTENDED_DOC).html
+
+.PHONY: clean-builds
+clean-builds:
+	-rm -rf build
+	-( cd $(msgpack-dir) && ( $(MAKE) clean ; $(MAKE) distclean ) )
+	-( cd $(jansson-dir) && ( $(MAKE) clean ; $(MAKE) distclean ) )
+	-( cd $(libbson-dir) && ( $(MAKE) clean ; $(MAKE) distclean ) )
+	-( cd $(mongo-cxx-dir) && ( rm -r build ) )
+	-rm -rf "$(yajl-dir)/build/"
+	-rm -rf "$(ubj-dir)/build/"
+
+.PHONY: clean
+clean: clean-builds
+	-rm -rf results*
+
+.PHONY: distclean
+distclean: clean
+	-rm -rf contrib
+
+
+# common
+
+common-headers := src/common/generator.h src/common/benchmark.h
+common-objs := build/common/generator.o build/common/benchmark.o
+.PHONY: build-common
+build-common: $(common-objs)
+
+build/common/generator.o: $(common-headers) src/common/generator.c
+	mkdir -p build/common
+	$(CC) $(CFLAGS) -I contrib/mpack -c -o $@ src/common/generator.c
+
+build/common/benchmark.o: $(common-headers) src/common/benchmark.c
+	mkdir -p build/common
+	$(CC) $(CFLAGS) -I contrib/mpack -c -o $@ src/common/benchmark.c
+
+
+
+# hash benchmarks
+
+.PHONY: run-hash
+run-hash: run-hash-object run-hash-data
+
+.PHONY: build-hash
+build-hash: build/hash-object build/hash-data
+
+# hash-object
+
+build/hash/hash-object.o: $(common-headers) src/hash/hash-object.c
+	mkdir -p build/hash
+	$(CC) $(CFLAGS) -c -o $@ src/hash/hash-object.c
+
+build/hash-object: build/hash/hash-object.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-hash-object
+run-hash-object: build/hash-object
+	build/hash-object $(OBJECT_SIZES)
+
+# hash-data
+
+build/hash/hash-data.o: $(common-headers) src/hash/hash-data.c
+	mkdir -p build/hash
+	$(CC) $(CFLAGS) -c -o $@ src/hash/hash-data.c
+
+build/hash-data: build/hash/hash-data.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-hash-data
+run-hash-data: build/hash-data
+	build/hash-data $(OBJECT_SIZES)
+
+
+
+# mpack
+
+mpack-version := 0.8.1
+mpack-url := https://github.com/ludocode/mpack/releases/download/v$(mpack-version)/mpack-amalgamation-$(mpack-version).tar.gz
+mpack-dir := contrib/mpack/mpack-amalgamation-$(mpack-version)/src
+mpack-config := $(mpack-dir)/mpack-config.h
+
+.PHONY: fetch-mpack
+fetch-mpack: $(mpack-config)
+$(mpack-config):
+	mkdir -p contrib/mpack
+	cd contrib/mpack && \
+		curl -fLO $(mpack-url) && \
+		tar -xzf mpack-amalgamation-$(mpack-version).tar.gz
+	cp $(mpack-dir)/mpack-config.h.sample $(mpack-config)
+
+build/mpack/mpack.o: $(mpack-config)
+	mkdir -p build/mpack
+	$(CC) $(CFLAGS) -I $(mpack-dir) -c -o $@ $(mpack-dir)/mpack/mpack.c
+
+MPACK_TRACKING_FLAGS := -DMPACK_READ_TRACKING=1 -DMPACK_WRITE_TRACKING=1
+build/mpack/mpack-tracking.o: $(mpack-config)
+	mkdir -p build/mpack
+	$(CC) $(CFLAGS) $(MPACK_TRACKING_FLAGS) -I $(mpack-dir) -c -o $@ $(mpack-dir)/mpack/mpack.c
+
+.PHONY: run-mpack
+run-mpack: run-mpack-write run-mpack-read run-mpack-node run-mpack-tracking-write run-mpack-tracking-read run-mpack-utf8-read run-mpack-utf8-node
+
+.PHONY: build-mpack
+build-mpack: build/mpack-file build/mpack-write build/mpack-read build/mpack-node build/mpack-tracking-write build/mpack-tracking-read build/mpack-utf8-read build/mpack-utf8-node
+
+# mpack-file
+
+build/mpack/mpack-file.o: $(common-headers) $(mpack-config) src/mpack/mpack-file.c
+	mkdir -p build/mpack
+	$(CC) $(CFLAGS) -I $(mpack-dir) -c -o $@ src/mpack/mpack-file.c
+
+build/mpack-file: build/mpack/mpack.o build/mpack/mpack-file.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: data-mp
+data-mp: run-mpack-file
+
+.PHONY: run-mpack-file
+run-mpack-file: build/mpack-file
+	build/mpack-file $(FILE_OBJECT_SIZES)
+build/data.mp: run-mpack-file
+
+# mpack-write
+
+build/mpack/mpack-write.o: $(common-headers) $(mpack-config) src/mpack/mpack-write.c
+	mkdir -p build/mpack
+	$(CC) $(CFLAGS) -I $(mpack-dir) -c -o $@ src/mpack/mpack-write.c
+
+build/mpack-write: build/mpack/mpack.o build/mpack/mpack-write.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-mpack-write
+run-mpack-write: build/mpack-write
+	build/mpack-write $(OBJECT_SIZES)
+
+# mpack-read
+
+build/mpack/mpack-read.o: $(common-headers) $(mpack-config) src/mpack/mpack-read.c
+	mkdir -p build/mpack
+	$(CC) $(CFLAGS) -I $(mpack-dir) -c -o $@ src/mpack/mpack-read.c
+
+build/mpack-read: build/mpack/mpack.o build/mpack/mpack-read.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-mpack-read
+run-mpack-read: build/mpack-read data-mp
+	build/mpack-read $(OBJECT_SIZES)
+
+# mpack-node
+
+build/mpack/mpack-node.o: $(common-headers) $(mpack-config) src/mpack/mpack-node.c
+	mkdir -p build/mpack
+	$(CC) $(CFLAGS) -I $(mpack-dir) -c -o $@ src/mpack/mpack-node.c
+
+build/mpack-node: build/mpack/mpack.o build/mpack/mpack-node.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-mpack-node
+run-mpack-node: build/mpack-node data-mp
+	build/mpack-node $(OBJECT_SIZES)
+
+# mpack-tracking-write
+
+build/mpack/mpack-tracking-write.o: $(common-headers) $(mpack-config) src/mpack/mpack-write.c
+	mkdir -p build/mpack
+	$(CC) $(CFLAGS) $(MPACK_TRACKING_FLAGS) -I $(mpack-dir) -c -o $@ src/mpack/mpack-write.c
+
+build/mpack-tracking-write: build/mpack/mpack-tracking.o build/mpack/mpack-tracking-write.o $(common-objs)
+	$(CC) $(LDFLAGS) $(MPACK_TRACKING_FLAGS) -o $@ $^
+
+.PHONY: run-mpack-tracking-write
+run-mpack-tracking-write: build/mpack-tracking-write
+	build/mpack-tracking-write $(OBJECT_SIZES)
+
+# mpack-tracking-read
+
+build/mpack/mpack-tracking-read.o: $(common-headers) $(mpack-config) src/mpack/mpack-read.c
+	mkdir -p build/mpack
+	$(CC) $(CFLAGS) $(MPACK_TRACKING_FLAGS) -I $(mpack-dir) -c -o $@ src/mpack/mpack-read.c
+
+build/mpack-tracking-read: build/mpack/mpack-tracking.o build/mpack/mpack-tracking-read.o $(common-objs)
+	$(CC) $(LDFLAGS) $(MPACK_TRACKING_FLAGS) -o $@ $^
+
+.PHONY: run-mpack-tracking-read
+run-mpack-tracking-read: build/mpack-tracking-read data-mp
+	build/mpack-tracking-read $(OBJECT_SIZES)
+
+# mpack-utf8-read
+
+build/mpack/mpack-utf8-read.o: $(common-headers) $(mpack-config) src/mpack/mpack-read.c
+	mkdir -p build/mpack
+	$(CC) $(CFLAGS) -DCHECK_UTF8=1 -I $(mpack-dir) -c -o $@ src/mpack/mpack-read.c
+
+build/mpack-utf8-read: build/mpack/mpack.o build/mpack/mpack-utf8-read.o $(common-objs)
+	$(CC) $(LDFLAGS) -DCHECK_UTF8=1 -o $@ $^
+
+.PHONY: run-mpack-utf8-read
+run-mpack-utf8-read: build/mpack-utf8-read data-mp
+	build/mpack-utf8-read $(OBJECT_SIZES)
+
+# mpack-utf8-node
+
+build/mpack/mpack-utf8-node.o: $(common-headers) $(mpack-config) src/mpack/mpack-node.c
+	mkdir -p build/mpack
+	$(CC) $(CFLAGS) -DCHECK_UTF8=1 -I $(mpack-dir) -c -o $@ src/mpack/mpack-node.c
+
+build/mpack-utf8-node: build/mpack/mpack.o build/mpack/mpack-utf8-node.o $(common-objs)
+	$(CC) $(LDFLAGS) -DCHECK_UTF8=1 -o $@ $^
+
+.PHONY: run-mpack-utf8-node
+run-mpack-utf8-node: build/mpack-utf8-node data-mp
+	build/mpack-utf8-node $(OBJECT_SIZES)
+
+
+
+# cmp
+
+# as of this writing, cmp has not done a release since version 10, but we'll
+# give them the benefit of the doubt and just use the current contents of the
+# repository (since that is most likely how it is intended to be used.)
+cmp-revision := daec9e09d7f7f79209b318f4a71a23981e3879be
+cmp-url := https://github.com/camgunz/cmp/archive/$(cmp-revision).tar.gz
+cmp-dir := contrib/cmp/cmp-$(cmp-revision)
+cmp-header := $(cmp-dir)/cmp.h
+
+.PHONY: fetch-cmp
+fetch-cmp: $(cmp-header)
+$(cmp-header):
+	mkdir -p contrib/cmp
+	cd contrib/cmp && \
+		curl -fLO $(cmp-url) && \
+		tar -xzf $(cmp-revision).tar.gz
+
+build/cmp/cmp.o: $(cmp-header)
+	mkdir -p build/cmp
+	$(CC) $(CFLAGS) -I $(cmp-dir) -c -o build/cmp/cmp.o $(cmp-dir)/cmp.c
+
+.PHONY: run-cmp
+run-cmp: run-cmp-write run-cmp-read
+
+.PHONY: build-cmp
+build-cmp: build/cmp-write build/cmp-read
+
+# cmp-read
+
+build/cmp/cmp-read.o: $(common-headers) $(cmp-header) src/cmp/cmp-read.c
+	mkdir -p build/cmp
+	$(CC) $(CFLAGS) -I $(cmp-dir) -c -o $@ src/cmp/cmp-read.c
+
+build/cmp-read: build/cmp/cmp.o build/cmp/cmp-read.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-cmp-read
+run-cmp-read: build/cmp-read data-mp
+	build/cmp-read $(OBJECT_SIZES)
+
+# cmp-write
+
+build/cmp/cmp-write.o: $(common-headers) $(cmp-header) src/cmp/cmp-write.c
+	mkdir -p build/cmp
+	$(CC) $(CFLAGS) -I $(cmp-dir) -c -o $@ src/cmp/cmp-write.c
+
+build/cmp-write: build/cmp/cmp.o build/cmp/cmp-write.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-cmp-write
+run-cmp-write: build/cmp-write
+	build/cmp-write $(OBJECT_SIZES)
+
+
+
+# msgpack
+
+msgpack-version := 1.3.0
+msgpack-url := https://github.com/msgpack/msgpack-c/releases/download/cpp-$(msgpack-version)/msgpack-$(msgpack-version).tar.gz
+msgpack-dir := contrib/msgpack/msgpack-$(msgpack-version)
+msgpack-lib := $(msgpack-dir)/src/.libs/libmsgpack.a
+msgpack-header := $(msgpack-dir)/include/msgpack.h
+
+.PHONY: fetch-msgpack
+fetch-msgpack: $(msgpack-header)
+$(msgpack-header):
+	mkdir -p contrib/msgpack
+	cd contrib/msgpack && \
+		curl -fLO $(msgpack-url) && \
+		tar -xzf msgpack-$(msgpack-version).tar.gz
+
+$(msgpack-lib): $(msgpack-header)
+	# libtool strips off -fuse-ld=gold, so we pass all our compiler flags
+	# as part of CC and CXX. we also pass them as CFLAGS and CXXFLAGS to
+	# prevent msgpack from overriding them.
+	cd $(msgpack-dir) && CC="$(CC) $(CFLAGS)" CXX="$(CXX) $(CXXFLAGS)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" ./configure && make
+
+.PHONY: run-msgpack
+run-msgpack: run-msgpack-c-pack run-msgpack-cpp-pack run-msgpack-c-unpack run-msgpack-cpp-unpack
+
+.PHONY: build-msgpack
+build-msgpack: build/msgpack-c-pack build/msgpack-cpp-pack build/msgpack-c-unpack build/msgpack-cpp-unpack
+
+# msgpack-c-unpack
+
+build/msgpack/msgpack-c-unpack.o: $(common-headers) $(msgpack-lib) src/msgpack/msgpack-c-unpack.c
+	mkdir -p build/msgpack
+	$(CC) $(CFLAGS) -I $(msgpack-dir) -I $(msgpack-dir)/include -c -o $@ src/msgpack/msgpack-c-unpack.c
+
+build/msgpack-c-unpack: build/msgpack/msgpack-c-unpack.o $(common-objs) $(msgpack-lib)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-msgpack-c-unpack
+run-msgpack-c-unpack: build/msgpack-c-unpack data-mp
+	build/msgpack-c-unpack $(OBJECT_SIZES)
+
+# msgpack-cpp-unpack
+
+build/msgpack/msgpack-cpp-unpack.o: $(common-headers) $(msgpack-lib) src/msgpack/msgpack-cpp-unpack.cpp
+	mkdir -p build/msgpack
+	$(CXX) $(CXXFLAGS) -I $(msgpack-dir) -I $(msgpack-dir)/include -c -o $@ src/msgpack/msgpack-cpp-unpack.cpp
+
+build/msgpack-cpp-unpack: build/msgpack/msgpack-cpp-unpack.o $(common-objs) $(msgpack-lib)
+	$(CXX) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-msgpack-cpp-unpack
+run-msgpack-cpp-unpack: build/msgpack-cpp-unpack data-mp
+	build/msgpack-cpp-unpack $(OBJECT_SIZES)
+
+# msgpack-c-pack
+
+build/msgpack/msgpack-c-pack.o: $(common-headers) $(msgpack-lib) src/msgpack/msgpack-c-pack.c
+	mkdir -p build/msgpack
+	$(CC) $(CFLAGS) -I $(msgpack-dir) -I $(msgpack-dir)/include -c -o $@ src/msgpack/msgpack-c-pack.c
+
+build/msgpack-c-pack: build/msgpack/msgpack-c-pack.o $(common-objs) $(msgpack-lib)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-msgpack-c-pack
+run-msgpack-c-pack: build/msgpack-c-pack
+	build/msgpack-c-pack $(OBJECT_SIZES)
+
+# msgpack-cpp-pack
+
+build/msgpack/msgpack-cpp-pack.o: $(common-headers) $(msgpack-lib) src/msgpack/msgpack-cpp-pack.cpp
+	mkdir -p build/msgpack
+	$(CXX) $(CXXFLAGS) -I $(msgpack-dir) -I $(msgpack-dir)/include -c -o $@ src/msgpack/msgpack-cpp-pack.cpp
+
+build/msgpack-cpp-pack: build/msgpack/msgpack-cpp-pack.o $(common-objs) $(msgpack-lib)
+	$(CXX) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-msgpack-cpp-pack
+run-msgpack-cpp-pack: build/msgpack-cpp-pack
+	build/msgpack-cpp-pack $(OBJECT_SIZES)
+
+
+
+# rapidjson
+
+rapidjson-version := 1.0.2
+rapidjson-url := https://github.com/miloyip/rapidjson/archive/v$(rapidjson-version).tar.gz
+rapidjson-dir := contrib/rapidjson/rapidjson-$(rapidjson-version)
+rapidjson-tarball := v$(rapidjson-version).tar.gz
+rapidjson-header := $(rapidjson-dir)/include/rapidjson/rapidjson.h
+
+.PHONY: fetch-rapidjson
+fetch-rapidjson: $(rapidjson-header)
+$(rapidjson-header):
+	mkdir -p contrib/rapidjson
+	cd contrib/rapidjson && \
+		curl -fLO $(rapidjson-url) && \
+		tar -xzf $(rapidjson-tarball)
+
+.PHONY: run-rapidjson
+run-rapidjson: run-rapidjson-write run-rapidjson-sax run-rapidjson-insitu-sax run-rapidjson-dom run-rapidjson-insitu-dom
+
+.PHONY: build-rapidjson
+build-rapidjson: build/rapidjson-file build/rapidjson-write build/rapidjson-sax build/rapidjson-insitu-sax build/rapidjson-dom build/rapidjson-insitu-dom
+
+# rapidjson-file
+
+build/rapidjson/rapidjson-file.o: $(common-headers) $(rapidjson-header) src/rapidjson/rapidjson-file.cpp
+	mkdir -p build/rapidjson
+	$(CXX) $(CXXFLAGS) -I $(rapidjson-dir) -I $(rapidjson-dir)/include -c -o $@ src/rapidjson/rapidjson-file.cpp
+
+build/rapidjson-file: build/rapidjson/rapidjson-file.o $(common-objs)
+	$(CXX) $(LDFLAGS) -o $@ $^
+
+.PHONY: data-json
+data-json: run-rapidjson-file
+
+.PHONY: run-rapidjson-file
+run-rapidjson-file: build/rapidjson-file
+	build/rapidjson-file $(FILE_OBJECT_SIZES)
+build/data.json: run-rapidjson-file
+
+# rapidjson-write
+
+build/rapidjson/rapidjson-write.o: $(common-headers) $(rapidjson-header) src/rapidjson/rapidjson-write.cpp
+	mkdir -p build/rapidjson
+	$(CXX) $(CXXFLAGS) -I $(rapidjson-dir) -I $(rapidjson-dir)/include -c -o $@ src/rapidjson/rapidjson-write.cpp
+
+build/rapidjson-write: build/rapidjson/rapidjson-write.o $(common-objs)
+	$(CXX) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-rapidjson-write
+run-rapidjson-write: build/rapidjson-write
+	build/rapidjson-write $(OBJECT_SIZES)
+
+# rapidjson-sax
+
+build/rapidjson/rapidjson-sax.o: $(common-headers) $(rapidjson-header) src/rapidjson/rapidjson-sax.cpp
+	mkdir -p build/rapidjson
+	$(CXX) $(CXXFLAGS) -I $(rapidjson-dir) -I $(rapidjson-dir)/include -c -o $@ src/rapidjson/rapidjson-sax.cpp
+
+build/rapidjson-sax: build/rapidjson/rapidjson-sax.o $(common-objs)
+	$(CXX) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-rapidjson-sax
+run-rapidjson-sax: build/rapidjson-sax data-json
+	build/rapidjson-sax $(OBJECT_SIZES)
+
+# rapidjson-insitu-sax
+
+build/rapidjson/rapidjson-insitu-sax.o: $(common-headers) $(rapidjson-header) src/rapidjson/rapidjson-sax.cpp
+	mkdir -p build/rapidjson
+	$(CXX) $(CXXFLAGS) -DBENCHMARK_IN_SITU=1 -I $(rapidjson-dir) -I $(rapidjson-dir)/include -c -o $@ src/rapidjson/rapidjson-sax.cpp
+
+build/rapidjson-insitu-sax: build/rapidjson/rapidjson-insitu-sax.o $(common-objs)
+	$(CXX) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-rapidjson-insitu-sax
+run-rapidjson-insitu-sax: build/rapidjson-insitu-sax data-json
+	build/rapidjson-insitu-sax $(OBJECT_SIZES)
+
+# rapidjson-dom
+
+build/rapidjson/rapidjson-dom.o: $(common-headers) $(rapidjson-header) src/rapidjson/rapidjson-dom.cpp
+	mkdir -p build/rapidjson
+	$(CXX) $(CXXFLAGS) -I $(rapidjson-dir) -I $(rapidjson-dir)/include -c -o $@ src/rapidjson/rapidjson-dom.cpp
+
+build/rapidjson-dom: build/rapidjson/rapidjson-dom.o $(common-objs)
+	$(CXX) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-rapidjson-dom
+run-rapidjson-dom: build/rapidjson-dom data-json
+	build/rapidjson-dom $(OBJECT_SIZES)
+
+# rapidjson-insitu-dom
+
+build/rapidjson/rapidjson-insitu-dom.o: $(common-headers) $(rapidjson-header) src/rapidjson/rapidjson-dom.cpp
+	mkdir -p build/rapidjson
+	$(CXX) $(CXXFLAGS) -DBENCHMARK_IN_SITU=1 -I $(rapidjson-dir) -I $(rapidjson-dir)/include -c -o $@ src/rapidjson/rapidjson-dom.cpp
+
+build/rapidjson-insitu-dom: build/rapidjson/rapidjson-insitu-dom.o $(common-objs)
+	$(CXX) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-rapidjson-insitu-dom
+run-rapidjson-insitu-dom: build/rapidjson-insitu-dom data-json
+	build/rapidjson-insitu-dom $(OBJECT_SIZES)
+
+
+
+# yajl
+
+yajl-version := 2.1.0
+yajl-url := https://github.com/lloyd/yajl/tarball/$(yajl-version)
+yajl-tarball := yajl-$(yajl-version).tar.gz
+yajl-dir := contrib/yajl/lloyd-yajl-66cb08c
+yajl-include := $(yajl-dir)/build/yajl-$(yajl-version)/include
+yajl-lib := $(yajl-dir)/build/yajl-$(yajl-version)/lib/libyajl_s.a
+yajl-src := $(yajl-dir)/src/api/yajl_parse.h
+
+.PHONY: fetch-yajl
+fetch-yajl: $(yajl-src)
+$(yajl-src):
+	mkdir -p contrib/yajl
+	cd contrib/yajl && \
+		curl -fL -o $(yajl-tarball) $(yajl-url) && \
+		tar -xzf $(yajl-tarball)
+
+$(yajl-lib): $(yajl-src)
+	# We have to clear out YAJL's release flags since they override
+	# ours (with -O2, which seems to be about half the speed of -O3)
+	sed -i '/CMAKE_C_FLAGS_RELEASE/d' $(yajl-dir)/CMakeLists.txt
+
+	# Due to a bug in CMake 3.4.0, we have to manually invoke cmake with
+	# -DCMAKE_C_STANDARD_COMPUTED_DEFAULT=GNU in order to have -flto in
+	# our CFLAGS. This will apparently be fixed in a later release:
+	# http://public.kitware.com/pipermail/cmake-developers/2015-November/026993.html
+	mkdir -p $(yajl-dir)/build
+	cd $(yajl-dir)/build && \
+		CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="" LDFLAGS="$(LDFLAGS)" \
+		cmake .. \
+		-DCMAKE_C_FLAGS_RELEASE="" \
+		-DCMAKE_C_STANDARD_COMPUTED_DEFAULT=GNU \
+		-DCMAKE_VERBOSE_MAKEFILE=1 \
+		&& make
+
+.PHONY: run-yajl
+run-yajl: run-yajl-gen run-yajl-parse run-yajl-tree
+
+.PHONY: build-yajl
+build-yajl: build/yajl-gen build/yajl-parse build/yajl-tree
+
+# yajl-gen
+
+build/yajl/yajl-gen.o: $(common-headers) $(yajl-lib) src/yajl/yajl-gen.c
+	mkdir -p build/yajl
+	$(CC) $(CFLAGS) -I $(yajl-include) -c -o $@ src/yajl/yajl-gen.c
+
+build/yajl-gen: build/yajl/yajl-gen.o $(common-objs) $(yajl-lib)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-yajl-gen
+run-yajl-gen: build/yajl-gen
+	build/yajl-gen $(OBJECT_SIZES)
+
+# yajl-parse
+
+build/yajl/yajl-parse.o: $(common-headers) $(yajl-lib) src/yajl/yajl-parse.c
+	mkdir -p build/yajl
+	$(CC) $(CFLAGS) -I $(yajl-include) -c -o $@ src/yajl/yajl-parse.c
+
+build/yajl-parse: build/yajl/yajl-parse.o $(common-objs) $(yajl-lib)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-yajl-parse
+run-yajl-parse: build/yajl-parse data-json
+	build/yajl-parse $(OBJECT_SIZES)
+
+# yajl-tree
+
+build/yajl/yajl-tree.o: $(common-headers) $(yajl-lib) src/yajl/yajl-tree.c
+	mkdir -p build/yajl
+	$(CC) $(CFLAGS) -I $(yajl-include) -c -o $@ src/yajl/yajl-tree.c
+
+build/yajl-tree: build/yajl/yajl-tree.o $(common-objs) $(yajl-lib)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-yajl-tree
+run-yajl-tree: build/yajl-tree data-json
+	build/yajl-tree $(OBJECT_SIZES)
+
+
+
+# jansson
+
+jansson-version := 2.7
+jansson-url := http://www.digip.org/jansson/releases/jansson-$(jansson-version).tar.gz
+jansson-dir := contrib/jansson/jansson-$(jansson-version)
+jansson-include := $(jansson-dir)/src
+jansson-lib := $(jansson-dir)/src/.libs/libjansson.a
+jansson-header := $(jansson-dir)/src/jansson.h
+
+.PHONY: fetch-jansson
+fetch-jansson: $(jansson-header)
+$(jansson-header):
+	mkdir -p contrib/jansson
+	cd contrib/jansson && \
+		curl -fLO $(jansson-url) && \
+		tar -xzf jansson-$(jansson-version).tar.gz
+
+$(jansson-lib): $(jansson-header)
+	# jansson lets us specify our own flags! thank you for setting this up properly!
+	cd $(jansson-dir) && CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" ./configure && make V=1
+
+.PHONY: run-jansson
+run-jansson: run-jansson-dump run-jansson-load run-jansson-ordered-dump run-jansson-ordered-load
+
+.PHONY: build-jansson
+build-jansson: build/jansson-dump build/jansson-load build/jansson-ordered-dump build/jansson-ordered-load
+
+# jansson-dump
+
+build/jansson/jansson-dump.o: $(common-headers) $(jansson-lib) src/jansson/jansson-dump.c
+	mkdir -p build/jansson
+	$(CC) $(CFLAGS) -I $(jansson-include) -c -o $@ src/jansson/jansson-dump.c
+
+build/jansson-dump: build/jansson/jansson-dump.o $(common-objs) $(jansson-lib)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-jansson-dump
+run-jansson-dump: build/jansson-dump
+	build/jansson-dump $(OBJECT_SIZES)
+
+# jansson-load
+
+build/jansson/jansson-load.o: $(common-headers) $(jansson-lib) src/jansson/jansson-load.c
+	mkdir -p build/jansson
+	$(CC) $(CFLAGS) -I $(jansson-include) -c -o $@ src/jansson/jansson-load.c
+
+build/jansson-load: build/jansson/jansson-load.o $(common-objs) $(jansson-lib)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-jansson-load
+run-jansson-load: build/jansson-load data-json
+	build/jansson-load $(OBJECT_SIZES)
+
+# jansson-ordered-dump
+
+build/jansson/jansson-ordered-dump.o: $(common-headers) $(jansson-lib) src/jansson/jansson-dump.c
+	mkdir -p build/jansson
+	$(CC) $(CFLAGS) -DPRESERVE_ORDER=1 -I $(jansson-include) -c -o $@ src/jansson/jansson-dump.c
+
+build/jansson-ordered-dump: build/jansson/jansson-ordered-dump.o $(common-objs) $(jansson-lib)
+	$(CC) $(LDFLAGS) -DPRESERVE_ORDER=1 -o $@ $^
+
+.PHONY: run-jansson-ordered-dump
+run-jansson-ordered-dump: build/jansson-ordered-dump
+	build/jansson-ordered-dump $(OBJECT_SIZES)
+
+# jansson-ordered-load
+
+build/jansson/jansson-ordered-load.o: $(common-headers) $(jansson-lib) src/jansson/jansson-load.c
+	mkdir -p build/jansson
+	$(CC) $(CFLAGS) -DPRESERVE_ORDER=1 -I $(jansson-include) -c -o $@ src/jansson/jansson-load.c
+
+build/jansson-ordered-load: build/jansson/jansson-ordered-load.o $(common-objs) $(jansson-lib)
+	$(CC) $(LDFLAGS) -DPRESERVE_ORDER=1 -o $@ $^
+
+.PHONY: run-jansson-ordered-load
+run-jansson-ordered-load: build/jansson-ordered-load data-json
+	build/jansson-ordered-load $(OBJECT_SIZES)
+
+
+
+# libbson
+
+libbson-version := 1.3.0
+libbson-tarball := libbson-$(libbson-version).tar.gz
+libbson-url := https://github.com/mongodb/libbson/releases/download/$(libbson-version)/$(libbson-tarball)
+libbson-dir := contrib/libbson/libbson-$(libbson-version)
+libbson-lib := $(libbson-dir)/.libs/libbson.a
+libbson-src := $(libbson-dir)/src/bson/bson.h
+libbson-include := $(libbson-dir)/src/bson
+
+.PHONY: fetch-libbson
+fetch-libbson: $(libbson-src)
+$(libbson-src):
+	mkdir -p contrib/libbson
+	cd contrib/libbson && \
+		curl -fL -O $(libbson-url) && \
+		tar -xzf $(libbson-tarball)
+
+$(libbson-lib): $(libbson-src)
+	# We have to clear out libbson's release flags since they override ours with -O2
+	sed -i 's/-O2/ /g' $(libbson-dir)/configure
+	mkdir -p $(libbson-dir)/build
+	cd $(libbson-dir) && CFLAGS=" $(CFLAGS) " CXXFLAGS=" $(CXXFLAGS) " CPPFLAGS=" " LDFLAGS=" $(LDFLAGS) " ./configure && make V=1 libbson.la
+
+.PHONY: run-libbson
+run-libbson: run-libbson-append run-libbson-iter
+
+.PHONY: build-libbson
+build-libbson: build/libbson-file build/libbson-append build/libbson-iter
+
+# libbson requires pthreads, at least in debug mode (-flto seems
+# to be able to eliminate this dependency, but we still want to
+# be able to build without optimization for testing purposes.)
+BSONLDFLAGS := $(LDFLAGS) -pthread
+
+# libbson-file
+
+build/libbson/libbson-file.o: $(common-headers) $(libbson-lib) src/libbson/libbson-file.c
+	mkdir -p build/libbson
+	$(CC) $(CFLAGS) -I $(libbson-include) -c -o $@ src/libbson/libbson-file.c
+
+build/libbson-file: build/libbson/libbson-file.o $(common-objs) $(libbson-lib)
+	$(CC) $(BSONLDFLAGS) -o $@ $^
+
+.PHONY: data-bson
+data-bson: run-libbson-file
+
+.PHONY: run-libbson-file
+run-libbson-file: build/libbson-file
+	build/libbson-file $(FILE_OBJECT_SIZES)
+build/data.bson: run-libbson-file
+
+# libbson-append
+
+build/libbson/libbson-append.o: $(common-headers) $(libbson-lib) src/libbson/libbson-append.c
+	mkdir -p build/libbson
+	$(CC) $(CFLAGS) -I $(libbson-include) -c -o $@ src/libbson/libbson-append.c
+
+build/libbson-append: build/libbson/libbson-append.o $(common-objs) $(libbson-lib)
+	$(CC) $(BSONLDFLAGS) -o $@ $^
+
+.PHONY: run-libbson-append
+run-libbson-append: build/libbson-append
+	build/libbson-append $(OBJECT_SIZES)
+
+# libbson-iter
+
+build/libbson/libbson-iter.o: $(common-headers) $(libbson-lib) src/libbson/libbson-iter.c
+	mkdir -p build/libbson
+	$(CC) $(CFLAGS) -I $(libbson-include) -c -o $@ src/libbson/libbson-iter.c
+
+build/libbson-iter: build/libbson/libbson-iter.o $(common-objs) $(libbson-lib)
+	$(CC) $(BSONLDFLAGS) -o $@ $^
+
+.PHONY: run-libbson-iter
+run-libbson-iter: build/libbson-iter data-bson
+	build/libbson-iter $(OBJECT_SIZES)
+
+
+
+# binn
+
+# binn has not done official releases yet. it also does
+# not appear to have a version number.
+binn-version := b3e2c27
+binn-revision := b3e2c27c59747f2c79561fb0ee1c4228633d5581
+binn-url := https://github.com/liteserver/binn/archive/$(binn-revision).tar.gz
+binn-dir := contrib/binn/binn-$(binn-revision)/src
+binn-header := $(binn-dir)/binn.h
+
+.PHONY: fetch-binn
+fetch-binn: $(binn-header)
+$(binn-header):
+	mkdir -p contrib/binn
+	cd contrib/binn && \
+		curl -LO $(binn-url) && \
+		tar -xzf $(binn-revision).tar.gz
+
+# binn needs __BYTE_ORDER, so it needs sys/param.h (on RPi)
+# also since binn does not publish a version number, we'll
+# just insert the revision on the command line.
+BINNFLAGS = $(CFLAGS) \
+	-include sys/param.h \
+	-DBENCHMARK_BINN_VERSION='"'$(binn-version)'"'
+
+# binn suggests linking against the prebuilt library as a possible
+# way to use it, but the makefile ignores CFLAGS and does not specify
+# an optimization level. we'll just build it ourselves.
+build/binn/binn.o: $(binn-header)
+	mkdir -p build/binn
+	$(CC) $(BINNFLAGS) -I $(binn-dir) -c -o build/binn/binn.o $(binn-dir)/binn.c
+
+.PHONY: run-binn
+run-binn: run-binn-write run-binn-load
+
+.PHONY: build-binn
+build-binn: build/binn-file build/binn-write build/binn-load
+
+# binn-file (generates the binn data files that run-binn-load reads via data-binn)
+
+build/binn/binn-file.o: $(common-headers) $(binn-header) src/binn/binn-file.c
+	mkdir -p build/binn
+	$(CC) $(BINNFLAGS) -I $(binn-dir) -c -o $@ src/binn/binn-file.c
+
+build/binn-file: build/binn/binn.o build/binn/binn-file.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: data-binn
+data-binn: run-binn-file
+
+.PHONY: run-binn-file
+run-binn-file: build/binn-file
+	build/binn-file $(FILE_OBJECT_SIZES)
+build/data.binn: run-binn-file
+
+# binn-write
+
+build/binn/binn-write.o: $(common-headers) $(binn-header) src/binn/binn-write.c
+	mkdir -p build/binn
+	$(CC) $(BINNFLAGS) -I $(binn-dir) -c -o $@ src/binn/binn-write.c
+
+build/binn-write: build/binn/binn.o build/binn/binn-write.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+.PHONY: run-binn-write
+run-binn-write: build/binn-write
+
build/binn-write $(OBJECT_SIZES) + +# binn-load + +build/binn/binn-load.o: $(common-headers) $(binn-header) src/binn/binn-load.c + mkdir -p build/binn + $(CC) $(BINNFLAGS) -I $(binn-dir) -c -o $@ src/binn/binn-load.c + +build/binn-load: build/binn/binn.o build/binn/binn-load.o $(common-objs) + $(CC) $(LDFLAGS) -o $@ $^ + +.PHONY: run-binn-load +run-binn-load: build/binn-load data-binn + build/binn-load $(OBJECT_SIZES) + + + +# ubj + +# ubj has not done official releases yet. it also does +# not appear to have a version number. +ubj-version := f4d85c4 +ubj-revision := f4d85c4b848577d1f4d7b4437088609d2ee1fb08 +ubj-url := https://github.com/Steve132/ubj/archive/$(ubj-revision).tar.gz +ubj-dir := contrib/ubj/ubj-$(ubj-revision) +ubj-header := $(ubj-dir)/ubj.h +ubj-lib := $(ubj-dir)/build/libubj.a + +.PHONY: fetch-ubj +fetch-ubj: $(ubj-header) +$(ubj-header): + mkdir -p contrib/ubj + cd contrib/ubj ;\ + curl -LO $(ubj-url) ;\ + tar -xzf $(ubj-revision).tar.gz ;\ + +# ubj does not publish a version number, so we'll +# just insert the revision on the command line. +UBJFLAGS = $(CFLAGS) \ + -DBENCHMARK_UBJ_VERSION='"'$(ubj-version)'"' + +$(ubj-lib): $(ubj-header) + # Due to a bug in CMake 3.4.0, we have to manually invoke cmake with + # -DCMAKE_C_STANDARD_COMPUTED_DEFAULT=GNU in order to have -flto in + # our CFLAGS. This will apparently be fixed in 3.4.0: + # http://public.kitware.com/pipermail/cmake-developers/2015-November/026993.html + mkdir -p $(ubj-dir)/build + cd $(ubj-dir)/build ;\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="" LDFLAGS="$(LDFLAGS)" \ + cmake .. 
\ + -DCMAKE_C_FLAGS_RELEASE="" \ + -DCMAKE_C_STANDARD_COMPUTED_DEFAULT=GNU \ + -DCMAKE_VERBOSE_MAKEFILE=1 \ + && make + +.PHONY: run-ubj +run-ubj: run-ubj-write run-ubj-read run-ubj-opt-write run-ubj-opt-read + +.PHONY: build-ubj +build-ubj: build/ubj-file build/ubj-write build/ubj-read build/ubj-opt-write build/ubj-opt-read + +# ubj-file + +build/ubj/ubj-file.o: $(common-headers) $(ubj-header) src/ubj/ubj-file.c + mkdir -p build/ubj + $(CC) $(UBJFLAGS) -I $(ubj-dir) -c -o $@ src/ubj/ubj-file.c + +build/ubj-file: build/ubj/ubj-file.o $(ubj-lib) $(common-objs) + $(CC) $(LDFLAGS) -o $@ $^ + +.PHONY: data-ubjson +data-ubjson: run-ubj-file + +.PHONY: run-ubj-file +run-ubj-file: build/ubj-file + build/ubj-file $(FILE_OBJECT_SIZES) +build/data.ubjson: run-ubj-file + +# ubj-write + +build/ubj/ubj-write.o: $(common-headers) $(ubj-header) src/ubj/ubj-write.c + mkdir -p build/ubj + $(CC) $(UBJFLAGS) -I $(ubj-dir) -c -o $@ src/ubj/ubj-write.c + +build/ubj-write: build/ubj/ubj-write.o $(ubj-lib) $(common-objs) + $(CC) $(LDFLAGS) -o $@ $^ + +.PHONY: run-ubj-write +run-ubj-write: build/ubj-write + build/ubj-write $(OBJECT_SIZES) + +# ubj-read + +build/ubj/ubj-read.o: $(common-headers) $(ubj-header) src/ubj/ubj-read.c + mkdir -p build/ubj + $(CC) $(UBJFLAGS) -I $(ubj-dir) -c -o $@ src/ubj/ubj-read.c + +build/ubj-read: build/ubj/ubj-read.o $(ubj-lib) $(common-objs) + $(CC) $(LDFLAGS) -o $@ $^ + +.PHONY: run-ubj-read +run-ubj-read: build/ubj-read data-ubjson + build/ubj-read $(OBJECT_SIZES) + +# ubj-opt-write + +build/ubj/ubj-opt-write.o: $(common-headers) $(ubj-header) src/ubj/ubj-write.c + mkdir -p build/ubj + $(CC) $(UBJFLAGS) -DBENCHMARK_UBJ_OPTIMIZED=1 -I $(ubj-dir) -c -o $@ src/ubj/ubj-write.c + +build/ubj-opt-write: build/ubj/ubj-opt-write.o $(ubj-lib) $(common-objs) + $(CC) $(LDFLAGS) -o $@ $^ + +.PHONY: run-ubj-opt-write +run-ubj-opt-write: build/ubj-opt-write + build/ubj-opt-write $(OBJECT_SIZES) + +# ubj-opt-read + +build/ubj/ubj-opt-read.o: $(common-headers) 
$(ubj-header) src/ubj/ubj-read.c + mkdir -p build/ubj + $(CC) $(UBJFLAGS) -DBENCHMARK_UBJ_OPTIMIZED=1 -I $(ubj-dir) -c -o $@ src/ubj/ubj-read.c + +build/ubj-opt-read: build/ubj/ubj-opt-read.o $(ubj-lib) $(common-objs) + $(CC) $(LDFLAGS) -o $@ $^ + +.PHONY: run-ubj-opt-read +run-ubj-opt-read: build/ubj-opt-read data-ubjson + build/ubj-opt-read $(OBJECT_SIZES) + + + +# udp/json-parser and udp/json-builder +# +# this parser and builder are complementary; they are meant to be used +# together, and the builder requires the parser. +# +# the parser has two releases but the builder has none. instead, like +# CMP we'll give them the benefit of the doubt and just checkout the +# current source for both (there doesn't appear to be performance-related +# changes since json-parser's last release anyway.) +# +# these provide configure scripts to build libraries, but they are single +# .c files and they recommend just dropping the files into your project, +# so we'll ignore their build systems and just compile them ourselves. 
+
+.PHONY: fetch-udp-json
+fetch-udp-json: fetch-json-parser fetch-json-builder
+
+.PHONY: build-udp-json
+build-udp-json: build-json-parser build-json-builder
+
+.PHONY: run-udp-json
+run-udp-json: run-json-parser run-json-builder
+
+# json-parser
+
+json-parser-version := 7053321
+json-parser-revision := 70533215eea575e40a0b91a34ae01a779641d73a
+json-parser-url := https://github.com/udp/json-parser/archive/$(json-parser-revision).tar.gz
+json-parser-dir := contrib/udp-json/json-parser-$(json-parser-revision)
+json-parser-header := $(json-parser-dir)/json.h
+
+.PHONY: fetch-json-parser
+fetch-json-parser: $(json-parser-header)
+$(json-parser-header):
+	mkdir -p contrib/udp-json
+	cd contrib/udp-json && \
+		curl -LO $(json-parser-url) && \
+		tar -xzf $(json-parser-revision).tar.gz
+
+.PHONY: build-json-parser
+build-json-parser: build/json-parser
+
+build/udp-json/json-lib.o: $(json-parser-header)
+	mkdir -p build/udp-json
+	$(CC) $(CFLAGS) -I $(json-parser-dir) -c -o $@ $(json-parser-dir)/json.c
+
+build/udp-json/json-parser-test.o: $(common-headers) $(json-parser-header) src/udp-json/json-parser.c
+	mkdir -p build/udp-json
+	$(CC) $(CFLAGS) \
+		-DBENCHMARK_JSON_PARSER_VERSION='"'$(json-parser-version)'"' \
+		-I $(json-parser-dir) -c -o $@ src/udp-json/json-parser.c
+
+build/json-parser: build/udp-json/json-lib.o build/udp-json/json-parser-test.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^ -lm
+
+.PHONY: run-json-parser
+run-json-parser: build/json-parser data-json
+	build/json-parser $(OBJECT_SIZES)
+
+# json-builder
+
+json-builder-version := 19c739f
+json-builder-revision := 19c739f64d1da157789c35a06911b865486e6c2e
+json-builder-url := https://github.com/udp/json-builder/archive/$(json-builder-revision).tar.gz
+json-builder-dir := contrib/udp-json/json-builder-$(json-builder-revision)
+json-builder-header := $(json-builder-dir)/json-builder.h
+
+.PHONY: fetch-json-builder
+fetch-json-builder: $(json-builder-header) $(json-parser-header)
+$(json-builder-header):
+
mkdir -p contrib/udp-json
+	cd contrib/udp-json && \
+		curl -LO $(json-builder-url) && \
+		tar -xzf $(json-builder-revision).tar.gz
+
+.PHONY: build-json-builder
+build-json-builder: build/json-builder
+
+build/udp-json/json-builder-lib.o: $(json-parser-header) $(json-builder-header)
+	mkdir -p build/udp-json
+	$(CC) $(CFLAGS) -I $(json-parser-dir) -I $(json-builder-dir) -c -o $@ $(json-builder-dir)/json-builder.c
+
+build/udp-json/json-builder-test.o: $(common-headers) $(json-parser-header) $(json-builder-header) src/udp-json/json-builder.c
+	mkdir -p build/udp-json
+	$(CC) $(CFLAGS) \
+		-DBENCHMARK_JSON_BUILDER_VERSION='"'$(json-builder-version)'"' \
+		-I $(json-parser-dir) -I $(json-builder-dir) -c -o $@ src/udp-json/json-builder.c
+
+build/json-builder: build/udp-json/json-lib.o build/udp-json/json-builder-lib.o build/udp-json/json-builder-test.o $(common-objs)
+	$(CC) $(LDFLAGS) -o $@ $^ -lm
+
+.PHONY: run-json-builder
+run-json-builder: build/json-builder
+	build/json-builder $(OBJECT_SIZES)
+
+
+
+# mongo-cxx
+
+# the bson website recommends using just the bson portion of the
+# MongoDB C++ Legacy Driver as the primary bson implementation
+# for C++.
+mongo-cxx-version := 1.1.0
+mongo-cxx-url := https://github.com/mongodb/mongo-cxx-driver/archive/legacy-$(mongo-cxx-version).tar.gz
+mongo-cxx-dir := contrib/mongo-cxx/mongo-cxx-driver-legacy-$(mongo-cxx-version)
+mongo-cxx-lib := $(mongo-cxx-dir)/build/install/lib/libmongoclient.a
+mongo-cxx-header := $(mongo-cxx-dir)/src/mongo/bson/bson.h
+
+.PHONY: fetch-mongo-cxx
+fetch-mongo-cxx: $(mongo-cxx-header)
+$(mongo-cxx-header):
+	mkdir -p contrib/mongo-cxx
+	cd contrib/mongo-cxx && \
+		curl -LO $(mongo-cxx-url) && \
+		tar -xzf legacy-$(mongo-cxx-version).tar.gz
+
+MONGOFLAGS = -Wno-deprecated-declarations
+
+$(mongo-cxx-lib): $(mongo-cxx-header)
+	# scons doesn't listen to environment flags, so we pass flags
+	# as part of CC and CXX.
the buildsystem puts the entire set
+	# of optimizations into the build path but it still works.
+	cd $(mongo-cxx-dir) && scons --disable-warnings-as-errors --cc="$(CC) $(CFLAGS) $(MONGOFLAGS)" --cxx="$(CXX) $(CXXFLAGS) $(MONGOFLAGS)" install
+
+.PHONY: run-mongo-cxx
+run-mongo-cxx: run-mongo-cxx-obj run-mongo-cxx-builder
+
+.PHONY: build-mongo-cxx
+build-mongo-cxx: $(mongo-cxx-lib) build/mongo-cxx-obj build/mongo-cxx-builder
+
+# mongo-cxx-builder (boost libraries go after the static driver archive that needs them)
+
+build/mongo-cxx/mongo-cxx-builder.o: $(common-headers) $(mongo-cxx-lib) src/mongo-cxx/mongo-cxx-builder.cpp
+	mkdir -p build/mongo-cxx
+	$(CXX) $(CXXFLAGS) $(MONGOFLAGS) -I $(mongo-cxx-dir) -I $(mongo-cxx-dir)/build/install/include -c -o $@ src/mongo-cxx/mongo-cxx-builder.cpp
+
+build/mongo-cxx-builder: build/mongo-cxx/mongo-cxx-builder.o $(common-objs) $(mongo-cxx-lib)
+	$(CXX) $(LDFLAGS) $(MONGOFLAGS) -o $@ $^ -lboost_system -lboost_thread
+
+.PHONY: run-mongo-cxx-builder
+run-mongo-cxx-builder: build/mongo-cxx-builder
+	build/mongo-cxx-builder $(OBJECT_SIZES)
+
+# mongo-cxx-obj (boost libraries go after the static driver archive that needs them)
+
+build/mongo-cxx/mongo-cxx-obj.o: $(common-headers) $(mongo-cxx-lib) src/mongo-cxx/mongo-cxx-obj.cpp
+	mkdir -p build/mongo-cxx
+	$(CXX) $(CXXFLAGS) $(MONGOFLAGS) -I $(mongo-cxx-dir) -I $(mongo-cxx-dir)/build/install/include -c -o $@ src/mongo-cxx/mongo-cxx-obj.cpp
+
+build/mongo-cxx-obj: build/mongo-cxx/mongo-cxx-obj.o $(common-objs) $(mongo-cxx-lib)
+	$(CXX) $(LDFLAGS) $(MONGOFLAGS) -o $@ $^ -lboost_system -lboost_thread
+
+.PHONY: run-mongo-cxx-obj
+run-mongo-cxx-obj: build/mongo-cxx-obj data-bson
+	build/mongo-cxx-obj $(OBJECT_SIZES)
+
diff --git a/src/binn/binn-file.c b/src/binn/binn-file.c
new file mode 100644
index 0000000..7f228ef
--- /dev/null
+++ b/src/binn/binn-file.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without
restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "binn.h" + +// the type terminology can be a bit confusing here because what +// the data calls "map" is actually an "object" in binn since +// it has string keys. a binn "map" has integer keys (and is not +// used in this benchmark.) + +// the "natural" way to use binn (and probably the fastest) is +// to write keys and typed values together, which is why the value +// switch is implemented twice: one to append values to a list, and +// one to set values along with their key on an object. + +// binn is in C89 and does not use const, so we have to cast it +// away. binn also uses null-terminated strings throughout. 
+#define CONST_CAST(s) ((char*)(s)) + +static bool write_list(binn* parent, object_t* object); +static bool write_object(binn* parent, object_t* object); + +static bool write_list(binn* parent, object_t* object) { + for (size_t i = 0; i < object->l; ++i) { + object_t* value = object->children + i; + switch (value->type) { + case type_nil: if (!binn_list_add_null(parent)) return false; break; + case type_bool: if (!binn_list_add_bool(parent, (BOOL)value->b)) return false; break; + case type_double: if (!binn_list_add_double(parent, value->d)) return false; break; + case type_int: if (!binn_list_add_int64(parent, value->i)) return false; break; + case type_uint: if (!binn_list_add_uint64(parent, value->u)) return false; break; + case type_str: if (!binn_list_add_str(parent, CONST_CAST(value->str))) return false; break; + + case type_array: { + binn child; + binn_create_list(&child); + bool ok = write_list(&child, value); + if (ok) + ok = binn_list_add_object(parent, &child); + binn_free(&child); + if (!ok) + return false; + } break; + + case type_map: { + binn child; + binn_create_object(&child); + bool ok = write_object(&child, value); + if (ok) + ok = binn_list_add_object(parent, &child); + binn_free(&child); + if (!ok) + return false; + } break; + + default: + return false; + } + } + return true; +} + +static bool write_object(binn* parent, object_t* object) { + for (size_t i = 0; i < object->l; ++i) { + char* key = CONST_CAST(object->children[i * 2].str); + object_t* value = object->children + i * 2 + 1; + switch (value->type) { + case type_nil: if (!binn_object_set_null(parent, key)) return false; break; + case type_bool: if (!binn_object_set_bool(parent, key, (BOOL)value->b)) return false; break; + case type_double: if (!binn_object_set_double(parent, key, value->d)) return false; break; + case type_int: if (!binn_object_set_int64(parent, key, value->i)) return false; break; + case type_uint: if (!binn_object_set_uint64(parent, key, value->u)) return false; 
break;
+            case type_str:    if (!binn_object_set_str(parent, key, CONST_CAST(value->str))) return false; break;
+
+            case type_array: {
+                binn child;
+                binn_create_list(&child);
+                bool ok = write_list(&child, value);
+                if (ok)
+                    ok = binn_object_set_object(parent, key, &child);
+                binn_free(&child);
+                if (!ok)
+                    return false;
+            } break;
+
+            case type_map: {
+                binn child;
+                binn_create_object(&child);
+                bool ok = write_object(&child, value);
+                if (ok)
+                    ok = binn_object_set_object(parent, key, &child);
+                binn_free(&child);
+                if (!ok)
+                    return false;
+            } break;
+
+            default:
+                return false;
+        }
+    }
+    return true;
+}
+
+bool setup_test(size_t object_size) { // encodes the benchmark object with binn and writes it to its data file
+    object_t* object = benchmark_object_create(object_size);
+    binn root;
+    bool ok;
+    if (object->type == type_map) {
+        binn_create_object(&root);
+        ok = write_object(&root, object);
+    } else {
+        binn_create_list(&root);
+        ok = write_list(&root, object);
+    }
+    object_destroy(object);
+
+    char filename[64];
+    benchmark_filename(filename, sizeof(filename), object_size, BENCHMARK_FORMAT_BINN, NULL);
+    FILE* file = ok ? fopen(filename, "wb") : NULL; // never create the file if encoding failed
+    ok = file != NULL && fwrite(binn_ptr(&root), binn_size(&root), 1, file) == 1;
+    if (file != NULL && fclose(file) != 0) // a failed close can mean buffered data was lost
+        ok = false;
+    if (!ok) { // encoding, open, write or close failed
+        fprintf(stderr, "binn error writing data!\n");
+        binn_free(&root);
+        return false;
+    }
+
+    // like libbson, we call binn_free() regardless of whether
+    // we placed it on the stack or the heap; it stores a flag
+    // to say whether it should be freed. this apparently doesn't
+    // cause warnings since GCC chooses not to inline binn_free().
+ binn_free(&root); + return true; +} + +bool run_test(uint32_t* hash_out) { + return false; +} + +void teardown_test(void) { +} + +bool is_benchmark(void) { + return false; +} + +const char* test_version(void) { + return BENCHMARK_BINN_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "Binn"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/binn/binn-load.c b/src/binn/binn-load.c new file mode 100644 index 0000000..c19ebef --- /dev/null +++ b/src/binn/binn-load.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "binn.h" + +// the type terminology can be a bit confusing here because what +// the data calls "map" is actually an "object" in binn since +// it has string keys. 
a binn "map" has integer keys (and is not +// used in this benchmark.) + +static char* file_data; +static size_t file_size; + +static bool hash_list(binn* parent, uint32_t* hash); +static bool hash_object(binn* parent, uint32_t* hash); + +static bool hash_value(binn* value, uint32_t* hash) { + + switch (binn_type(value)) { + + case BINN_NULL: *hash = hash_nil(*hash); return true; + case BINN_BOOL: *hash = hash_bool(*hash, value->vbool); return true; + case BINN_DOUBLE: *hash = hash_double(*hash, value->vdouble); return true; + case BINN_STRING: { + // for strings, "size" appears to be zero. we have to scan for the + // null-terminator. technically we could add a hash function for cstr + const char* str = (const char*)value->ptr; + *hash = hash_str(*hash, str, strlen(str)); + return true; + } + + // the binn "type family" functions are all private in the library. + // it seems the only way to get dynamically typed data out of binn + // is to switch on the base type and fetch it from the union. 
+ // all ints are hashed as 64-bit (not all libraries read different + // sized types) + + case BINN_UINT8: *hash = hash_u64(*hash, value->vuint8); return true; + case BINN_UINT16: *hash = hash_u64(*hash, value->vuint16); return true; + case BINN_UINT32: *hash = hash_u64(*hash, value->vuint32); return true; + case BINN_UINT64: *hash = hash_u64(*hash, value->vuint64); return true; + + case BINN_INT8: *hash = hash_i64(*hash, value->vint8); return true; + case BINN_INT16: *hash = hash_i64(*hash, value->vint16); return true; + case BINN_INT32: *hash = hash_i64(*hash, value->vint32); return true; + case BINN_INT64: *hash = hash_i64(*hash, value->vint64); return true; + + case BINN_LIST: return hash_list(value, hash); + case BINN_OBJECT: return hash_object(value, hash); + + default: + break; + } + return false; +} + +static bool hash_list(binn* parent, uint32_t* hash) { + binn_iter iter; + binn child; + + // the foreach helper macros don't actually do any error + // checking, so we do it ourselves. the "next" functions + // return false on both error and end of data, so have to + // track the count ourselves to make sure there was no error. + uint32_t count = binn_count(parent); + uint32_t i = 0; + if (!binn_iter_init(&iter, parent, BINN_LIST)) + return false; + while (binn_list_next(&iter, &child)) { + ++i; + if (!hash_value(&child, hash)) + return false; + } + + if (i != count) + return false; + *hash = hash_u32(*hash, count); + return true; +} + +static bool hash_object(binn* parent, uint32_t* hash) { + binn_iter iter; + char key[256]; // binn keys have a length limit of 255 characters plus null-terminator. + binn child; + + // as above, we have to do our own error checking. 
+ uint32_t count = binn_count(parent); + uint32_t i = 0; + if (!binn_iter_init(&iter, parent, BINN_OBJECT)) + return false; + while (binn_object_next(&iter, key, &child)) { + ++i; + *hash = hash_str(*hash, key, strlen(key)); + if (!hash_value(&child, hash)) + return false; + } + + if (i != count) + return false; + *hash = hash_u32(*hash, count); + return true; +} + +bool run_test(uint32_t* hash) { + char* data = benchmark_in_situ_copy(file_data, file_size); + if (!data) + return false; + + // so this is terrifying. we can't actually give binn the size + // of our data. if it's been truncated, binn will just keep on + // reading uninitialized memory. maybe i'm misunderstanding how + // to use this and calling the wrong function? otherwise this + // should be considered a critical security vulnerability. + binn root; + binn_load(data, &root); + + bool ok; + if (binn_type(&root) == BINN_LIST) + ok = hash_list(&root, hash); + else + ok = hash_object(&root, hash); + + benchmark_in_situ_free(data); + return ok; +} + +bool setup_test(size_t object_size) { + file_data = load_data_file(BENCHMARK_FORMAT_BINN, object_size, &file_size); + if (!file_data) + return false; + return true; +} + +void teardown_test(void) { + free(file_data); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return BENCHMARK_BINN_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "Binn"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/binn/binn-write.c b/src/binn/binn-write.c new file mode 100644 index 0000000..d7cb308 --- /dev/null +++ b/src/binn/binn-write.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the 
rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "binn.h" + +// the type terminology can be a bit confusing here because what +// the data calls "map" is actually an "object" in binn since +// it has string keys. a binn "map" has integer keys (and is not +// used in this benchmark.) + +// the "natural" way to use binn (and probably the fastest) is +// to write keys and typed values together, which is why the value +// switch is implemented twice: one to append values to a list, and +// one to set values along with their key on an object. + +// binn is in C89 and does not use const, so we have to cast it +// away. binn also uses null-terminated strings throughout. 
+#define CONST_CAST(s) ((char*)(s)) + +static object_t* root_object; + +static bool write_list(binn* parent, object_t* object); +static bool write_object(binn* parent, object_t* object); + +static bool write_list(binn* parent, object_t* object) { + for (size_t i = 0; i < object->l; ++i) { + object_t* value = object->children + i; + switch (value->type) { + case type_nil: if (!binn_list_add_null(parent)) return false; break; + case type_bool: if (!binn_list_add_bool(parent, (BOOL)value->b)) return false; break; + case type_double: if (!binn_list_add_double(parent, value->d)) return false; break; + case type_int: if (!binn_list_add_int64(parent, value->i)) return false; break; + case type_uint: if (!binn_list_add_uint64(parent, value->u)) return false; break; + case type_str: if (!binn_list_add_str(parent, CONST_CAST(value->str))) return false; break; + + case type_array: { + binn child; + binn_create_list(&child); + bool ok = write_list(&child, value); + if (ok) + ok = binn_list_add_object(parent, &child); + binn_free(&child); + if (!ok) + return false; + } break; + + case type_map: { + binn child; + binn_create_object(&child); + bool ok = write_object(&child, value); + if (ok) + ok = binn_list_add_object(parent, &child); + binn_free(&child); + if (!ok) + return false; + } break; + + default: + return false; + } + } + return true; +} + +static bool write_object(binn* parent, object_t* object) { + for (size_t i = 0; i < object->l; ++i) { + char* key = CONST_CAST(object->children[i * 2].str); + object_t* value = object->children + i * 2 + 1; + switch (value->type) { + case type_nil: if (!binn_object_set_null(parent, key)) return false; break; + case type_bool: if (!binn_object_set_bool(parent, key, (BOOL)value->b)) return false; break; + case type_double: if (!binn_object_set_double(parent, key, value->d)) return false; break; + case type_int: if (!binn_object_set_int64(parent, key, value->i)) return false; break; + case type_uint: if (!binn_object_set_uint64(parent, 
key, value->u)) return false; break; + case type_str: if (!binn_object_set_str(parent, key, CONST_CAST(value->str))) return false; break; + + case type_array: { + binn child; + binn_create_list(&child); + bool ok = write_list(&child, value); + if (ok) + ok = binn_object_set_object(parent, key, &child); + binn_free(&child); + if (!ok) + return false; + } break; + + case type_map: { + binn child; + binn_create_object(&child); + bool ok = write_object(&child, value); + if (ok) + ok = binn_object_set_object(parent, key, &child); + binn_free(&child); + if (!ok) + return false; + } break; + + default: + return false; + } + } + return true; +} + +bool run_test(uint32_t* hash_out) { + binn root; + bool ok; + if (root_object->type == type_map) { + binn_create_object(&root); + ok = write_object(&root, root_object); + } else { + binn_create_list(&root); + ok = write_list(&root, root_object); + } + + if (!ok) { + fprintf(stderr, "binn error writing data!\n"); + binn_free(&root); + return false; + } + + *hash_out = hash_str(*hash_out, binn_ptr(&root), binn_size(&root)); + + // like libbson, we call binn_free() regardless of whether + // we placed it on the stack or the heap; it stores a flag + // to say whether it should be freed. this apparently doesn't + // cause warnings since GCC chooses not to inline binn_free(). 
+ binn_free(&root); + return true; +} + +bool setup_test(size_t object_size) { + root_object = benchmark_object_create(object_size); + return true; +} + +void teardown_test(void) { + object_destroy(root_object); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return BENCHMARK_BINN_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "Binn"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/cmp/cmp-read.c b/src/cmp/cmp-read.c new file mode 100644 index 0000000..400c2e2 --- /dev/null +++ b/src/cmp/cmp-read.c @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "benchmark.h"
+#include "cmp.h"
+
+static char* file_data; // MessagePack data loaded by setup_test()
+static size_t file_size; // size of file_data in bytes
+
+
+
+// cmp doesn't have built-in support for reading from a memory
+// buffer, so we implement one here. this means that cmp needs
+// to memcpy all data before reading it!
+
+typedef struct buffer_t {
+    const char* data; // next unread byte
+    size_t left; // number of unread bytes remaining
+} buffer_t;
+
+static bool buffer_cmp_reader(cmp_ctx_t* ctx, void* data, size_t count) { // read callback passed to cmp_init()
+    buffer_t* buffer = (buffer_t*)ctx->buf;
+    if (count > buffer->left) // refuse to read past the end of the data
+        return false;
+    memcpy(data, buffer->data, count);
+    buffer->data += count;
+    buffer->left -= count;
+    return true;
+}
+
+
+
+static bool hash_element(cmp_ctx_t* cmp, uint32_t* hash) { // reads one element (recursing into containers) and folds it into *hash
+    buffer_t* buffer = (buffer_t*)cmp->buf;
+
+    cmp_object_t object;
+    if (!cmp_read_object(cmp, &object))
+        return false;
+
+    // note: we fetch values out of the cmp_object_t directly rather
+    // than going through the cmp_object_is/as* functions. it's much
+    // faster this way.
+
+    switch (object.type) {
+        case CMP_TYPE_NIL: *hash = hash_nil(*hash); return true;
+        case CMP_TYPE_BOOLEAN: *hash = hash_bool(*hash, object.as.boolean); return true;
+        case CMP_TYPE_DOUBLE: *hash = hash_double(*hash, object.as.dbl); return true;
+
+        // note: all ints are hashed as 64-bit (not all libraries read different sized types)
+
+        case CMP_TYPE_POSITIVE_FIXNUM: *hash = hash_u64(*hash, object.as.u8); return true;
+        case CMP_TYPE_UINT8: *hash = hash_u64(*hash, object.as.u8); return true;
+        case CMP_TYPE_UINT16: *hash = hash_u64(*hash, object.as.u16); return true;
+        case CMP_TYPE_UINT32: *hash = hash_u64(*hash, object.as.u32); return true;
+        case CMP_TYPE_UINT64: *hash = hash_u64(*hash, object.as.u64); return true;
+
+        case CMP_TYPE_NEGATIVE_FIXNUM: *hash = hash_i64(*hash, object.as.s8); return true;
+        case CMP_TYPE_SINT8: *hash = hash_i64(*hash, object.as.s8); return true;
+        case CMP_TYPE_SINT16: *hash = hash_i64(*hash, object.as.s16); return true;
+        case CMP_TYPE_SINT32: *hash = hash_i64(*hash, object.as.s32); return true;
+        case CMP_TYPE_SINT64: *hash = hash_i64(*hash, object.as.s64); return true;
+
+        case CMP_TYPE_FIXSTR:
+        case CMP_TYPE_STR8:
+        case CMP_TYPE_STR16:
+        case CMP_TYPE_STR32:
+        {
+            uint32_t len = object.as.str_size;
+            if (buffer->left < len) // string body must fit in the remaining data
+                return false;
+            *hash = hash_str(*hash, buffer->data, len); // hash the bytes in place, bypassing the reader callback
+            buffer->data += len;
+            buffer->left -= len;
+            return true;
+        }
+
+        case CMP_TYPE_FIXARRAY:
+        case CMP_TYPE_ARRAY16:
+        case CMP_TYPE_ARRAY32:
+            for (size_t i = 0; i < object.as.array_size; ++i){
+                if (!hash_element(cmp, hash))
+                    return false;
+            }
+            *hash = hash_u32(*hash, object.as.array_size); // the element count is hashed after the elements
+            return true;
+
+        case CMP_TYPE_FIXMAP:
+        case CMP_TYPE_MAP16:
+        case CMP_TYPE_MAP32:
+        {
+            for (size_t i = 0; i < object.as.map_size; ++i) {
+
+                // we expect keys to be short strings
+                char buf[16];
+                uint32_t size = sizeof(buf); // in: buffer capacity; the returned size is what gets hashed
+                if (!cmp_read_str(cmp, buf, &size))
+                    return false;
+                *hash = hash_str(*hash, buf, size);
+
+                if (!hash_element(cmp, hash))
+                    return false;
+            }
+            *hash = hash_u32(*hash, object.as.map_size); // the pair count is hashed after the pairs
+            return true;
+        }
+
+        default: // any other cmp type is treated as a failure
+            break;
+    }
+
+    return false;
+}
+
+bool run_test(uint32_t* hash_out) { // one benchmark iteration: copy the data, parse it with cmp and hash every element
+    char* data = benchmark_in_situ_copy(file_data, file_size);
+    if (!data)
+        return false;
+
+    buffer_t buffer;
+    buffer.data = data;
+    buffer.left = file_size;
+
+    cmp_ctx_t cmp;
+    cmp_init(&cmp, &buffer, buffer_cmp_reader, NULL); // reader only; no writer callback
+
+    bool ok = hash_element(&cmp, hash_out);
+    benchmark_in_situ_free(data);
+    return ok;
+}
+
+bool setup_test(size_t object_size) { // loads the MessagePack data file for this object size
+    file_data = load_data_file(BENCHMARK_FORMAT_MESSAGEPACK, object_size, &file_size);
+    if (!file_data)
+        return false;
+    return true;
+}
+
+void teardown_test(void) { // releases the data loaded by setup_test()
+    free(file_data);
+}
+
+bool is_benchmark(void) {
+    return true;
+}
+
+const char* test_version(void) {
+    static char buf[16]; // static so the returned pointer stays valid after return
+    snprintf(buf, sizeof(buf), "v%u", cmp_version());
+    return buf;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_C;
+}
+
+const char* test_format(void) {
+    return "MessagePack";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git a/src/cmp/cmp-write.c b/src/cmp/cmp-write.c
new file mode 100644
index 0000000..4b10449
--- /dev/null
+++ b/src/cmp/cmp-write.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "benchmark.h"
+#include "cmp.h"
+#include "buffer.h"
+
+static object_t* root_object; // object created by setup_test() and serialized on every run
+
+// cmp doesn't have built-in support for writing to a growable buffer.
+static size_t buffer_cmp_writer(cmp_ctx_t* ctx, const void* data, size_t count) { // returns count on success, 0 on failure
+    return buffer_write((buffer_t*)ctx->buf, (const char*)data, count) ? count : 0;
+}
+
+static bool write_object(cmp_ctx_t* cmp, object_t* object) { // serializes object (recursing into containers) through cmp
+    switch (object->type) {
+        case type_bool: return cmp_write_bool(cmp, object->b);
+        case type_nil: return cmp_write_nil(cmp);
+        case type_int: return cmp_write_sint(cmp, object->i);
+        case type_uint: return cmp_write_uint(cmp, object->u);
+        case type_double: return cmp_write_double(cmp, object->d);
+        case type_str: return cmp_write_str(cmp, object->str, object->l);
+
+        case type_array:
+            if (!cmp_write_array(cmp, object->l))
+                return false;
+            for (size_t i = 0; i < object->l; ++i)
+                if (!write_object(cmp, object->children + i))
+                    return false;
+            return true;
+
+        case type_map:
+            if (!cmp_write_map(cmp, object->l))
+                return false;
+            for (size_t i = 0; i < object->l; ++i) {
+
+                // we expect keys to be short strings
+                object_t* key = object->children + i * 2; // children alternate key, value
+                assert(key->type == type_str);
+                if (!cmp_write_str(cmp, key->str, key->l))
+                    return false;
+
+                if (!write_object(cmp, object->children + i * 2 + 1))
+                    return false;
+            }
+            return true;
+
+        default: // no other object type is handled here
+            assert(0);
+            break;
+    }
+    return false;
+}
+
+bool run_test(uint32_t* hash_out) { // one benchmark iteration: serialize root_object and hash the output bytes
+    buffer_t buffer;
+    buffer_init(&buffer);
+    cmp_ctx_t cmp;
+    cmp_init(&cmp, &buffer, NULL, buffer_cmp_writer); // writer only; no reader callback
+
+    if (!write_object(&cmp, root_object)) {
+        buffer_destroy(&buffer);
+        return false;
+    }
+
+    *hash_out = hash_str(*hash_out, buffer.data, buffer.count);
+    buffer_destroy(&buffer);
+    return true;
+}
+
+bool setup_test(size_t object_size) { // creates the object to serialize; always reports success
+    root_object = benchmark_object_create(object_size);
+    return true;
+}
+
+void teardown_test(void) { // destroys the object created by setup_test()
+    object_destroy(root_object);
+}
+
+bool is_benchmark(void) {
+    return true;
+}
+
+const char* test_version(void) {
+    static char buf[16]; // static so the returned pointer stays valid after return
+    snprintf(buf, sizeof(buf), "v%u", cmp_version());
+    return buf;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_C;
+}
+
+const char* test_format(void) {
+    return "MessagePack";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git
a/src/common/benchmark.c b/src/common/benchmark.c
new file mode 100644
index 0000000..00b3a1c
--- /dev/null
+++ b/src/common/benchmark.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This is the benchmarking framework for C/C++ serialization libraries.
+
+#include "benchmark.h"
+
+#include <sys/stat.h>
+
+// with the below seed:
+// size 2: 2556 bytes MessagePack, 3349 bytes JSON
+// size 4: 187600 bytes MessagePack, 232342 bytes JSON
+#define BENCHMARK_OBJECT_SEED 12345678
+
+#define FRAGMENT_MEMORY 1
+
+#define WORK_TIME 10.0 // work for this many seconds
+#define WARM_TIME (WORK_TIME / 4.0) // warm up for this many seconds
+
+// Returns the current monotonic clock time in seconds.
+static double dtime(void) {
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (double)ts.tv_sec + (double)ts.tv_nsec / (1000.0 * 1000.0 * 1000.0);
+}
+
+// We wrap run function here to ensure it cannot be considered for inlining.
+// This is to help prevent the compiler from optimizing away parts of the test.
+__attribute__((noinline)) static bool run_wrapper(uint32_t* hash_out) {
+    __asm__ (""); // recommended in gcc documentation of noinline
+    return run_test(hash_out);
+}
+
+// Creates the benchmark object of the given size from the fixed seed.
+object_t* benchmark_object_create(size_t object_size) {
+    return object_create(BENCHMARK_OBJECT_SEED, object_size);
+}
+
+// Formats the data file path "build/data<config>-<size>.<format>" into buf.
+void benchmark_filename(char* buf, size_t size, size_t object_size, const char* format, const char* config) {
+    snprintf(buf, size, "build/data%s-%i.%s", config ? config : "", (int)object_size, format);
+}
+
+#if FRAGMENT_MEMORY
+#define MEMORY_COUNT 65536
+static void* memory[MEMORY_COUNT];
+#endif
+
+static void fragment_memory(void) {
+    #if FRAGMENT_MEMORY
+    // Pre-fragment memory. We allocate a bunch of random-sized blobs,
+    // shuffle them and free half of them. This creates a more realistic
+    // memory layout, testing how well the library deals with real-world
+    // memory usage rather than a nice flat empty malloc().
+    // With this random seed, it peaks at about 22 megs before freeing half.
+    random_t random;
+    random_seed(&random, 34986);
+    for (int i = 0; i < MEMORY_COUNT; ++i) {
+        size_t bytes = (1 << (random_next(&random) % 12)) + random_next(&random) % 8;
+        memory[i] = malloc(bytes);
+    }
+    for (int i = 0; i < MEMORY_COUNT; ++i) {
+        int j = random_next(&random) % (MEMORY_COUNT - i) + i;
+        void* l = memory[i];
+        memory[i] = memory[j];
+        memory[j] = l;
+    }
+    for (int i = 0; i < MEMORY_COUNT / 2; ++i)
+        free(memory[i]);
+    #endif
+}
+
+// Frees the blobs that fragment_memory() left allocated.
+static void free_fragmented_memory(void) {
+    #if FRAGMENT_MEMORY
+    for (int i = MEMORY_COUNT / 2; i < MEMORY_COUNT; ++i)
+        free(memory[i]);
+    #endif
+}
+
+// Runs one benchmark at the given object size: sets up the test, warms up,
+// times repeated runs for WORK_TIME seconds, prints the result and (unless
+// result_only is set) appends a row to results.csv. Returns false on failure.
+static bool go(bool result_only, size_t object_size, size_t binary_size, const char* name) {
+
+    // setup
+    if (!result_only) {
+        printf("%s: ================\n", name);
+        printf("%s: setting up size %i\n", name, (int)object_size);
+    }
+    if (!setup_test(object_size)) {
+        fprintf(stderr, "%s: failed to get setup result.\n", name);
+        return false;
+    }
+
+    // if this isn't a benchmark (the file creators), nothing left to do
+    if (!is_benchmark()) {
+        teardown_test();
+        if (!result_only)
+            printf("%s: done\n", name);
+        return true;
+    }
+
+    // figure out a reasonable number of iterations between checking the time
+    int iterations;
+    #ifdef __arm__
+    iterations = 1;
+    #else
+    iterations = 32;
+    #endif
+    for (size_t i = 5; i > object_size; --i)
+        iterations <<= 3;
+
+    uint32_t hash_result;
+
+    // warm up (start from the same initial hash as the timed runs)
+    if (!result_only)
+        printf("%s: warming for %.0f seconds \n", name, WARM_TIME);
+    double start_time = dtime();
+    while (true) {
+        for (int i = 0; i < iterations; ++i) {
+            hash_result = HASH_INITIAL_VALUE;
+            if (!run_wrapper(&hash_result)) {
+                fprintf(stderr, "%s: failed to get benchmark result.\n", name);
+                teardown_test();
+                return false;
+            }
+        }
+        if (dtime() - start_time > WARM_TIME)
+            break;
+    }
+
+    // run tests
+    if (!result_only)
+        printf("%s: running for %.0f seconds\n", name, WORK_TIME);
+    int total_iterations = 0;
+    start_time = dtime();
+    double end_time;
+    while (true) {
+        for (int i = 0; i < iterations; ++i) {
+            hash_result = HASH_INITIAL_VALUE;
+            if (!run_wrapper(&hash_result)) {
+                fprintf(stderr, "%s: failed to get benchmark result.\n", name);
+                teardown_test();
+                return false;
+            }
+            ++total_iterations;
+        }
+        end_time = dtime();
+        if (end_time - start_time > WORK_TIME)
+            break;
+    }
+
+    // print results
+    double per_time = (end_time - start_time) / (double)total_iterations * (1000.0 * 1000.0);
+    if (result_only) {
+        printf("%f\n", per_time);
+    } else {
+        printf("%s: %i iterations took %f seconds\n", name, total_iterations, end_time - start_time);
+        printf("%s: %f microseconds per iteration\n", name, per_time);
+        printf("%s: hash result of last run: %08x\n", name, hash_result);
+    }
+
+    // write score
+    if (!result_only) {
+        FILE* file = fopen("results.csv", "a");
+        if (!file) {
+            fprintf(stderr, "%s: failed to open results.csv for appending.\n", name);
+            teardown_test();
+            return false;
+        }
+        fprintf(file, "\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",%i,%f,%i,%i,\"%08x\"\n",
+                name, test_language(), test_version(), test_filename(), test_format(),
+                (int)object_size, per_time, (int)binary_size,
+                #if BENCHMARK_SIZE_OPTIMIZED
+                1,
+                #else
+                0,
+                #endif
+                hash_result);
+        fclose(file);
+    }
+
+    teardown_test();
+
+    return true;
+}
+
+int main(int argc, char **argv) {
+    const char* name = argv[0];
+    ++argv;
+    --argc;
+
+    // argument "-r" will print only the per-iteration time result of the test
+    bool result_only = false;
+    if (argc >= 1 && strcmp(argv[0], "-r") == 0) {
+        result_only = true;
+        ++argv;
+        --argc;
+    }
+
+    // need sizes
+    if (argc == 0) {
+        fprintf(stderr, "%s: object sizes in the range [1,5] must be "
+                "provided as command-line arguments\n", name);
+        return EXIT_FAILURE;
+    }
+
+    // generate a throwaway object. we do this so that the generator code
+    // is included in hash readers (it is necessary for hash-object, so we
+    // force all benchmarks to include it so we can subtract the
+    // size correctly)
+    object_destroy(benchmark_object_create(1));
+
+    // get executable details (if stat fails the size is reported as 0)
+    size_t binary_size = 0;
+    struct stat st;
+    if (stat(name, &st) == 0)
+        binary_size = (size_t)st.st_size;
+    static const char* build = "build/";
+    if (strlen(name) > strlen(build) && memcmp(name, build, strlen(build)) == 0)
+        name += strlen(build);
+    if (!result_only)
+        printf("%s: executable size: %i bytes\n", name, (int)binary_size);
+
+    // run different benchmark sizes
+    fragment_memory();
+    for (; argc > 0; --argc, ++argv) {
+
+        size_t object_size = atoi(argv[0]);
+        if (object_size < 1 || object_size > 5) {
+            fprintf(stderr, "%s: object size must be in the range [1,5]\n", name);
+            return EXIT_FAILURE;
+        }
+
+        if (!go(result_only, object_size, binary_size, name))
+            return EXIT_FAILURE;
+    }
+    free_fragmented_memory();
+    return EXIT_SUCCESS;
+}
+
+// Reads the whole file into a malloc()ed buffer and stores its length in
+// *size_out. Returns NULL (after printing a message) on any failure.
+static char* load_file(const char* filename, size_t* size_out) {
+    FILE* file = fopen(filename, "rb");
+    if (!file) {
+        fprintf(stderr, "missing file!\n");
+        return NULL;
+    }
+
+    // determine the file size by seeking to the end
+    long size = -1;
+    if (fseek(file, 0, SEEK_END) == 0)
+        size = ftell(file);
+    if (size < 0 || fseek(file, 0, SEEK_SET) != 0) {
+        fprintf(stderr, "error reading file!\n");
+        fclose(file);
+        return NULL;
+    }
+
+    // allocate at least one byte so that an empty file is not mistaken
+    // for an allocation failure (malloc(0) may return NULL)
+    char* data = (char*)malloc(size > 0 ? (size_t)size : 1);
+    if (!data) {
+        fprintf(stderr, "out of memory!\n");
+        fclose(file);
+        return NULL;
+    }
+
+    size_t read_count = fread(data, 1, (size_t)size, file);
+    fclose(file);
+    if (read_count != (size_t)size) {
+        fprintf(stderr, "error reading file!\n");
+        free(data);
+        return NULL;
+    }
+
+    *size_out = (size_t)size;
+    return data;
+}
+
+// Loads the default data file for the given format and object size.
+char* load_data_file(const char* format, size_t object_size, size_t* size_out) {
+    return load_data_file_ex(format, object_size, size_out, NULL);
+}
+
+// Loads the data file for the given format, size and optional config suffix.
+char* load_data_file_ex(const char* format, size_t object_size, size_t* size_out, const char* config) {
+    char filename[64];
+    benchmark_filename(filename, sizeof(filename), object_size, format, config);
+    return load_file(filename, size_out);
+}
+
diff --git a/src/common/benchmark.h b/src/common/benchmark.h
new file mode 100644
index 0000000..32cebaa
--- /dev/null
+++
b/src/common/benchmark.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This is the benchmarking framework for C/C++ serialization libraries.
+
+#ifndef BENCHMARK_H
+#define BENCHMARK_H 1
+
+#include "platform.h"
+#include "generator.h"
+#include "hash.h"
+
+#define BENCHMARK_FORMAT_MESSAGEPACK "mp"
+#define BENCHMARK_FORMAT_JSON "json"
+#define BENCHMARK_FORMAT_UBJSON "ubjson"
+#define BENCHMARK_FORMAT_BSON "bson"
+#define BENCHMARK_FORMAT_BINN "binn"
+
+#define BENCHMARK_LANGUAGE_C "C"
+#define BENCHMARK_LANGUAGE_CXX "C++"
+
+#define BENCHMARK_NODE_MAX (32*4096) /* upper bound */
+#define BENCHMARK_VERSION 0.1
+
+#define BENCHMARK_STRINGIFY2(x) #x
+#define BENCHMARK_STRINGIFY(x) BENCHMARK_STRINGIFY2(x)
+#define BENCHMARK_VERSION_STR BENCHMARK_STRINGIFY(BENCHMARK_VERSION)
+
+// when this is enabled, all read/dom tests make a full copy of
+// the data on each iteration whether or not they modify it. this
+// allows correctly comparing against in-situ parsers.
+// the actual in-situ parsing is enabled by BENCHMARK_IN_SITU in the Makefile.
+#define BENCHMARK_MAKE_IN_SITU_COPIES 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * These functions should be implemented by the test.
+ * run_test() is run repeatedly until a time limit has been reached.
+ *
+ * A hash of the resulting data must be output to ensure all data was
+ * read correctly and to prevent the compiler from optimizing away
+ * parts of the test.
+ */
+bool is_benchmark(void);
+bool run_test(uint32_t* hash_out);
+bool setup_test(size_t object_size);
+void teardown_test(void);
+
+const char* test_language(void);
+const char* test_version(void);
+const char* test_format(void);
+const char* test_filename(void);
+
+// Loads a data file. Should be freed with free().
+char* load_data_file(const char* format, size_t object_size, size_t* size_out);
+
+// Loads a special data file. Should be freed with free().
+char* load_data_file_ex(const char* format, size_t object_size, size_t* size_out, const char* config);
+
+// Generates a random object for benchmarking.
+object_t* benchmark_object_create(size_t object_size);
+
+// Generates the filename for a data file
+void benchmark_filename(char* buf, size_t size, size_t object_size, const char* format, const char* config);
+
+// Copies a data buffer if in-situ parsing is enabled. All
+// parsing tests must call this on every iteration.
+static inline char* benchmark_in_situ_copy(char* source, size_t size) {
+    #if BENCHMARK_MAKE_IN_SITU_COPIES
+    char* data = (char*)malloc(size + 1);
+    if (!data)
+        return NULL;
+    memcpy(data, source, size);
+    // some APIs (e.g. yajl, or RapidJSON with in-situ mode) require the
+    // entire document to be null-terminated
+    data[size] = '\0';
+    return data;
+    #else
+    return source;
+    #endif
+}
+
+// Frees a data buffer if in-situ parsing is enabled
+static inline void benchmark_in_situ_free(char* data) {
+    #if BENCHMARK_MAKE_IN_SITU_COPIES
+    free(data);
+    #endif
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/common/buffer.h b/src/common/buffer.h
new file mode 100644
index 0000000..0a02f09
--- /dev/null
+++ b/src/common/buffer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef BENCHMARK_BUFFER_H
+#define BENCHMARK_BUFFER_H 1
+
+// some libraries don't have support for writing to a growable
+// buffer (e.g. cmp or ubj) so we implement one here.
+
+// capacity allocated by buffer_init(); the buffer grows by doubling
+#define BUFFER_INITIAL_CAPACITY 4096
+
+typedef struct buffer_t {
+    char* data;
+    size_t count;
+    size_t capacity;
+} buffer_t;
+
+// Initializes an empty buffer. If the initial allocation fails the buffer
+// is left with zero capacity and the first buffer_write() retries it.
+static inline void buffer_init(buffer_t* buffer) {
+    buffer->count = 0;
+    buffer->data = (char*)malloc(BUFFER_INITIAL_CAPACITY);
+    buffer->capacity = buffer->data ? BUFFER_INITIAL_CAPACITY : 0;
+}
+
+// Releases the buffer's storage.
+static inline void buffer_destroy(buffer_t* buffer) {
+    free(buffer->data);
+}
+
+// Appends count bytes to the buffer, growing it geometrically as needed.
+// Returns false (leaving the buffer unchanged) if the required size
+// overflows size_t or memory cannot be allocated.
+static inline bool buffer_write(buffer_t* buffer, const char* data, size_t count) {
+    if (count > (size_t)-1 - buffer->count)
+        return false;
+    size_t needed = buffer->count + count;
+    if (needed > buffer->capacity) {
+        size_t new_capacity = buffer->capacity ? buffer->capacity : BUFFER_INITIAL_CAPACITY;
+        while (new_capacity < needed) {
+            // stop doubling before it can wrap around
+            if (new_capacity > (size_t)-1 / 2) {
+                new_capacity = needed;
+                break;
+            }
+            new_capacity *= 2;
+        }
+        char* new_data = (char*)realloc(buffer->data, new_capacity);
+        if (!new_data)
+            return false;
+        buffer->data = new_data;
+        buffer->capacity = new_capacity;
+    }
+    memcpy(buffer->data + buffer->count, data, count);
+    buffer->count += count;
+    return true;
+}
+
+#endif
diff --git a/src/common/generator.c b/src/common/generator.c
new file mode 100644
index 0000000..dec46a0
--- /dev/null
+++ b/src/common/generator.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this
permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "generator.h"
+
+
+
+uint32_t random_next(random_t* random) { // advances the multiply-with-carry state and returns the new value
+    static const uint64_t a = UINT64_C(3636507990);
+    uint64_t r = (a * random->v) + random->c;
+    random->v = (uint32_t)((0xffffffff - 1 - r) & 0xffffffff);
+    random->c = (uint32_t)(r >> 32);
+    return random->v;
+}
+
+void random_seed(random_t* random, uint64_t val) { // low 32 bits seed v, high 32 bits seed c
+    // we force a few bits just to make sure these values are not 0 or UINT_MAX-1
+    random->v = (val & 0xfffffffe) | 0x0000100;
+    random->c = ((val >> 32) & 0xfffeffff) | 0x0100000;
+}
+
+// generates a random number with (approximately) inverse distribution
+// up to (approximately) the given max, so we get lots of small numbers
+// and a few big ones
+uint32_t random_inverse(random_t* random, uint32_t max) {
+    if (max == 0)
+        return max;
+    if (max == 1)
+        return random_next(random) & 1;
+
+    uint32_t bits = 1; // number of significant bits in max
+    uint32_t counter = max;
+    while ((counter >>= 1))
+        ++bits;
+
+    uint32_t div = (random_next(random) % (1 << (bits * 2 / 3))) + 1;
+    uint32_t ret = random_next(random) % (max / div);
+    return ret + (random_next(random) % (1 << (bits / 3)));
+}
+
+
+
+// Some random object generation functions
+
+// generates short lowercase ascii keys, realistic for real-world data
+// (null-terminated for certain APIs that may require it)
+static char* random_key(random_t* random) {
+    uint32_t length = random_next(random) % 9 + 2; // 2 to 10 characters
+    char* str = (char*)malloc(length + 1);
+    for (uint32_t i = 0; i < length; ++i)
+        str[i] = 'a' + (random_next(random) % ('z' - 'a' + 1));
+    str[length] = '\0';
+    return str;
+}
+
+// generates a random string of the given length (null-terminated for
+// certain APIs that may require it)
+static char* random_string(random_t* random, uint32_t length) {
+    char* str = (char*)malloc(length + 1);
+
+    // we'll assume most non-key strings don't have non-ascii characters
+    // (all key strings are generated as lowercase ascii letters)
+    bool ascii = random_next(random) % 4 != 0;
+
+    // a string might have either lots of spaces (words) or
+    // no spaces (miscellaneous small data, urls, etc.)
+    bool spaces = length > 50 || (random_next(random) % 4) != 0;
+    int next_space = (random_next(random) % 8) + 2;
+
+    for (uint32_t i = 0; i < length; ++i) {
+
+        // lots of spaces
+        if (spaces && next_space-- == 0) {
+            next_space = (random_next(random) % 8) + 2;
+            str[i] = ' ';
+            continue;
+        }
+
+        // rarely, generate a character that might need to be escaped
+        if (random_next(random) % 128 == 0) {
+            char specials[] = {'\n', '"', '\\'};
+            str[i] = specials[random_next(random) % sizeof(specials)];
+            continue;
+        }
+
+        // generate a utf-8 non-ascii character (for now a
+        // character from Latin-1 supplement)
+        if (!ascii && (length - i) >= 2 && random_next(random) % 4 == 0) {
+            uint32_t codepoint = 0xA1 + random_next(random) % 0x5F;
+            str[i++] = (char)(0xC0 | ((codepoint >> 6) & 0x1F));
+            str[i] = (char)(0x80 | (codepoint & 0x3F));
+            continue;
+        }
+
+        // sometimes give us any ascii character (this will add a few
+        // more quotes and backslashes)
+        if (random_next(random) % 32 == 0) {
+            str[i] = 33 + random_next(random) % 94;
+            continue;
+        }
+
+        // sometimes give us a capital letter, but usually lowercase.
+        str[i] = 'a' + (random_next(random) % ('z' - 'a' + 1));
+        if (random_next(random) % 32 == 0)
+            str[i] -= 'a' - 'A';
+    }
+
+    str[length] = '\0';
+    return str;
+}
+
+static type_t random_type(random_t* random, int size, int depth, uint32_t* length) { // picks a type; *length is set for str/array/map
+    *length = 0;
+
+    // the odds of a map or array are proportional to the depth. at
+    // the base depth it's always one or the other.
+    uint32_t odds = 2;
+    int d = depth < 30 ? depth : 30; // cap the shift so odds cannot overflow
+    while (d-- > 0)
+        odds <<= 1;
+    if (depth < 31 && random_next(random) % odds <= 2) {
+        type_t type = (random_next(random) & 1) ? type_map : type_array;
+
+        // generate a random length close to the size
+        int len = 3;
+        while (size-- > depth)
+            len *= 2;
+        len += random_next(random) % len;
+        if (type == type_map)
+            len /= 2;
+        *length = len;
+        return type;
+    }
+
+    // reals are probably pretty rare
+    if (random_next(random) % 64 == 0)
+        return type_double;
+
+    // the rest we distribute with a simple switch
+    switch (random_next(random) % 8) {
+
+        case 0:
+            return type_nil;
+        case 1:
+            return type_bool;
+        case 3:
+            return type_uint;
+        case 4:
+        case 5:
+        case 6:
+            return type_int;
+        case 7:
+            // we get plenty of short strings as map keys. we'll
+            // take 1/8 map/array values as potentially long strings
+            break;
+
+        default: // value 2 has no case of its own, so it also becomes a string
+            break;
+    }
+
+    *length = random_inverse(random, 1000);
+    return type_str;
+}
+
+// expand the given size to preserve object alignment
+static size_t object_align(size_t size) {
+    const size_t alignment = __alignof__(object_t);
+    return (size + alignment - 1) & (~(alignment-1));
+}
+
+static void object_init(object_t* object, random_t* random, int size, int depth, size_t* total_size) { // fills object recursively; adds the bytes of its strings/children to *total_size
+    uint32_t length;
+    object->type = random_type(random, size, depth, &length);
+
+    switch (object->type) {
+        case type_bool:
+            object->b = random_next(random) & 1;
+            break;
+
+        case type_double: {
+            // we should probably make it possible to generate nan and infinity here
+            object->d = (double)((int)(random_next(random) % 2048) - 1024);
+            // we add lots of mantissa to try to use the full range of doubles
+            object->d += (double)(random_next(random) % 1024) / 1024.0;
+            object->d += (double)(random_next(random) % 1024) / (1024.0 * 1024.0);
+            object->d += (double)(random_next(random) % 1024) / (1024.0 * 1024.0 * 1024.0);
+            object->d += (double)(random_next(random) % 1024) / (1024.0 * 1024.0 * 1024.0 * 1024.0);
+            object->d += (double)(random_next(random) % 1024) / (1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0);
+            break;
+        }
+
+        // sometimes numbers are huge, and we want to test 64-bit. but
+        // usually they're very small.
+        case type_uint:
+            if (random_inverse(random, 10000) > 5000) {
+                // note: we don't allow numbers in the range [INT64_MAX, UINT64_MAX)
+                object->u = ((uint64_t)(random_next(random) & ~(UINT32_C(1) << 31))) << 32;
+                object->u |= (uint64_t)random_next(random);
+            } else {
+                object->u = random_inverse(random, 0xfffff);
+            }
+            break;
+        case type_int:
+            if (random_inverse(random, 10000) > 5000) {
+                uint64_t u = ((uint64_t)random_next(random)) << 32;
+                u |= (uint64_t)random_next(random);
+                object->i = (int64_t)u;
+            } else {
+                object->i = random_inverse(random, 0xfffff);
+                object->i *= (random_next(random) & 1) ? -1 : 1;
+            }
+            break;
+
+        case type_str:
+            *total_size += object_align(length + 1);
+            object->l = length;
+            object->str = random_string(random, length);
+            break;
+
+        case type_array:
+            *total_size += object_align(length * sizeof(object_t));
+            object->l = length;
+            object->children = (object_t*)malloc(length * sizeof(object_t));
+            for (uint32_t i = 0; i < length; ++i)
+                object_init(object->children + i, random, size, depth + 1, total_size);
+            break;
+
+        case type_map:
+            *total_size += object_align(2 * length * sizeof(object_t));
+            object->l = length;
+            object->children = (object_t*)malloc(2 * length * sizeof(object_t));
+            for (uint32_t i = 0; i < length; ++i) {
+
+                // we make sure we don't have duplicate keys. some libraries
+                // actually check for this (e.g. binn.)
+                bool unique;
+                char* key = NULL;
+                do {
+                    free(key);
+                    key = random_key(random);
+                    unique = true;
+                    for (uint32_t j = 0; j < i; ++j) {
+                        if (strcmp(key, object->children[j * 2].str) == 0) {
+                            unique = false;
+                            break;
+                        }
+                    }
+                } while (!unique);
+
+                // maps must have string keys for json compatibility. they are
+                // realistically always short lowercase ascii text.
+                object->children[i * 2].type = type_str;
+                object->children[i * 2].str = key;
+                object->children[i * 2].l = strlen(object->children[i * 2].str);
+                *total_size += object_align(object->children[i * 2].l + 1);
+
+                object_init(object->children + i * 2 + 1, random, size, depth + 1, total_size);
+            }
+            break;
+
+        default:
+            break;
+    }
+}
+
+static void object_teardown(object_t* object) { // frees the individually allocated strings and child arrays of a source object
+    if (object->type == type_str) {
+        free(object->str);
+    } else if (object->type == type_map) {
+        for (uint32_t i = 0; i < object->l * 2; ++i)
+            object_teardown(object->children + i);
+        free(object->children);
+    } else if (object->type == type_array) {
+        for (uint32_t i = 0; i < object->l; ++i)
+            object_teardown(object->children + i);
+        free(object->children);
+    }
+}
+
+static char* object_copy(object_t* dest, object_t* src, char* pool) { // copies src into dest, placing children/strings at pool; returns the next free pool position
+    *dest = *src;
+
+    if (src->type == type_map) {
+        dest->children = (object_t*)pool;
+        pool += object_align(sizeof(object_t) * dest->l * 2);
+        for (size_t i = 0; i < src->l * 2; ++i)
+            pool = object_copy(dest->children + i, src->children + i, pool);
+
+    } else if (dest->type == type_array) {
+        dest->children = (object_t*)pool;
+        pool += object_align(sizeof(object_t) * dest->l);
+        for (size_t i = 0; i < src->l; ++i)
+            pool = object_copy(dest->children + i, src->children + i, pool);
+
+    } else if (src->type == type_str) {
+        dest->str = pool;
+        memcpy(dest->str, src->str, src->l + 1);
+        pool += object_align(src->l + 1);
+    }
+
+    return pool;
+}
+
+object_t* object_create(uint64_t seed, int size) {
+    random_t random;
+    random_seed(&random, seed);
+
+    // first we create an object, tracking its total size
+    object_t* src = (object_t*)malloc(sizeof(object_t));
+    size_t total_size = object_align(sizeof(object_t));
+    object_init(src, &random, size, 0, &total_size);
+
+    // next we allocate a contiguous chunk of memory and copy
+    // the object into it
+    char* pool = (char*)malloc(total_size);
+    object_t* dest = (object_t*)pool;
+    pool += object_align(sizeof(object_t));
+    object_copy(dest, src, pool);
+
+    object_teardown(src);
+    free(src);
+    return dest;
+    /*
+    (void)object_teardown;
+    return src;
+    */
+}
+
+void object_destroy(object_t* object) {
+    // the external object is a flat array of data
+    free(object);
+}
+
+#if 0
+int main(void) {
+    random_t random;
+    random_seed(&random, 4);
+
+#if 0
+    for (int i = 0; i < 500; ++i) {
+        printf("%4u ", random_inverse(&random, 1000));
+        if (i % 10 == 9)
+            printf("\n");
+    }
+    printf("\n");
+#endif
+
+#if 0
+    for (int i = 0; i < 50; ++i) {
+        //char* str = random_key(&random);
+        char* str = random_string(&random, random_inverse(&random, 1000));
+        printf("%s\n", str);
+        free(str);
+    }
+#endif
+
+#if 0
+    for (int i = 0; i < 50; ++i) {
+    }
+#endif
+}
+#endif
diff --git a/src/common/generator.h b/src/common/generator.h
new file mode 100644
index 0000000..e0a15f6
--- /dev/null
+++ b/src/common/generator.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef BENCHMARK_GENERATOR_H +#define BENCHMARK_GENERATOR_H 1 + +// This file implements a random structured data generator. +// It's used for generating random MessagePack or JSON. + +#include "platform.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +// A simple multiply-with-carry PRNG, implemented here for +// cross-platform consistency. +typedef struct random_t { + uint32_t v; + uint32_t c; +} random_t; + +void random_seed(random_t* random, uint64_t val); +uint32_t random_next(random_t* random); + +// generates a random number with (approximately) inverse distribution +// up to (approximately) the given max, so we get lots of small numbers +// and a few big ones +uint32_t random_inverse(random_t* random, uint32_t max); + + +typedef enum type_t { + type_nil = 1, // enumerators start at 1, so a zero-filled object_t carries no valid type + type_bool, + type_double, + type_int, + type_uint, + type_str, + type_array, + type_map +} type_t; + +typedef struct object_t { + type_t type; + uint32_t l; // length of str, element count of array, key/value pair count of map + union { + bool b; + double d; + int64_t i; + uint64_t u; + struct object_t* children; // array: l elements; map: l * 2 entries, key (a type_str object) at index 2n and its value at 2n + 1 + char* str; // null-terminated, but l is also the non-terminated length + }; +} object_t; + +// Generates a random object with the given arbitrary "size". This should +// somewhat represent "real-world" data. +// +// The resulting object is stored depth-first in one contiguous chunk of +// memory to minimize impact on benchmarks. +// +// (Storing the data contiguously has essentially no effect on fast modern +// computers, but on very low-end devices like a Raspberry Pi, it is +// necessary to properly subtract out the hashing time. Without it, the +// mpack-read test actually beats the hash-object test on RPi!
Parsing and +// hashing contiguous encoded data is actually faster than hashing already +// decoded data scattered in memory. Just goes to show how slow memory +// access is on the RPi.) +object_t* object_create(uint64_t seed, int size); + +// destroys the object (the whole tree lives in one allocation, so this is a single free) +void object_destroy(object_t* object); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/common/hash.h b/src/common/hash.h new file mode 100644 index 0000000..d0edff8 --- /dev/null +++ b/src/common/hash.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef BENCHMARK_HASH_H +#define BENCHMARK_HASH_H 1 + +/* + * This is a trivial multiplicative hash. All output data of a benchmark test + * are hashed to ensure it was serialized correctly, to simulate accessing the + * data, and to ensure that no parts of it were optimized away.
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define HASH_INITIAL_VALUE 15373 + +#if 0 +// debugger to print the first few hash values. use it to +// debug why a new decoder's hash doesn't match +static void hash_p(const char* s, uint32_t hash, uint32_t val) { + if (hash == 1) { + printf("BREAK!\n"); + } + static int i = 0; + if (i < 20) { + i++; + printf("hashing into %08x (%s %u)\n", hash, s, val); + } +} +#else +#define hash_p(...) /* nothing */ +#endif + +static inline uint32_t hash_u32(uint32_t hash, uint32_t val) { + hash_p("u32", hash, val); + return hash * 31 ^ val; // parsed as (hash * 31) ^ val, since '*' binds tighter than '^' +} + +static inline uint32_t hash_u64(uint32_t hash, uint64_t val) { + return hash_u32(hash_u32(hash, (uint32_t)(val >> 32)), (uint32_t)(val)); // the high 32 bits are mixed in first, then the low 32 bits +} + +static inline uint32_t hash_u8(uint32_t hash, uint8_t val) {return hash_u32(hash, val);} +static inline uint32_t hash_u16(uint32_t hash, uint16_t val) {return hash_u32(hash, val);} + +static inline uint32_t hash_i8 (uint32_t hash, int8_t val) {return hash_u8 (hash, (uint8_t) val);} +static inline uint32_t hash_i16(uint32_t hash, int16_t val) {return hash_u16(hash, (uint16_t)val);} +static inline uint32_t hash_i32(uint32_t hash, int32_t val) {return hash_u32(hash, (uint32_t)val);} +static inline uint32_t hash_i64(uint32_t hash, int64_t val) {return hash_u64(hash, (uint64_t)val);} + +static inline uint32_t hash_bool(uint32_t hash, bool val) { + uint32_t i = val ? 1 : 0; + return hash_u32(hash, i); +} + +static inline uint32_t hash_double(uint32_t hash, double val) { + // to avoid floating point differences between different parsers and + // architectures, we skip over floats. there are very few floats in + // the data anyway. instead we just mix in a prime.
+ return hash_u32(hash, 43013); +} + +static inline uint32_t hash_float(uint32_t hash, float val) { + return hash_double(hash, val); +} + +static inline uint32_t hash_str(uint32_t hash, const char* str, size_t len) { + hash_p("str", hash, len); + hash = hash_u32(hash, (uint32_t)len); + + // the string is hashed as a series of little-endian uint32, zero-padded. + + // hash every four bytes together + const unsigned char* ustr = (const unsigned char*)str; + for (; len >= 4; len -= 4) { + uint32_t val = (((uint32_t)ustr[3]) << 24) | (((uint32_t)ustr[2]) << 16) | + (((uint32_t)ustr[1]) << 8) | (((uint32_t)ustr[0]) << 0); + hash = hash_u32(hash, val); + ustr += 4; + } + + // hash the remaining 0-3 bytes (nothing further is mixed in when no bytes remain) + uint32_t val = 0; + switch (len) { + case 3: + val |= ((uint32_t)ustr[2]) << 16; + // fallthrough + case 2: + val |= ((uint32_t)ustr[1]) << 8; + // fallthrough + case 1: + val |= ((uint32_t)ustr[0]); + hash = hash_u32(hash, val); + break; + case 0: + break; + default: + #if defined(__GNUC__) || defined(__clang__) + __builtin_unreachable(); + #endif + break; + } + + hash_p("strdone", hash, 0); + return hash; +} + +static inline uint32_t hash_nil(uint32_t hash) { + // don't use a simple number or string so that it can't + // give the same hash as any other simple type + return hash_str(hash_u32(hash, 0), "nil", 3); +} + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/common/platform.h b/src/common/platform.h new file mode 100644 index 0000000..480df1f --- /dev/null +++ b/src/common/platform.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do
so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef BENCHMARK_PLATFORM_H +#define BENCHMARK_PLATFORM_H 1 + +// enable clock_gettime() +#if __STDC_VERSION__ >= 199901L +#define _XOPEN_SOURCE 600 +#else +#define _XOPEN_SOURCE 500 +#endif /* __STDC_VERSION__ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/src/hash/hash-data.c b/src/hash/hash-data.c new file mode 100644 index 0000000..b3865cf --- /dev/null +++ b/src/hash/hash-data.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" + +char* hash_data; +size_t hash_size; + +bool run_test(uint32_t* hash_out) { + *hash_out = hash_str(*hash_out, hash_data, hash_size); + return true; +} + +bool setup_test(size_t object_size) { + // This is a very rough approximation of the size of encoded binary data + // in any format. It doesn't actually matter if it's very close; the hash + // time for flat data is nearly insignificant. we're just interested in + // including the hash code (and object generation code and all other code in + // benchmark.c) so its compiled size can be subtracted out of the results.
+ hash_size = 100; + for (size_t i = 0; i < object_size; ++i) + hash_size <<= 3; + + srand(123); // fixed seed, so the same bytes are generated on every run + hash_data = (char*)malloc(hash_size); + if (!hash_data) // report allocation failure instead of writing through NULL below + return false; + for (size_t i = 0; i < hash_size; ++i) + hash_data[i] = (char)rand(); + return true; +} + +void teardown_test(void) { + free(hash_data); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return BENCHMARK_VERSION_STR; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "random data"; +} + +const char* test_filename(void) { + return __FILE__; +} + diff --git a/src/hash/hash-object.c b/src/hash/hash-object.c new file mode 100644 index 0000000..b2ec25c --- /dev/null +++ b/src/hash/hash-object.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + +#include "benchmark.h" + +static object_t* root_object; +char* insitu_data; +size_t insitu_size; + +static void hash_object(object_t* object, uint32_t* hash) { + switch (object->type) { + case type_nil: *hash = hash_nil(*hash); return; + case type_bool: *hash = hash_bool(*hash, object->b); return; + case type_double: *hash = hash_double(*hash, object->d); return; + case type_int: *hash = hash_i64(*hash, object->i); return; + case type_uint: *hash = hash_u64(*hash, object->u); return; + + case type_str: + *hash = hash_str(*hash, object->str, object->l); + return; + + // unused types in this benchmark + #if 0 + case type_float: + write_float(hash, node_float(node)); + return; + case type_bin: + *hash = hash_str(*hash, node_data(node), node_data_len(node)); + return; + case type_ext: + *hash = hash_u8(*hash, node_exttype(node)); + *hash = hash_str(*hash, node_data(node), node_data_len(node)); + return; + #endif + + case type_array: { + uint32_t count = object->l; + for (uint32_t i = 0; i < count; ++i) + hash_object(object->children + i, hash); + *hash = hash_u32(*hash, count); + return; + } + + case type_map: { + uint32_t count = object->l; + for (uint32_t i = 0; i < count; ++i) { + // we expect keys to be short strings + object_t* key = object->children + (i * 2); + *hash = hash_str(*hash, key->str, key->l); + + hash_object(object->children + (i * 2) + 1, hash); + } + *hash = hash_u32(*hash, count); + return; + } + + default: + break; + } + + abort(); +} + +bool run_test(uint32_t* hash_out) { + char* data = benchmark_in_situ_copy(insitu_data, insitu_size); + if (!data) + return false; + + hash_object(root_object, hash_out); + + benchmark_in_situ_free(data); + return true; +} + +bool setup_test(size_t object_size) { + root_object = benchmark_object_create(object_size); + // As with hash-data, this is just a rough approximation of encoded + // binary data. We need it here to create unused in-situ copies to + // match all parsing tests.
+ insitu_size = 100; + for (size_t i = 0; i < object_size; ++i) + insitu_size <<= 3; + + srand(123); + insitu_data = (char*)malloc(insitu_size); + if (!insitu_data) // report allocation failure instead of writing through NULL below + return false; + for (size_t i = 0; i < insitu_size; ++i) + insitu_data[i] = (char)rand(); + return true; +} + +void teardown_test(void) { + object_destroy(root_object); + free(insitu_data); // allocated in setup_test; previously never released +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return BENCHMARK_VERSION_STR; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "C structs"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/jansson/jansson-dump.c b/src/jansson/jansson-dump.c new file mode 100644 index 0000000..a037c23 --- /dev/null +++ b/src/jansson/jansson-dump.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + +#include "benchmark.h" +#include "jansson.h" + +// Jansson does not preserve order by default. We can turn this on +// to verify that it is hashing correctly, but otherwise we let it +// re-order the data however it wants. It seems marginally faster +// without preserving order (because it puts everything in hashtables, +// and has to re-sort by "serial number" to print in order.) + +static object_t* root_object; + +static json_t* convert(object_t* object) { + switch (object->type) { + case type_bool: return json_boolean(object->b); + case type_nil: return json_null(); + case type_int: return json_integer(object->i); + case type_double: return json_real(object->d); + + case type_uint: + // Note: The generator limits unsigned int to INT64_MAX. Jansson + // doesn't allow integers outside the range of signed int64. + return json_integer((int64_t)object->u); + + case type_str: + return json_stringn((const char*)object->str, object->l); + + case type_array: { + json_t* array = json_array(); + for (size_t i = 0; i < object->l; ++i) { + json_t* child = convert(object->children + i); + if (!child || json_array_append_new(array, child) != 0) { + json_decref(array); + return NULL; + } + } + return array; + } + + case type_map: { + json_t* map = json_object(); + for (size_t i = 0; i < object->l; ++i) { + const char* key = object->children[i * 2].str; + json_t* child = convert(object->children + i * 2 + 1); + if (!child || json_object_set_new(map, key, child) != 0) { + json_decref(map); + return NULL; + } + } + return map; + } + + default: + assert(0); + break; + } + return false; +} + +bool run_test(uint32_t* hash_out) { + json_t* root = convert(root_object); + if (!root) + return false; + + size_t flags = JSON_COMPACT; + #if PRESERVE_ORDER + flags |= JSON_PRESERVE_ORDER; + #endif + + // we have to strlen() to get the result size i guess? 
+ char* result = json_dumps(root, flags); + *hash_out = hash_str(*hash_out, result, strlen(result)); + free(result); + + json_decref(root); + return true; +} + +bool setup_test(size_t object_size) { + root_object = benchmark_object_create(object_size); + return true; +} + +void teardown_test(void) { + object_destroy(root_object); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return JANSSON_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} + diff --git a/src/jansson/jansson-load.c b/src/jansson/jansson-load.c new file mode 100644 index 0000000..2be86a7 --- /dev/null +++ b/src/jansson/jansson-load.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "benchmark.h" +#include "jansson.h" + +// Jansson does not support iterating over objects in their original +// order. We implement a bunch of hacks that access the internals in +// order to iterate in order so we can check that the hash values +// actually work out. This is not on by default obviously. + +#if PRESERVE_ORDER +#include "jansson_private.h" + +struct entry { + size_t serial; + const char* key; + json_t* child; +}; + +int compare_entry(const void* left, const void* right) { + size_t ls = ((const struct entry*)left)->serial; + size_t rs = ((const struct entry*)right)->serial; + return (ls < rs) ? -1 : (ls > rs) ? 1 : 0; +} +#endif + +static char* file_data; +static size_t file_size; + +static bool hash_json(json_t* json, uint32_t* hash) { + switch (json_typeof(json)) { + case JSON_NULL: *hash = hash_nil(*hash); return true; + case JSON_TRUE: *hash = hash_bool(*hash, true); return true; + case JSON_FALSE: *hash = hash_bool(*hash, false); return true; + case JSON_REAL: *hash = hash_double(*hash, json_real_value(json)); return true; + case JSON_STRING: *hash = hash_str(*hash, json_string_value(json), json_string_length(json)); return true; + + case JSON_INTEGER: + // Jansson does not support JSON big integers at all. Unlike + // YAJL, it does not even allow us to parse them ourselves by + // giving us the original string. If you don't set a flag to + // convert ints as reals, they will cause a parse error. 
+ *hash = hash_i64(*hash, json_integer_value(json)); + return true; + + case JSON_ARRAY: { + size_t index; + json_t* child; + json_array_foreach(json, index, child) + if (!hash_json(child, hash)) + return false; + *hash = hash_u32(*hash, json_array_size(json)); + return true; + } + + case JSON_OBJECT: { + #if PRESERVE_ORDER + size_t count = json_object_size(json); + struct entry* entries = malloc(sizeof(struct entry) * count); + + size_t i = 0; + const char* key; + json_t* child; + json_object_foreach(json, key, child) { + entries[i].serial = hashtable_iter_serial(json_object_key_to_iter(key)); + entries[i].key = key; + entries[i].child = child; + ++i; + } + + qsort(entries, count, sizeof(struct entry), &compare_entry); + + for (i = 0; i < count; ++i) { + *hash = hash_str(*hash, entries[i].key, strlen(entries[i].key)); + if (!hash_json(entries[i].child, hash)) + return false; + } + + free(entries); + #else + const char* key; + json_t* child; + json_object_foreach(json, key, child) { + *hash = hash_str(*hash, key, strlen(key)); + hash_json(child, hash); + } + #endif + + *hash = hash_u32(*hash, json_object_size(json)); + return true; + } + + default: + break; + } + + return true; +} + +bool run_test(uint32_t* hash_out) { + char* data = benchmark_in_situ_copy(file_data, file_size); + if (!data) + return false; + + // Jansson has a flag to reject duplicate keys, but for some reason it's + // not on by default, even though it puts everything in hashtables + // and re-orders everything and does not even support iterating objects + // in their original order. How are you supposed to deal with duplicates + // if it re-orders them?? Whatever, we'll just let it do its thing. + // (We don't require libraries in this benchmark to check for duplicates + // anyway.) 
+ int flags = 0; + + json_error_t error; + json_t* root = json_loadb(data, file_size, flags, &error); + if (!root) { + benchmark_in_situ_free(data); + return false; + } + + bool ok = hash_json(root, hash_out); + json_decref(root); + benchmark_in_situ_free(data); + return ok; +} + +bool setup_test(size_t object_size) { + file_data = load_data_file(BENCHMARK_FORMAT_JSON, object_size, &file_size); + if (!file_data) + return false; + return true; +} + +void teardown_test(void) { + free(file_data); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return JANSSON_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} + diff --git a/src/libbson/libbson-append.c b/src/libbson/libbson-append.c new file mode 100644 index 0000000..e79a557 --- /dev/null +++ b/src/libbson/libbson-append.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "bson.h" + +static object_t* root_object; + +static bool append_document(bson_t* bson, object_t* object); +static bool append_array(bson_t* bson, object_t* object); + +// libbson doesn't automatically write the shortest int; returns false if the append fails +static bool append_int(bson_t* bson, const char* key, size_t length, int64_t value) { + if (value >= INT32_MIN && value <= INT32_MAX) + return bson_append_int32(bson, key, length, (int32_t)value); + else + return bson_append_int64(bson, key, length, value); +} + +static bool append_value(bson_t* bson, const char* key, size_t length, object_t* value) { // appends one keyed value; false on failure or unknown type + switch (value->type) { + case type_nil: return bson_append_null(bson, key, length); + case type_bool: return bson_append_bool(bson, key, length, value->b); + case type_double: return bson_append_double(bson, key, length, value->d); + case type_str: return bson_append_utf8(bson, key, length, value->str, value->l); + + case type_int: return append_int(bson, key, length, value->i); + case type_uint: return append_int(bson, key, length, (int64_t)value->u); + + case type_map: { + bson_t child; + if (!bson_append_document_begin(bson, key, length, &child)) + return false; + bool ok = append_document(&child, value); + return bson_append_document_end(bson, &child) && ok; // always close the child once it has been opened + } + case type_array: { + bson_t child; + if (!bson_append_array_begin(bson, key, length, &child)) + return false; + bool ok = append_array(&child, value); + return bson_append_array_end(bson, &child) && ok; // always close the child once it has been opened + } + default: + break; + } + return false; +} + +static bool append_document(bson_t* bson, object_t* object) { + for (size_t i = 0; i < object->l; ++i) { + object_t* key = object->children + i * 2; + object_t* value = object->children + i * 2 + 1; + if (!append_value(bson, key->str, key->l, value)) +
return false; + } + return true; +} + +static bool append_array(bson_t* bson, object_t* object) { + for (size_t i = 0; i < object->l; ++i) { + char str[16]; + const char* key; + size_t len = bson_uint32_to_string(i, &key, str, sizeof(str)); + if (!append_value(bson, key, len, object->children + i)) + return false; + } + return true; +} + +bool run_test(uint32_t* hash_out) { + bson_t bson = BSON_INITIALIZER; + + bool ok; + if (root_object->type == type_map) + ok = append_document(&bson, root_object); + else + ok = append_array(&bson, root_object); + + if (!ok) { + fprintf(stderr, "libbson error writing data!\n"); + bson_destroy(&bson); + return false; + } + + *hash_out = hash_str(*hash_out, (const char*)bson_get_data(&bson), bson.len); + + // The documentation says that bson_destroy() should be called + // regardless of whether the bson_t was initialized via bson_init(), bson_new() or BSON_INITIALIZER. This is because it stores a flag + // to say whether it should be freed when destroyed. + // This causes a warning under -flto about freeing a stack object + // even though the bson_t is set for static.
+ bson_destroy(&bson); + return true; +} + +bool setup_test(size_t object_size) { + root_object = benchmark_object_create(object_size); + return true; +} + +void teardown_test(void) { + object_destroy(root_object); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return BSON_VERSION_S; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "BSON"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/libbson/libbson-file.c b/src/libbson/libbson-file.c new file mode 100644 index 0000000..4da8b76 --- /dev/null +++ b/src/libbson/libbson-file.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "benchmark.h"
+#include "bson.h"
+
+static bool append_document(bson_t* bson, object_t* object);
+static bool append_array(bson_t* bson, object_t* object);
+
+// libbson doesn't automatically write the shortest int, so use the 32-bit
+// encoding whenever the value fits and the 64-bit encoding otherwise.
+// Returns the result of the underlying libbson append call.
+static bool append_int(bson_t* bson, const char* key, size_t length, int64_t value) {
+    if (value >= INT32_MIN && value <= INT32_MAX)
+        return bson_append_int32(bson, key, length, (int32_t)value);
+    return bson_append_int64(bson, key, length, value);
+}
+
+// Appends one value of any supported type under the given key.
+// Returns false for an unsupported type or when a libbson call fails.
+static bool append_value(bson_t* bson, const char* key, size_t length, object_t* value) {
+    switch (value->type) {
+        case type_nil:    return bson_append_null(bson, key, length);
+        case type_bool:   return bson_append_bool(bson, key, length, value->b);
+        case type_double: return bson_append_double(bson, key, length, value->d);
+        case type_str:    return bson_append_utf8(bson, key, length, value->str, value->l);
+        case type_int:    return append_int(bson, key, length, value->i);
+
+        // the unsigned value is cast to int64_t before being written
+        case type_uint:   return append_int(bson, key, length, (int64_t)value->u);
+
+        case type_map: {
+            bson_t child;
+            if (!bson_append_document_begin(bson, key, length, &child))
+                return false;
+            bool ok = append_document(&child, value);
+            // always close the child, even if filling it failed
+            return bson_append_document_end(bson, &child) && ok;
+        }
+
+        case type_array: {
+            bson_t child;
+            if (!bson_append_array_begin(bson, key, length, &child))
+                return false;
+            bool ok = append_array(&child, value);
+            // always close the child, even if filling it failed
+            return bson_append_array_end(bson, &child) && ok;
+        }
+
+        default:
+            break;
+    }
+    return false;
+}
+
+// Appends every key/value pair of a map object. Children are stored as
+// alternating key, value entries.
+static bool append_document(bson_t* bson, object_t* object) {
+    for (size_t i = 0; i < object->l; ++i) {
+        object_t* key = object->children + i * 2;
+        object_t* value = object->children + i * 2 + 1;
+        if (!append_value(bson, key->str, key->l, value))
+            return false;
+    }
+    return true;
+}
+
+// Appends every element of an array object, using the decimal element
+// index ("0", "1", ...) as the key of each element.
+static bool append_array(bson_t* bson, object_t* object) {
+    for (size_t i = 0; i < object->l; ++i) {
+        char str[16];
+        const char* key;
+        size_t len = bson_uint32_to_string(i, &key, str, sizeof(str));
+        if (!append_value(bson, key, len, object->children + i))
+            return false;
+    }
+    return true;
+}
+
+// Generates the benchmark object of the given size, encodes it as BSON and
+// writes it to the BSON data file. Returns false if encoding or any file
+// operation fails.
+bool setup_test(size_t object_size) {
+    bson_t bson = BSON_INITIALIZER;
+
+    object_t* object = benchmark_object_create(object_size);
+    bool ok;
+    if (object->type == type_map)
+        ok = append_document(&bson, object);
+    else
+        ok = append_array(&bson, object);
+    object_destroy(object);
+
+    if (!ok) {
+        fprintf(stderr, "libbson error encoding object!\n");
+    } else {
+        char filename[64];
+        benchmark_filename(filename, sizeof(filename), object_size, BENCHMARK_FORMAT_BSON, NULL);
+        FILE* file = fopen(filename, "wb");
+        if (file == NULL) {
+            fprintf(stderr, "error opening %s for writing!\n", filename);
+            ok = false;
+        } else {
+            if (fwrite(bson_get_data(&bson), bson.len, 1, file) != 1)
+                ok = false;
+            // fclose() flushes buffered data, so its result matters too
+            if (fclose(file) != 0)
+                ok = false;
+            if (!ok)
+                fprintf(stderr, "error writing %s!\n", filename);
+        }
+    }
+
+    // The documentation says that bson_destroy() should be called
+    // regardless of whether the bson_t was initialized via bson_init(),
+    // bson_new() or BSON_INITIALIZER. This is because it stores a flag
+    // to say whether it should be freed when destroyed.
+    // This causes a warning under -flto about freeing a stack object
+    // even though the bson_t is set for static.
+    bson_destroy(&bson);
+    return ok;
+}
+
+// This file only generates data; there is nothing to run.
+bool run_test(uint32_t* hash_out) {
+    return false;
+}
+
+void teardown_test(void) {
+}
+
+// Not a benchmark: this program only writes the BSON data file.
+bool is_benchmark(void) {
+    return false;
+}
+
+const char* test_version(void) {
+    return BSON_VERSION_S;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_C;
+}
+
+const char* test_format(void) {
+    return "BSON";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git a/src/libbson/libbson-iter.c b/src/libbson/libbson-iter.c
new file mode 100644
index 0000000..1fd9d6c
--- /dev/null
+++ b/src/libbson/libbson-iter.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "benchmark.h"
+#include "bson.h"
+
+static char* file_data;
+static size_t file_size;
+
+// Hashes every element reachable from iter into *hash. Keys are hashed
+// only when is_array is false. The element count of each container is
+// hashed after its elements.
+// Returns false on an unsupported element type or when a nested document
+// or array cannot be iterated or hashed.
+bool hash_bson(bson_iter_t* iter, bool is_array, uint32_t* hash) {
+    uint32_t count = 0;
+    while (bson_iter_next(iter)) {
+        ++count;
+
+        if (!is_array) {
+            const char* key = bson_iter_key(iter);
+            *hash = hash_str(*hash, key, strlen(key));
+        }
+
+        const bson_type_t type = bson_iter_type(iter);
+        switch (type) {
+            case BSON_TYPE_NULL:   *hash = hash_nil(*hash); break;
+            case BSON_TYPE_BOOL:   *hash = hash_bool(*hash, bson_iter_bool(iter)); break;
+            case BSON_TYPE_INT32:  *hash = hash_i64(*hash, bson_iter_int32(iter)); break;
+            case BSON_TYPE_INT64:  *hash = hash_i64(*hash, bson_iter_int64(iter)); break;
+            case BSON_TYPE_DOUBLE: *hash = hash_double(*hash, bson_iter_double(iter)); break;
+
+            case BSON_TYPE_UTF8: {
+                uint32_t length;
+                const char* str = bson_iter_utf8(iter, &length);
+                *hash = hash_str(*hash, str, length);
+                break;
+            }
+
+            case BSON_TYPE_DOCUMENT:
+            case BSON_TYPE_ARRAY: {
+                bson_iter_t child;
+                if (!bson_iter_recurse(iter, &child))
+                    return false;
+                // a failure inside the child must fail the whole hash
+                if (!hash_bson(&child, type == BSON_TYPE_ARRAY, hash))
+                    return false;
+                break;
+            }
+
+            default:
+                return false;
+        }
+    }
+
+    *hash = hash_u32(*hash, count);
+    return true;
+}
+
+// Parses a fresh copy of the loaded BSON data and stores its hash in
+// *hash_out. Returns false if the copy, the parse or the hash fails.
+bool run_test(uint32_t* hash_out) {
+    char* data = benchmark_in_situ_copy(file_data, file_size);
+    if (!data)
+        return false;
+
+    bson_t bson;
+    bool ret = bson_init_static(&bson, (const uint8_t*)data, file_size);
+    if (ret) {
+        bson_iter_t iter;
+
+        // The BSON top-level document doesn't contain its own type, so
+        // it doesn't say whether it's an array. This seems like a
+        // serious flaw in BSON since it's supposed to be compatible
+        // with JSON, but JSON supports both arrays and objects as the
+        // document root. I feel like I'm misunderstanding how this should
+        // work so feel free to correct me here. In the meantime, as a
+        // workaround we just look at the first element and check if it's
+        // a string zero.
+        bool is_array = false;
+        ret = bson_iter_init(&iter, &bson);
+        if (ret)
+            is_array = (bson_iter_next(&iter) && strcmp(bson_iter_key(&iter), "0") == 0);
+
+        // restart iteration from the first element for the real pass
+        ret = ret && bson_iter_init(&iter, &bson);
+        if (ret) {
+            *hash_out = HASH_INITIAL_VALUE;
+            ret = hash_bson(&iter, is_array, hash_out);
+        }
+
+        // The documentation says that bson_destroy() should be called
+        // regardless of whether the bson_t was initialized via bson_init(),
+        // bson_new() or BSON_INITIALIZER. This is because it stores a flag
+        // to say whether it should be freed when destroyed.
+        // This causes a warning under -flto about freeing a stack object
+        // even though the bson_t is set for static.
+        //
+        // It is only called when bson_init_static() succeeded, because
+        // bson is a plain stack variable that nothing else initializes.
+        bson_destroy(&bson);
+    }
+
+    benchmark_in_situ_free(data);
+    return ret;
+}
+
+// Loads the BSON data file for the given object size into memory.
+bool setup_test(size_t object_size) {
+    file_data = load_data_file(BENCHMARK_FORMAT_BSON, object_size, &file_size);
+    if (!file_data)
+        return false;
+    return true;
+}
+
+void teardown_test(void) {
+    free(file_data);
+}
+
+bool is_benchmark(void) {
+    return true;
+}
+
+const char* test_version(void) {
+    return BSON_VERSION_S;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_C;
+}
+
+const char* test_format(void) {
+    return "BSON";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git a/src/mongo-cxx/mongo-cxx-builder.cpp b/src/mongo-cxx/mongo-cxx-builder.cpp
new file mode 100644
index 0000000..c387513
--- /dev/null
+++ b/src/mongo-cxx/mongo-cxx-builder.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "benchmark.h"
+#include "mongo/bson/bson.h"
+#include "mongo/version.h"
+
+static object_t* root_object;
+
+static void append_object(mongo::BSONObjBuilder& builder, object_t* object);
+static void append_array(mongo::BSONArrayBuilder& builder, object_t* object);
+
+// The builders have a function appendNumber() to write the shortest
+// representation, but for some bizarre reason they convert ints
+// in the 30-40 bit range to doubles
+
+// Appends an integer to an object under keystr, as a 32-bit int when the
+// value fits and as a 64-bit int otherwise.
+static void append_object_int(mongo::BSONObjBuilder& builder, const mongo::StringData& keystr, long long value) {
+    if (value >= INT32_MIN && value <= INT32_MAX)
+        builder.append(keystr, (int)value);
+    else
+        builder.append(keystr, value);
+}
+
+// Appends an integer to an array, as a 32-bit int when the value fits and
+// as a 64-bit int otherwise.
+static void append_array_int(mongo::BSONArrayBuilder& builder, long long value) {
+    if (value >= INT32_MIN && value <= INT32_MAX)
+        builder.append((int)value);
+    else
+        builder.append(value);
+}
+
+// Appends every key/value pair of a map object to builder. Children are
+// stored as alternating key, value entries.
+// Throws std::exception for an unsupported value type.
+static void append_object(mongo::BSONObjBuilder& builder, object_t* object) {
+    for (size_t i = 0; i < object->l; ++i) {
+        object_t* key = object->children + i * 2;
+        object_t* value = object->children + i * 2 + 1;
+        const mongo::StringData keystr(key->str, key->l);
+
+        switch (value->type) {
+            case type_nil:    builder.appendNull(keystr); break;
+            case type_bool:   builder.appendBool(keystr, value->b); break;
+            case type_double: builder.append(keystr, value->d); break;
+            case type_str:    builder.append(keystr, mongo::StringData(value->str, value->l)); break;
+
+            case type_int:    append_object_int(builder, keystr, (long long)value->i); break;
+            case type_uint:   append_object_int(builder, keystr, (long long)value->u); break;
+
+            case type_array: {
+                mongo::BSONArrayBuilder sub(builder.subarrayStart(keystr));
+                append_array(sub, value);
+                sub.done();
+                break;
+            }
+
+            case type_map: {
+                mongo::BSONObjBuilder sub(builder.subobjStart(keystr));
+                append_object(sub, value);
+                sub.done();
+                break;
+            }
+
+            default:
+                throw std::exception();
+        }
+    }
+}
+
+// Appends every element of an array object to builder.
+// Throws std::exception for an unsupported value type.
+static void append_array(mongo::BSONArrayBuilder& builder, object_t* object) {
+    for (size_t i = 0; i < object->l; ++i) {
+        object_t* value = object->children + i;
+
+        switch (value->type) {
+            case type_nil:    builder.appendNull(); break;
+            case type_bool:   builder.appendBool(value->b); break;
+            case type_double: builder.append(value->d); break;
+            case type_str:    builder.append(mongo::StringData(value->str, value->l)); break;
+
+            case type_int:    append_array_int(builder, (long long)value->i); break;
+            case type_uint:   append_array_int(builder, (long long)value->u); break;
+
+            case type_array: {
+                mongo::BSONArrayBuilder sub(builder.subarrayStart());
+                append_array(sub, value);
+                sub.done();
+                break;
+            }
+
+            case type_map: {
+                mongo::BSONObjBuilder sub(builder.subobjStart());
+                append_object(sub, value);
+                sub.done();
+                break;
+            }
+
+            default:
+                throw std::exception();
+        }
+    }
+}
+
+// Builds the root object as BSON and folds the encoded bytes into
+// *hash_out. Returns false if building throws.
+bool run_test(uint32_t* hash_out) {
+    try {
+        mongo::BSONObj obj;
+        if (root_object->type == type_map) {
+            mongo::BSONObjBuilder builder;
+            append_object(builder, root_object);
+            obj = builder.obj();
+        } else {
+            mongo::BSONArrayBuilder builder;
+            append_array(builder, root_object);
+            obj = builder.obj();
+        }
+
+        *hash_out = hash_str(*hash_out, obj.objdata(), obj.objsize());
+
+    // catch by const reference: catching by value copies and slices the
+    // exception object
+    } catch (const std::exception&) {
+        return false;
+    }
+    return true;
+}
+
+// Generates the benchmark object of the given size.
+bool setup_test(size_t object_size) {
+    root_object = benchmark_object_create(object_size);
+    return true;
+}
+
+void teardown_test(void) {
+    object_destroy(root_object);
+}
+
+bool is_benchmark(void) {
+    return true;
+}
+
+const char* test_version(void) {
+    return mongo::client::kVersionString;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_CXX;
+}
+
+const char* test_format(void) {
+    return "BSON";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git a/src/mongo-cxx/mongo-cxx-obj.cpp b/src/mongo-cxx/mongo-cxx-obj.cpp
new file mode 100644
index 0000000..328a502
--- /dev/null
+++ b/src/mongo-cxx/mongo-cxx-obj.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2016
Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "benchmark.h"
+#include "mongo/bson/bson.h"
+#include "mongo/version.h"
+
+static char* file_data;
+static size_t file_size;
+
+// Hashes every element of obj into *hash. Keys are hashed only when
+// is_array is false. The element count of each container is hashed after
+// its elements.
+// Returns false on an unsupported element type, including one found inside
+// a nested object or array.
+bool hash_bson(mongo::BSONObj& obj, bool is_array, uint32_t* hash) {
+    size_t count = 0;
+    auto it = obj.begin();
+    while (it.more()) {
+        mongo::BSONElement e = it.next();
+        if (e.eoo())
+            break;
+        ++count;
+
+        if (!is_array) {
+            mongo::StringData key = e.fieldNameStringData();
+            *hash = hash_str(*hash, key.rawData(), key.size());
+        }
+
+        switch (e.type()) {
+            case mongo::jstNULL:      *hash = hash_nil(*hash); break;
+            case mongo::Bool:         *hash = hash_bool(*hash, e.Bool()); break;
+            case mongo::NumberInt:    *hash = hash_i64(*hash, e.numberInt()); break;
+            case mongo::NumberLong:   *hash = hash_i64(*hash, e.numberLong()); break;
+            case mongo::NumberDouble: *hash = hash_double(*hash, e.numberDouble()); break;
+
+            case mongo::String: {
+                mongo::StringData str = e.checkAndGetStringData();
+                *hash = hash_str(*hash, str.rawData(), str.size());
+                break;
+            }
+
+            case mongo::Object:
+            case mongo::Array: {
+                // named child so it does not shadow the obj parameter
+                mongo::BSONObj child = e.Obj();
+                // a failure inside the child must fail the whole hash
+                if (!hash_bson(child, e.type() == mongo::Array, hash))
+                    return false;
+                break;
+            }
+
+            default:
+                return false;
+        }
+    }
+
+    *hash = hash_u32(*hash, count);
+    return true;
+}
+
+// Parses a fresh copy of the loaded BSON data and folds its contents into
+// *hash_out. Returns false if the copy fails, if anything throws, or if
+// an unsupported element is found.
+bool run_test(uint32_t* hash_out) {
+    char* data = benchmark_in_situ_copy(file_data, file_size);
+    if (!data)
+        return false;
+
+    bool ok = true;
+    try {
+        // there doesn't seem to be a BSONObj constructor that takes a
+        // length. how is it supposed to check whether the data was
+        // truncated? will it just read uninitialized memory??
+        //
+        // parse the in-situ copy, not the pristine file buffer
+        mongo::BSONObj obj(data);
+
+        // as with libbson-iter, we look at the first key to see if
+        // it's a string zero to test whether we have an array or map.
+        // an empty document has no first key and is treated as a map.
+        auto first = obj.begin();
+        bool is_array = first.more() && (0 == strcmp(first.next().fieldName(), "0"));
+
+        ok = hash_bson(obj, is_array, hash_out);
+
+    } catch (...) {
+        ok = false;
+    }
+    benchmark_in_situ_free(data);
+    return ok;
+}
+
+// Loads the BSON data file for the given object size into memory.
+bool setup_test(size_t object_size) {
+    file_data = load_data_file(BENCHMARK_FORMAT_BSON, object_size, &file_size);
+    if (!file_data)
+        return false;
+    return true;
+}
+
+void teardown_test(void) {
+    free(file_data);
+}
+
+bool is_benchmark(void) {
+    return true;
+}
+
+const char* test_version(void) {
+    return mongo::client::kVersionString;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_CXX;
+}
+
+const char* test_format(void) {
+    return "BSON";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git a/src/mpack/mpack-file.c b/src/mpack/mpack-file.c
new file mode 100644
index 0000000..180b0de
--- /dev/null
+++ b/src/mpack/mpack-file.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "benchmark.h"
+#include "mpack/mpack.h"
+
+static void write_object(mpack_writer_t* writer, object_t* object);
+
+// Writes an array object: the header, each element in order, then the
+// matching finish call.
+static void write_array(mpack_writer_t* writer, object_t* array) {
+    mpack_start_array(writer, array->l);
+    for (size_t index = 0; index < array->l; ++index)
+        write_object(writer, array->children + index);
+    mpack_finish_array(writer);
+}
+
+// Writes a map object. Children are stored as alternating key, value
+// entries.
+static void write_map(mpack_writer_t* writer, object_t* map) {
+    mpack_start_map(writer, map->l);
+
+    for (size_t pair = 0; pair < map->l; ++pair) {
+        object_t* entry = map->children + pair * 2;
+
+        // we expect keys to be short strings
+        assert(entry->type == type_str);
+        mpack_write_str(writer, entry->str, entry->l);
+
+        // the value follows its key
+        write_object(writer, entry + 1);
+    }
+
+    mpack_finish_map(writer);
+}
+
+// Writes one object of any type, recursing into arrays and maps. An
+// unknown type asserts and flags mpack_error_bug on the writer.
+static void write_object(mpack_writer_t* writer, object_t* object) {
+    switch (object->type) {
+        case type_array:
+            write_array(writer, object);
+            break;
+
+        case type_map:
+            write_map(writer, object);
+            break;
+
+        case type_str:
+            mpack_write_str(writer, object->str, object->l);
+            break;
+
+        case type_uint:
+            mpack_write_u64(writer, object->u);
+            break;
+
+        case type_int:
+            mpack_write_i64(writer, object->i);
+            break;
+
+        case type_double:
+            mpack_write_double(writer, object->d);
+            break;
+
+        case type_bool:
+            mpack_write_bool(writer, object->b);
+            break;
+
+        case type_nil:
+            mpack_write_nil(writer);
+            break;
+
+        default:
+            assert(0);
+            mpack_writer_flag_error(writer, mpack_error_bug);
+            break;
+    }
+}
+
+// Generates the benchmark object of the given size and writes it to the
+// MessagePack data file. Returns false if the writer reports an error.
+bool setup_test(size_t object_size) {
+    char path[64];
+    benchmark_filename(path, sizeof(path), object_size, BENCHMARK_FORMAT_MESSAGEPACK, NULL);
+
+    mpack_writer_t writer;
+    mpack_writer_init_file(&writer, path);
+
+    object_t* root = benchmark_object_create(object_size);
+    write_object(&writer, root);
+    object_destroy(root);
+
+    // any error flagged while writing is reported when the writer is destroyed
+    mpack_error_t status = mpack_writer_destroy(&writer);
+    if (status == mpack_ok)
+        return true;
+
+    fprintf(stderr, "mpack writer error %i writing file!\n", (int)status);
+    return false;
+}
+
+// This file only generates data; there is nothing to run.
+bool run_test(uint32_t* hash_out) {
+    return false;
+}
+
+void teardown_test(void) {
+}
+
+// Not a benchmark: this program only writes the MessagePack data file.
+bool is_benchmark(void) {
+    return false;
+}
+
+const char* test_version(void) {
+    return MPACK_VERSION_STRING;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_C;
+}
+
+const char* test_format(void) {
+    return "MessagePack";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git a/src/mpack/mpack-node.c b/src/mpack/mpack-node.c
new file mode 100644
index 0000000..5bd4599
--- /dev/null
+++ b/src/mpack/mpack-node.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "benchmark.h"
+#include "mpack/mpack.h"
+
+static char* file_data;
+static size_t file_size;
+
+static void hash_node(mpack_node_t node, uint32_t* hash);
+
+// Hashes each element of an array node, then the element count. Stops
+// without hashing the count once the tree is in an error state.
+static void hash_array_node(mpack_node_t array, uint32_t* hash) {
+    const uint32_t length = mpack_node_array_length(array);
+    uint32_t index = 0;
+    while (index < length) {
+        hash_node(mpack_node_array_at(array, index), hash);
+        if (mpack_node_error(array) != mpack_ok)
+            return;
+        ++index;
+    }
+    *hash = hash_u32(*hash, length);
+}
+
+// Hashes each key and value of a map node, then the pair count. Stops
+// without hashing the count once the tree is in an error state.
+static void hash_map_node(mpack_node_t map, uint32_t* hash) {
+    const uint32_t pairs = mpack_node_map_count(map);
+    uint32_t index = 0;
+    while (index < pairs) {
+
+        // we expect keys to be short strings
+        mpack_node_t name = mpack_node_map_key_at(map, index);
+        #if CHECK_UTF8
+        mpack_node_check_utf8(name);
+        #endif
+        *hash = hash_str(*hash, mpack_node_str(name), mpack_node_strlen(name));
+
+        hash_node(mpack_node_map_value_at(map, index), hash);
+
+        if (mpack_node_error(map) != mpack_ok)
+            return;
+        ++index;
+    }
+    *hash = hash_u32(*hash, pairs);
+}
+
+// Hashes one node of any supported type into *hash, recursing into arrays
+// and maps. Any other node type flags mpack_error_data.
+static void hash_node(mpack_node_t node, uint32_t* hash) {
+    switch (mpack_node_type(node)) {
+        case mpack_type_array:
+            hash_array_node(node, hash);
+            break;
+
+        case mpack_type_map:
+            hash_map_node(node, hash);
+            break;
+
+        case mpack_type_str:
+            #if CHECK_UTF8
+            mpack_node_check_utf8(node);
+            #endif
+            *hash = hash_str(*hash, mpack_node_data(node), mpack_node_data_len(node));
+            break;
+
+        case mpack_type_uint:
+            *hash = hash_u64(*hash, mpack_node_u64(node));
+            break;
+
+        case mpack_type_int:
+            *hash = hash_i64(*hash, mpack_node_i64(node));
+            break;
+
+        case mpack_type_double:
+            *hash = hash_double(*hash, mpack_node_double(node));
+            break;
+
+        case mpack_type_bool:
+            *hash = hash_bool(*hash, mpack_node_bool(node));
+            break;
+
+        case mpack_type_nil:
+            *hash = hash_nil(*hash);
+            break;
+
+        default:
+            mpack_node_flag_error(node, mpack_error_data);
+            break;
+    }
+}
+
+// Parses a fresh copy of the loaded MessagePack data into a tree and folds
+// it into *hash_out. Returns false if the copy fails or the tree reports
+// an error when destroyed.
+bool run_test(uint32_t* hash_out) {
+    char* buffer = benchmark_in_situ_copy(file_data, file_size);
+    if (buffer == NULL)
+        return false;
+
+    mpack_tree_t tree;
+    mpack_tree_init(&tree, buffer, file_size);
+    hash_node(mpack_tree_root(&tree), hash_out);
+
+    const bool ok = (mpack_tree_destroy(&tree) == mpack_ok);
+    benchmark_in_situ_free(buffer);
+    return ok;
+}
+
+// Loads the MessagePack data file for the given object size into memory.
+bool setup_test(size_t object_size) {
+    file_data = load_data_file(BENCHMARK_FORMAT_MESSAGEPACK, object_size, &file_size);
+    return file_data != NULL;
+}
+
+void teardown_test(void) {
+    free(file_data);
+}
+
+bool is_benchmark(void) {
+    return true;
+}
+
+const char* test_version(void) {
+    return MPACK_VERSION_STRING;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_C;
+}
+
+const char* test_format(void) {
+    return "MessagePack";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git a/src/mpack/mpack-read.c b/src/mpack/mpack-read.c
new file mode 100644
index 0000000..74c6180
--- /dev/null
+++ b/src/mpack/mpack-read.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "benchmark.h"
+#include "mpack/mpack.h"
+
+static char* file_data;
+static size_t file_size;
+
+// Reads the body of a string of the given length in place, hashes it and
+// closes the string. Returns false, leaving *hash untouched, if the reader
+// is in an error state after the read.
+static bool hash_str_body(mpack_reader_t* reader, uint32_t length, uint32_t* hash) {
+    const char* bytes =
+        #if CHECK_UTF8
+        mpack_read_utf8_inplace(reader, length);
+        #else
+        mpack_read_bytes_inplace(reader, length);
+        #endif
+    if (mpack_reader_error(reader) != mpack_ok)
+        return false;
+    *hash = hash_str(*hash, bytes, length);
+    mpack_done_str(reader);
+    return true;
+}
+
+// Reads the next element from the reader and folds it into *hash,
+// recursing into arrays and maps. Any other tag type flags
+// mpack_error_data on the reader.
+static void hash_element(mpack_reader_t* reader, uint32_t* hash) {
+    const mpack_tag_t tag = mpack_read_tag(reader);
+
+    switch (tag.type) {
+        case mpack_type_map:
+            for (size_t pair = 0; pair < tag.v.n; ++pair) {
+
+                // we expect keys to be short strings
+                uint32_t key_length = mpack_expect_str(reader);
+                if (!hash_str_body(reader, key_length, hash))
+                    return;
+
+                hash_element(reader, hash);
+            }
+            *hash = hash_u32(*hash, tag.v.n);
+            mpack_done_map(reader);
+            break;
+
+        case mpack_type_array:
+            for (size_t index = 0; index < tag.v.n; ++index) {
+                hash_element(reader, hash);
+                if (mpack_reader_error(reader) != mpack_ok)
+                    return;
+            }
+            *hash = hash_u32(*hash, tag.v.n);
+            mpack_done_array(reader);
+            break;
+
+        case mpack_type_str:
+            hash_str_body(reader, tag.v.l, hash);
+            break;
+
+        // note: all reals are hashed as doubles (not all libraries read different sized types)
+        case mpack_type_float:
+            *hash = hash_float(*hash, tag.v.f);
+            break;
+
+        case mpack_type_double:
+            *hash = hash_double(*hash, tag.v.d);
+            break;
+
+        case mpack_type_uint:
+            *hash = hash_u64(*hash, tag.v.u);
+            break;
+
+        case mpack_type_int:
+            *hash = hash_i64(*hash, tag.v.i);
+            break;
+
+        case mpack_type_bool:
+            *hash = hash_bool(*hash, tag.v.b);
+            break;
+
+        case mpack_type_nil:
+            *hash = hash_nil(*hash);
+            break;
+
+        default:
+            mpack_reader_flag_error(reader, mpack_error_data);
+            break;
+    }
+}
+
+// Reads a fresh copy of the loaded MessagePack data and folds it into
+// *hash_out. Returns false if the copy fails or the reader reports an
+// error when destroyed.
+bool run_test(uint32_t* hash_out) {
+    char* buffer = benchmark_in_situ_copy(file_data, file_size);
+    if (buffer == NULL)
+        return false;
+
+    mpack_reader_t reader;
+    mpack_reader_init_data(&reader, buffer, file_size);
+
+    hash_element(&reader, hash_out);
+
+    const bool ok = (mpack_reader_destroy(&reader) == mpack_ok);
+    benchmark_in_situ_free(buffer);
+    return ok;
+}
+
+// Loads the MessagePack data file for the given object size into memory.
+bool setup_test(size_t object_size) {
+    file_data = load_data_file(BENCHMARK_FORMAT_MESSAGEPACK, object_size, &file_size);
+    return file_data != NULL;
+}
+
+void teardown_test(void) {
+    free(file_data);
+}
+
+bool is_benchmark(void) {
+    return true;
+}
+
+const char* test_version(void) {
+    return MPACK_VERSION_STRING;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_C;
+}
+
+const char* test_format(void) {
+    return "MessagePack";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git a/src/mpack/mpack-write.c b/src/mpack/mpack-write.c
new file mode 100644
index 0000000..d0ade02
--- /dev/null
+++ b/src/mpack/mpack-write.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "benchmark.h"
+#include "mpack/mpack.h"
+
+static object_t* root_object;
+
+// Writes one object of any type, recursing into arrays and maps. An
+// unknown type asserts and flags mpack_error_bug on the writer.
+static void write_object(mpack_writer_t* writer, object_t* object) {
+    switch (object->type) {
+        case type_nil:    mpack_write_nil   (writer); break;
+        case type_bool:   mpack_write_bool  (writer, object->b); break;
+        case type_double: mpack_write_double(writer, object->d); break;
+        case type_int:    mpack_write_i64   (writer, object->i); break;
+        case type_uint:   mpack_write_u64   (writer, object->u); break;
+        case type_str:    mpack_write_str   (writer, object->str, object->l); break;
+
+        case type_array:
+            mpack_start_array(writer, object->l);
+            for (size_t i = 0; i < object->l; ++i)
+                write_object(writer, object->children + i);
+            mpack_finish_array(writer);
+            break;
+
+        case type_map:
+            mpack_start_map(writer, object->l);
+
+            // children are stored as alternating key, value entries
+            for (size_t i = 0; i < object->l; ++i) {
+
+                // we expect keys to be short strings
+                object_t* key = object->children + i * 2;
+                assert(key->type == type_str);
+                mpack_write_str(writer, key->str, key->l);
+
+                write_object(writer, object->children + i * 2 + 1);
+            }
+            mpack_finish_map(writer);
+            break;
+
+        default:
+            assert(0);
+            mpack_writer_flag_error(writer, mpack_error_bug);
+            break;
+    }
+}
+
+// Encodes the root object into a growable buffer and folds the encoded
+// bytes into *hash_out. Returns false if the writer reports an error.
+bool run_test(uint32_t* hash_out) {
+    char* data;
+    size_t size;
+    mpack_writer_t writer;
+    mpack_writer_init_growable(&writer, &data, &size);
+
+    write_object(&writer, root_object);
+
+    mpack_error_t error = mpack_writer_destroy(&writer);
+    if (error != mpack_ok)
+        return false;
+
+    *hash_out = hash_str(*hash_out, data, size);
+    free(data);
+    return true;
+}
+
+// Generates the benchmark object of the given size.
+bool setup_test(size_t object_size) {
+    root_object = benchmark_object_create(object_size);
+    return true;
+}
+
+void teardown_test(void) {
+    object_destroy(root_object);
+}
+
+bool is_benchmark(void) {
+    return true;
+}
+
+const char* test_version(void) {
+    return MPACK_VERSION_STRING;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_C;
+}
+
+const char* test_format(void) {
+    return "MessagePack";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git a/src/msgpack/msgpack-c-pack.c b/src/msgpack/msgpack-c-pack.c
new file mode 100644
index 0000000..6c9681f
--- /dev/null
+++ b/src/msgpack/msgpack-c-pack.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "benchmark.h"
+#include "msgpack.h"
+
+static object_t* root_object;
+
+// Packs one object of any type, recursing into arrays and maps.
+// Returns false if any msgpack call returns non-zero or the type is
+// unknown.
+static bool pack_object(msgpack_packer* packer, object_t* object) {
+    switch (object->type) {
+        case type_bool:
+            return (object->b ? msgpack_pack_true(packer) : msgpack_pack_false(packer)) == 0;
+
+        case type_nil:    return msgpack_pack_nil(packer) == 0;
+        case type_int:    return msgpack_pack_long_long(packer, object->i) == 0;
+        case type_uint:   return msgpack_pack_unsigned_long_long(packer, object->u) == 0;
+        case type_double: return msgpack_pack_double(packer, object->d) == 0;
+
+        case type_str:
+            if (msgpack_pack_str(packer, object->l) != 0)
+                return false;
+            return msgpack_pack_str_body(packer, object->str, object->l) == 0;
+
+        case type_array:
+            if (msgpack_pack_array(packer, object->l) != 0)
+                return false;
+            for (size_t i = 0; i < object->l; ++i)
+                if (!pack_object(packer, object->children + i))
+                    return false;
+            return true;
+
+        case type_map:
+            if (msgpack_pack_map(packer, object->l) != 0)
+                return false;
+
+            // children are stored as alternating key, value entries
+            for (size_t i = 0; i < object->l; ++i) {
+
+                // we expect keys to be short strings
+                object_t* key = object->children + i * 2;
+                assert(key->type == type_str);
+                if (msgpack_pack_str(packer, key->l) != 0)
+                    return false;
+                if (msgpack_pack_str_body(packer, key->str, key->l) != 0)
+                    return false;
+
+                if (!pack_object(packer, object->children + i * 2 + 1))
+                    return false;
+            }
+            return true;
+
+        default:
+            assert(0);
+            break;
+    }
+    return false;
+}
+
+// Packs the root object into a simple buffer and folds the encoded bytes
+// into *hash_out. Returns false if packing fails. The buffer is destroyed
+// on both the success and the failure path.
+bool run_test(uint32_t* hash_out) {
+    msgpack_sbuffer buffer;
+    msgpack_sbuffer_init(&buffer);
+    msgpack_packer packer;
+    msgpack_packer_init(&packer, &buffer, msgpack_sbuffer_write);
+
+    bool ok = pack_object(&packer, root_object);
+    if (ok)
+        *hash_out = hash_str(*hash_out, buffer.data, buffer.size);
+
+    // release the buffer even when packing failed part-way through
+    msgpack_sbuffer_destroy(&buffer);
+    return ok;
+}
+
+// Generates the benchmark object of the given size.
+bool setup_test(size_t object_size) {
+    root_object = benchmark_object_create(object_size);
+    return true;
+}
+
+void teardown_test(void) {
+    object_destroy(root_object);
+}
+
+bool is_benchmark(void) {
+    return true;
+}
+
+const char* test_version(void) {
+    return MSGPACK_VERSION;
+}
+
+const char* test_language(void) {
+    return BENCHMARK_LANGUAGE_C;
+}
+
+const char* test_format(void) {
+    return "MessagePack";
+}
+
+const char* test_filename(void) {
+    return __FILE__;
+}
diff --git a/src/msgpack/msgpack-c-unpack.c b/src/msgpack/msgpack-c-unpack.c
new file mode 100644
index 0000000..d34d40a
--- /dev/null
+++ b/src/msgpack/msgpack-c-unpack.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2016 Nicholas Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + +#include "benchmark.h" +#include "msgpack.h" + +static char* file_data; +static size_t file_size; + +static bool hash_object(msgpack_object* object, uint32_t* hash) { + switch (object->type) { + case MSGPACK_OBJECT_NIL: *hash = hash_nil(*hash); return true; + case MSGPACK_OBJECT_BOOLEAN: *hash = hash_bool(*hash, object->via.boolean); return true; + case MSGPACK_OBJECT_FLOAT: *hash = hash_double(*hash, object->via.f64); return true; + case MSGPACK_OBJECT_NEGATIVE_INTEGER: *hash = hash_i64(*hash, object->via.i64); return true; + case MSGPACK_OBJECT_POSITIVE_INTEGER: *hash = hash_u64(*hash, object->via.u64); return true; + case MSGPACK_OBJECT_STR: *hash = hash_str(*hash, object->via.str.ptr, object->via.str.size); return true; + + case MSGPACK_OBJECT_ARRAY: + for (size_t i = 0; i < object->via.array.size; ++i) + if (!hash_object(object->via.array.ptr + i, hash)) + return false; + *hash = hash_u32(*hash, object->via.array.size); + return true; + + case MSGPACK_OBJECT_MAP: + for (size_t i = 0; i < object->via.map.size; ++i) { + + // we expect keys to be short strings + msgpack_object* key = &object->via.map.ptr[i].key; + assert(key->type == MSGPACK_OBJECT_STR); + *hash = hash_str(*hash, key->via.str.ptr, key->via.str.size); + + if (!hash_object(&object->via.map.ptr[i].val, hash)) + return false; + } + *hash = hash_u32(*hash, object->via.array.size); + return true; + + default: + break; + } + + return false; +} + +bool run_test(uint32_t* hash_out) { + char* data = benchmark_in_situ_copy(file_data, file_size); + if (!data) + return false; + + // The very first example code in the msgpack-c README shows how to + // use msgpack_unpack(), but it's apparently obsolete. + // + // Instead, the quickstart guide shows using a msgpack_unpacked with + // msgpack_unpacked_next(). The quickstart guide claims it returns + // bool, but it actually returns an enum. That example also doesn't + // destroy the msgpack_unpacked, so it leaks its zone. We fix those + // problems here. 
+ + msgpack_unpacked msg; + msgpack_unpacked_init(&msg); + msgpack_unpack_return ret = msgpack_unpack_next(&msg, data, file_size, NULL); + + if (ret != MSGPACK_UNPACK_SUCCESS) { + benchmark_in_situ_free(data); + return false; + } + + bool ok = hash_object(&msg.data, hash_out); + msgpack_unpacked_destroy(&msg); + benchmark_in_situ_free(data); + return ok; +} + +bool setup_test(size_t object_size) { + file_data = load_data_file(BENCHMARK_FORMAT_MESSAGEPACK, object_size, &file_size); + if (!file_data) + return false; + return true; +} + +void teardown_test(void) { + free(file_data); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return MSGPACK_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "MessagePack"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/msgpack/msgpack-cpp-pack.cpp b/src/msgpack/msgpack-cpp-pack.cpp new file mode 100644 index 0000000..351bc9f --- /dev/null +++ b/src/msgpack/msgpack-cpp-pack.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "msgpack.hpp" + +static object_t* root_object; + +static void pack_object(msgpack::packer& packer, object_t* object) { + switch (object->type) { + case type_bool: + object->b ? packer.pack_true() : packer.pack_false(); + return; + + case type_nil: packer.pack_nil(); return; + case type_int: packer.pack(object->i); return; + case type_uint: packer.pack(object->u); return; + case type_double: packer.pack(object->d); return; + + case type_str: + packer.pack_str(object->l); + packer.pack_str_body(object->str, object->l); + return; + + case type_array: + packer.pack_array(object->l); + for (size_t i = 0; i < object->l; ++i) + pack_object(packer, object->children + i); + return; + + case type_map: + packer.pack_map(object->l); + for (size_t i = 0; i < object->l; ++i) { + + // we expect keys to be short strings + object_t* key = object->children + i * 2; + assert(key->type == type_str); + packer.pack_str(key->l); + packer.pack_str_body(key->str, key->l); + + pack_object(packer, object->children + i * 2 + 1); + } + return; + + default: + break; + } + + throw msgpack::unpack_error(""); +} + +bool run_test(uint32_t* hash_out) { + try { + msgpack::sbuffer buffer; + msgpack::packer packer(buffer); + pack_object(packer, root_object); + *hash_out = hash_str(*hash_out, buffer.data(), buffer.size()); + + } catch (std::exception e) { + return false; + } + return true; +} + +bool setup_test(size_t object_size) { + root_object = benchmark_object_create(object_size); + return true; +} + +void teardown_test(void) { + object_destroy(root_object); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return MSGPACK_VERSION; +} + +const char* 
test_language(void) { + return BENCHMARK_LANGUAGE_CXX; +} + +const char* test_format(void) { + return "MessagePack"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/msgpack/msgpack-cpp-unpack.cpp b/src/msgpack/msgpack-cpp-unpack.cpp new file mode 100644 index 0000000..bd77bfd --- /dev/null +++ b/src/msgpack/msgpack-cpp-unpack.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "benchmark.h" +#include "msgpack.hpp" + +static char* file_data; +static size_t file_size; + +static void hash_object(const msgpack::object& object, uint32_t* hash) { + switch (object.type) { + case msgpack::type::NIL: *hash = hash_nil(*hash); return; + case msgpack::type::BOOLEAN: *hash = hash_bool(*hash, object.via.boolean); return; + case msgpack::type::FLOAT: *hash = hash_double(*hash, object.via.f64); return; + case msgpack::type::NEGATIVE_INTEGER: *hash = hash_i64(*hash, object.via.i64); return; + case msgpack::type::POSITIVE_INTEGER: *hash = hash_u64(*hash, object.via.u64); return; + case msgpack::type::STR: *hash = hash_str(*hash, object.via.str.ptr, object.via.str.size); return; + + case msgpack::type::ARRAY: + for (size_t i = 0; i < object.via.array.size; ++i) + hash_object(*(object.via.array.ptr + i), hash); + *hash = hash_u32(*hash, object.via.array.size); + return; + + case msgpack::type::MAP: + for (size_t i = 0; i < object.via.map.size; ++i) { + + // we expect keys to be short strings + const msgpack::object& key = object.via.map.ptr[i].key; + assert(key.type == msgpack::type::STR); + *hash = hash_str(*hash, key.via.str.ptr, key.via.str.size); + + hash_object(object.via.map.ptr[i].val, hash); + } + *hash = hash_u32(*hash, object.via.map.size); + return; + + default: + break; + } + + throw msgpack::unpack_error(""); +} + +bool run_test(uint32_t* hash_out) { + char* data = benchmark_in_situ_copy(file_data, file_size); + if (!data) + return false; + + msgpack::unpacked msg; + try { + msgpack::unpack(&msg, data, file_size); + hash_object(msg.get(), hash_out); + } catch (msgpack::unpack_error error) { + benchmark_in_situ_free(data); + return false; + } + + benchmark_in_situ_free(data); + return true; +} + +bool setup_test(size_t object_size) { + file_data = load_data_file(BENCHMARK_FORMAT_MESSAGEPACK, object_size, &file_size); + if (!file_data) + return false; + return true; +} + +void teardown_test(void) { + free(file_data); +} + +bool 
is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return MSGPACK_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_CXX; +} + +const char* test_format(void) { + return "MessagePack"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/rapidjson/rapidjson-dom.cpp b/src/rapidjson/rapidjson-dom.cpp new file mode 100644 index 0000000..6d06462 --- /dev/null +++ b/src/rapidjson/rapidjson-dom.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "benchmark.h" + +#include "rapidjson/rapidjson.h" +#include "rapidjson/memorystream.h" +#include "rapidjson/document.h" + +using namespace rapidjson; + +static char* file_data; +static size_t file_size; + +static bool hash_value(Value& value, uint32_t* hash) { + switch (value.GetType()) { + case kNullType: *hash = hash_nil(*hash); return true; + case kFalseType: *hash = hash_bool(*hash, false); return true; + case kTrueType: *hash = hash_bool(*hash, true); return true; + + case kNumberType: + if (value.IsDouble()) { + *hash = hash_double(*hash, value.GetDouble()); + return true; + } + if (value.IsInt64()) { + *hash = hash_i64(*hash, value.GetInt64()); + return true; + } + *hash = hash_u64(*hash, value.GetUint64()); + return true; + + case kStringType: + *hash = hash_str(*hash, value.GetString(), value.GetStringLength()); + return true; + + case kArrayType: { + auto it = value.Begin(), end = value.End(); + for (; it != end; ++it) + if (!hash_value(*it, hash)) + return false; + *hash = hash_u32(*hash, value.Size()); + return true; + } + + case kObjectType: { + auto it = value.MemberBegin(), end = value.MemberEnd(); + for (; it != end; ++it) { + *hash = hash_str(*hash, it->name.GetString(), it->name.GetStringLength()); + if (!hash_value(it->value, hash)) + return false; + } + *hash = hash_u32(*hash, value.MemberCount()); + return true; + } + + default: + assert(0); + break; + } + + return false; +} + +bool run_test(uint32_t* hash_out) { + char* data = benchmark_in_situ_copy(file_data, file_size); + if (!data) + return false; + + Document document; + #if BENCHMARK_IN_SITU + document.ParseInsitu(data); + #else + MemoryStream s(data, file_size); + document.ParseStream(s); + #endif + + bool ok = hash_value(document, hash_out); + benchmark_in_situ_free(data); + return ok; +} + +bool setup_test(size_t object_size) { + file_data = load_data_file(BENCHMARK_FORMAT_JSON, object_size, &file_size); + if (!file_data) + return false; + return true; +} + +void 
teardown_test(void) { + free(file_data); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return RAPIDJSON_VERSION_STRING; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_CXX; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/rapidjson/rapidjson-file.cpp b/src/rapidjson/rapidjson-file.cpp new file mode 100644 index 0000000..fef78ba --- /dev/null +++ b/src/rapidjson/rapidjson-file.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "benchmark.h" + +#include "rapidjson/rapidjson.h" +#include "rapidjson/filewritestream.h" +#include "rapidjson/writer.h" +#include "rapidjson/prettywriter.h" + +using namespace rapidjson; + +template +static void write_object(WriterType& writer, object_t* object) { + switch (object->type) { + case type_nil: writer.Null(); break; + case type_bool: writer.Bool(object->b); break; + case type_double: writer.Double(object->d); break; + case type_int: writer.Int64(object->i); break; + case type_uint: writer.Uint64(object->u); break; + case type_str: writer.String(object->str, object->l); break; + + case type_array: + writer.StartArray(); + for (size_t i = 0; i < object->l; ++i) + write_object(writer, object->children + i); + writer.EndArray(); + break; + + case type_map: + writer.StartObject(); + for (size_t i = 0; i < object->l; ++i) { + writer.Key(object->children[i * 2].str, object->children[i * 2].l); + write_object(writer, object->children + i * 2 + 1); + } + writer.EndObject(); + break; + + default: + assert(0); + break; + } +} + +template +void set_indent(WriterType& writer) {} + +template <> +void set_indent >(PrettyWriter& writer) { + writer.SetIndent('\t', 1); +} + +template +bool write_file(const char* filename, object_t* object) { + FILE *file = fopen(filename, "wb"); + if (!file) { + fprintf(stderr, "failed to open file %s for writing!\n", filename); + return false; + } + + { + char buffer[65536]; + FileWriteStream stream(file, buffer, sizeof(buffer)); + WriterType writer(stream); + set_indent(writer); + write_object(writer, object); + } + + fclose(file); + return true; +} + +bool setup_test(size_t object_size) { + object_t* object = benchmark_object_create(object_size); + bool ret = true; + + char filename[64]; + benchmark_filename(filename, sizeof(filename), object_size, BENCHMARK_FORMAT_JSON, NULL); + ret &= write_file >(filename, object); + + benchmark_filename(filename, sizeof(filename), object_size, BENCHMARK_FORMAT_JSON, "-pretty"); + 
ret &= write_file >(filename, object); + + object_destroy(object); + return ret; +} + +bool run_test(uint32_t* hash_out) { + return false; +} + +void teardown_test(void) { +} + +bool is_benchmark(void) { + return false; +} + +const char* test_version(void) { + return RAPIDJSON_VERSION_STRING; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_CXX; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/rapidjson/rapidjson-sax.cpp b/src/rapidjson/rapidjson-sax.cpp new file mode 100644 index 0000000..68ec6e0 --- /dev/null +++ b/src/rapidjson/rapidjson-sax.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "benchmark.h" + +#include "rapidjson/rapidjson.h" +#include "rapidjson/memorystream.h" +#include "rapidjson/document.h" + +using namespace rapidjson; + +static char* file_data; +static size_t file_size; + +struct Hasher { + Hasher(uint32_t initial_value) : hash(initial_value) {} + + bool Null() {hash = hash_nil (hash ); return true;} + bool Bool(bool b) {hash = hash_bool (hash, b); return true;} + bool Double(double d) {hash = hash_double (hash, d); return true;} + + // note: all ints are hashed as 64-bit (not all libraries read different sized types) + bool Int(int i) {hash = hash_i64(hash, i); return true;} + bool Uint(unsigned u) {hash = hash_u64(hash, u); return true;} + bool Int64(int64_t i) {hash = hash_i64(hash, i); return true;} + bool Uint64(uint64_t u) {hash = hash_u64(hash, u); return true;} + + bool String(const char* str, SizeType length, bool copy) { + hash = hash_str(hash, str, length); + return true; + } + bool Key(const char* str, SizeType length, bool copy) { + hash = hash_str(hash, str, length); + return true; + } + + bool StartObject() { + return true; + } + bool EndObject(SizeType memberCount) { + hash = hash_u32(hash, memberCount); + return true; + } + + bool StartArray() { + return true; + } + bool EndArray(SizeType elementCount) { + hash = hash_u32(hash, elementCount); + return true; + } + + uint32_t hash; +}; + +bool run_test(uint32_t* hash_out) { + char* data = benchmark_in_situ_copy(file_data, file_size); + if (!data) + return false; + + try { + Hasher hasher(*hash_out); + Reader reader; + #if BENCHMARK_IN_SITU + // why isn't there a helper for sax in-situ parsing? + InsituStringStream s(data); + reader.Parse(s, hasher); + #else + MemoryStream s(data, file_size); + reader.Parse(s, hasher); + #endif + *hash_out = hasher.hash; + } catch (...) 
{ + benchmark_in_situ_free(data); + return false; + } + + benchmark_in_situ_free(data); + return true; +} + +bool setup_test(size_t object_size) { + file_data = load_data_file(BENCHMARK_FORMAT_JSON, object_size, &file_size); + if (!file_data) + return false; + return true; +} + +void teardown_test(void) { + free(file_data); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return RAPIDJSON_VERSION_STRING; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_CXX; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/rapidjson/rapidjson-write.cpp b/src/rapidjson/rapidjson-write.cpp new file mode 100644 index 0000000..0ae4929 --- /dev/null +++ b/src/rapidjson/rapidjson-write.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "benchmark.h" + +#include "rapidjson/rapidjson.h" +#include "rapidjson/filewritestream.h" +#include "rapidjson/writer.h" +#include "rapidjson/prettywriter.h" + +using namespace rapidjson; + +static object_t* root_object; + +static void write_object(Writer& writer, object_t* object) { + switch (object->type) { + case type_nil: writer.Null(); break; + case type_bool: writer.Bool(object->b); break; + case type_double: writer.Double(object->d); break; + case type_int: writer.Int64(object->i); break; + case type_uint: writer.Uint64(object->u); break; + case type_str: writer.String(object->str, object->l); break; + + case type_array: + writer.StartArray(); + for (size_t i = 0; i < object->l; ++i) + write_object(writer, object->children + i); + writer.EndArray(); + break; + + case type_map: + writer.StartObject(); + for (size_t i = 0; i < object->l; ++i) { + writer.Key(object->children[i * 2].str, object->children[i * 2].l); + write_object(writer, object->children + i * 2 + 1); + } + writer.EndObject(); + break; + + default: + assert(0); + break; + } +} + +bool run_test(uint32_t* hash_out) { + StringBuffer buffer; + Writer writer(buffer); + write_object(writer, root_object); + *hash_out = hash_str(*hash_out, buffer.GetString(), buffer.GetSize()); + return true; +} + +bool setup_test(size_t object_size) { + root_object = benchmark_object_create(object_size); + return true; +} + +void teardown_test(void) { + object_destroy(root_object); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return RAPIDJSON_VERSION_STRING; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_CXX; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/ubj/ubj-file.c b/src/ubj/ubj-file.c new file mode 100644 index 0000000..1ac79ce --- /dev/null +++ b/src/ubj/ubj-file.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is 
hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "ubj.h" + +static UBJ_TYPE get_ubj_type(object_t* object) { + switch (object->type) { + case type_nil: return UBJ_NULLTYPE; + case type_bool: return object->b ? UBJ_BOOL_TRUE : UBJ_BOOL_FALSE; + case type_str: return UBJ_STRING; + case type_double: return UBJ_FLOAT64; + + // Note that ubjw_min_integer_type() has a bug where it incorrectly + // excludes the minimum bound of each signed integer type. For example + // ubjw_min_integer_type(INT8_MIN) returns UBJ_INT16, not UBJ_INT8. + // This is contrary to the spec which allows -128 in an int8. 
+ case type_int: return ubjw_min_integer_type(object->i); + case type_uint: return ubjw_min_integer_type(object->u); + + case type_array: return UBJ_ARRAY; + case type_map: return UBJ_OBJECT; + default: break; + } + return UBJ_MIXED; +} + +static bool write_object(ubjw_context_t* dst, object_t* object, bool sized) { + switch (object->type) { + case type_nil: ubjw_write_null (dst); break; + case type_bool: ubjw_write_bool (dst, object->b); break; + case type_double: ubjw_write_float64(dst, object->d); break; + case type_int: ubjw_write_integer(dst, object->i); break; + + case type_str: + ubjw_write_string(dst, object->str); + break; + + case type_uint: + // UBJSON does not support unsigned 64-bit numbers (unless + // they are written as high precision big integers, represented + // by a decimal string.) + if (object->u > INT64_MAX) + return false; + ubjw_write_integer(dst, (int64_t)object->u); + break; + + case type_array: { + UBJ_TYPE type = UBJ_MIXED; + + // in order to try to take advantage as much as possible of ubjson's + // space-saving optimization, we check if each element of the array + // is the same type. (we need all integers to be the same width to + // use an int type, otherwise it would be a waste of space.) + if (sized && object->l >= 2) { + type = get_ubj_type(&object->children[0]); + if (type != UBJ_MIXED) { + for (size_t i = 1; i < object->l; ++i) { + if (get_ubj_type(&object->children[i]) != type) { + type = UBJ_MIXED; + break; + } + } + } + } + + ubjw_begin_array(dst, type, sized ? 
object->l : 0); + for (size_t i = 0; i < object->l; ++i) + write_object(dst, object->children + i, sized); + ubjw_end(dst); + } break; + + case type_map: { + UBJ_TYPE type = UBJ_MIXED; + + // as above, check if all map values are the same type + if (sized && object->l >= 2) { + type = get_ubj_type(&object->children[1]); + if (type != UBJ_MIXED) { + for (size_t i = 1; i < object->l; ++i) { + if (get_ubj_type(&object->children[i * 2 + 1]) != type) { + type = UBJ_MIXED; + break; + } + } + } + } + + ubjw_begin_object(dst, type, sized ? object->l : 0); + for (size_t i = 0; i < object->l; ++i) { + + // we expect keys to be short strings + object_t* key = object->children + i * 2; + assert(key->type == type_str); + ubjw_write_key(dst, key->str); + + write_object(dst, object->children + i * 2 + 1, sized); + } + ubjw_end(dst); + } break; + + default: + return false; + } + + return true; +} + +bool write_file(size_t object_size, bool sized) { + char filename[64]; + benchmark_filename(filename, sizeof(filename), object_size, BENCHMARK_FORMAT_UBJSON, sized ? NULL : "-unopt"); + + FILE* file = fopen(filename, "wb"); + if (!file) { + fprintf(stderr, "error opening file for writing!\n"); + return false; + } + + // ubjw_open_file() doesn't register any error handler, and + // does not check the return value of calls to fwrite() either. + // it doesn't seem to have any way of handling i/o errors at all, + // such as disk full errors or broken pipes. this seems like + // a serious flaw; there's no way to tell if the data was + // truncated. 
+ ubjw_context_t* dst = ubjw_open_file(file); + + object_t* object = benchmark_object_create(object_size); + bool ok = write_object(dst, object, sized); + object_destroy(object); + + ubjw_close_context(dst); + return ok; +} + +bool setup_test(size_t object_size) { + if (!write_file(object_size, true)) + return false; + if (!write_file(object_size, false)) + return false; + return true; +} + +bool run_test(uint32_t* hash_out) { + return false; +} + +void teardown_test(void) { +} + +bool is_benchmark(void) { + return false; +} + +const char* test_version(void) { + return BENCHMARK_UBJ_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "UBJSON"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/ubj/ubj-read.c b/src/ubj/ubj-read.c new file mode 100644 index 0000000..250f331 --- /dev/null +++ b/src/ubj/ubj-read.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "ubj.h" + +static char* file_data; +static size_t file_size; + +// A ubjr_array_t or ubjr_object_t contains an array of its values +// stored as a void*. If the array is not of a fixed type, the void* +// is a ubjr_dynamic_t*, since each element in the array stores its +// own type. Otherwise the void pointer is an array of specific +// types (such as int64_t, double, char*, ubjr_array_t, etc.) +// +// In order to handle this, hash_value() takes a void pointer +// and switches on the type to figure out what the void* contains +// (which is very similar to what ubj does internally.) It has a +// special case for UBJ_MIXED when reading values of a mixed +// array or object that figures out the type, offsets to the +// content of the dynamic and recurses into itself to hash it. +// This way an array or object reader can simply pass its own +// fixed type to hash_value(), and if it's mixed, it will sort +// out the type for each element. +// +// This seems like a very unnatural way to access the data, but that's +// pretty much what the library does, and I don't know how to make it +// better. + +static void hash_value(void* data, UBJ_TYPE type, size_t index, uint32_t* hash) { + switch (type) { + case UBJ_MIXED: { + ubjr_dynamic_t* dynamic = &((ubjr_dynamic_t*)data)[index]; + switch (dynamic->type) { + case UBJ_INT8: + case UBJ_UINT8: + case UBJ_INT16: + case UBJ_INT32: + case UBJ_INT64: + // the union in a dynamic doesn't actually contain these types, so + // we can't just cast it and recurse like the others here. 
if an + // integer is read in a dynamic, the int64_t integer is set to the + // value read in priv_ubjr_pointer_to_dynamic(), but integers can + // be smaller sizes in fixed-type arrays and objects. + *hash = hash_i64(*hash, dynamic->integer); + return; + + case UBJ_FLOAT64: hash_value(&dynamic->real, dynamic->type, 0, hash); return; + case UBJ_STRING: hash_value(&dynamic->string, dynamic->type, 0, hash); return; + case UBJ_ARRAY: hash_value(&dynamic->container_array, dynamic->type, 0, hash); return; + case UBJ_OBJECT: hash_value(&dynamic->container_object, dynamic->type, 0, hash); return; + + default: break; + } + hash_value(NULL, dynamic->type, 0, hash); + return; + } + + case UBJ_NULLTYPE: *hash = hash_nil(*hash); return; + case UBJ_BOOL_TRUE: *hash = hash_bool(*hash, true); return; + case UBJ_BOOL_FALSE: *hash = hash_bool(*hash, false); return; + case UBJ_FLOAT64: *hash = hash_double(*hash, ((double*)data)[index]); return; + + // we assume that the type here comes from a fixed-size array, not from + // a dynamic (which should have been handled above.) 
+ case UBJ_INT8: *hash = hash_i64(*hash, (( int8_t*)data)[index]); return; + case UBJ_UINT8: *hash = hash_i64(*hash, ((uint8_t*)data)[index]); return; + case UBJ_INT16: *hash = hash_i64(*hash, ((int16_t*)data)[index]); return; + case UBJ_INT32: *hash = hash_i64(*hash, ((int32_t*)data)[index]); return; + case UBJ_INT64: *hash = hash_i64(*hash, ((int64_t*)data)[index]); return; + + case UBJ_STRING: + case UBJ_CHAR: { + ubjr_string_t str = ((ubjr_string_t*)data)[index]; + *hash = hash_str(*hash, str, strlen(str)); + return; + } + + case UBJ_ARRAY: { + ubjr_array_t* array = &((ubjr_array_t*)data)[index]; + for (size_t i = 0; i < array->size; ++i) + hash_value(array->values, array->type, i, hash); + *hash = hash_u32(*hash, array->size); + return; + } + + case UBJ_OBJECT: + { + ubjr_object_t* object = &((ubjr_object_t*)data)[index]; + for (size_t i = 0; i < object->size; ++i) { + *hash = hash_str(*hash, object->keys[i], strlen(object->keys[i])); + hash_value(object->values, object->type, i, hash); + } + *hash = hash_u32(*hash, object->size); + return; + } + + default: + break; + } +} + +bool run_test(uint32_t* hash_out) { + char* data = benchmark_in_situ_copy(file_data, file_size); + if (!data) + return false; + + ubjr_context_t* src = ubjr_open_memory((uint8_t*)data, (uint8_t*)(data + file_size)); + ubjr_dynamic_t dynamic = ubjr_read_dynamic(src); + hash_value(&dynamic, UBJ_MIXED, 0, hash_out); + ubjr_cleanup_dynamic(&dynamic); + + // ubjr has a bug where it leaks the userdata. it never calls the + // close_cb, but even if it did, free() in memclose() is + // commented out. this ugly hack frees it manually. + free(((void**)src)[4]); + + ubjr_close_context(src); + benchmark_in_situ_free(data); + + // ubjr_open_memory() sets NULL as the error handling callback. + // I don't know how it is meant to handle errors, and it seems to + // have lots of bugs (for example priv_ubjw_read_integer() returns + // 0 on error, but 0 is also a valid integer.) 
The return value + // of memread() is also never checked so if the data is truncated + // it just continues using uninitialized memory, which seems to + // easily throw it into an infinite loop or infinite recursion. + // + // For now we'll have to skip error checking, but this seems like + // an unfair advantage for ubj. Data validation is fundamental + // to security and it affects performance. + return true; +} + +bool setup_test(size_t object_size) { + const char* config = + #if BENCHMARK_UBJ_OPTIMIZED + NULL + #else + "-unopt" + #endif + ; + file_data = load_data_file_ex(BENCHMARK_FORMAT_UBJSON, object_size, &file_size, config); + if (!file_data) + return false; + return true; +} + +void teardown_test(void) { + free(file_data); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return BENCHMARK_UBJ_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "UBJSON"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/ubj/ubj-write.c b/src/ubj/ubj-write.c new file mode 100644 index 0000000..7f1da08 --- /dev/null +++ b/src/ubj/ubj-write.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "ubj.h" +#include "buffer.h" + +static object_t* root_object; + +// ubj only handles errors via a callback which does not +// pass the userdata back, so the only way to properly handle +// this error is to use a thread-local variable. +static __thread bool error_occurred; +void error_fn(const char* error_msg) { + error_occurred = true; +} + +// ubj doesn't have built-in support for writing to a growable buffer. +static size_t buffer_ubj_write(const void* data, size_t size, size_t count, void* userdata) { + bool ok = buffer_write((buffer_t*)userdata, (const char*)data, count); + + // ubj also ignores the return value of this callback; it doesn't + // actually check whether bytes were properly written, so we do it manually. + if (!ok) { + error_fn(""); + return 0; + } + + return count; +} + +#if BENCHMARK_UBJ_OPTIMIZED +static UBJ_TYPE get_ubj_type(object_t* object) { + switch (object->type) { + case type_nil: return UBJ_NULLTYPE; + case type_bool: return object->b ? UBJ_BOOL_TRUE : UBJ_BOOL_FALSE; + case type_str: return UBJ_STRING; + case type_double: return UBJ_FLOAT64; + + // Note that ubjw_min_integer_type() has a bug where it incorrectly + // excludes the minimum bound of each signed integer type. For example: + // ubjw_min_integer_type(INT8_MIN) returns UBJ_INT16, not UBJ_INT8. + // This is contrary to the spec which allows -128 in an int8. + // I don't know why it uses llabs() instead of just checking both + // bounds directly. 
+ case type_int: return ubjw_min_integer_type(object->i); + case type_uint: return ubjw_min_integer_type(object->u); + + case type_array: return UBJ_ARRAY; + case type_map: return UBJ_OBJECT; + default: break; + } + return UBJ_MIXED; +} +#endif + +static bool write_object(ubjw_context_t* dst, object_t* object) { + switch (object->type) { + case type_nil: ubjw_write_null (dst); break; + case type_bool: ubjw_write_bool (dst, object->b); break; + case type_double: ubjw_write_float64(dst, object->d); break; + case type_int: ubjw_write_integer(dst, object->i); break; + + case type_str: + // unfortunately ubj currently only supports null-terminated + // strings. there's a TODO in the code to support a "safe string". + ubjw_write_string(dst, object->str); + break; + + case type_uint: + // UBJSON does not support unsigned 64-bit numbers (unless + // they are written as high precision big integers, represented + // by a decimal string.) The generator limits this to INT64_MAX. + ubjw_write_integer(dst, (int64_t)object->u); + break; + + case type_array: { + UBJ_TYPE type = UBJ_MIXED; + size_t count = 0; + + #if BENCHMARK_UBJ_OPTIMIZED + // in order to try to take advantage as much as possible of ubjson's + // space-saving optimization, we check if each element of the array + // is the same type. (we need all integers to be the same width to + // use an int type, otherwise it would be a waste of space.) + // + // in the real world, you would probably already know whether an + // array or map's children are all the same type, especially in + // a statically typed language. this check is not realistic (which + // is why it's off by default in the results.) 
+ count = object->l; + if (count >= 2) { + type = get_ubj_type(&object->children[0]); + if (type != UBJ_MIXED) { + for (size_t i = 1; i < count; ++i) { + if (get_ubj_type(&object->children[i]) != type) { + type = UBJ_MIXED; + break; + } + } + } + } + #endif + + ubjw_begin_array(dst, type, count); + for (size_t i = 0; i < object->l; ++i) + write_object(dst, object->children + i); + ubjw_end(dst); + } break; + + case type_map: { + UBJ_TYPE type = UBJ_MIXED; + size_t count = 0; + + #if BENCHMARK_UBJ_OPTIMIZED + // as above, check if all map values are the same type + count = object->l; + if (count >= 2) { + type = get_ubj_type(&object->children[1]); + if (type != UBJ_MIXED) { + for (size_t i = 1; i < count; ++i) { + if (get_ubj_type(&object->children[i * 2 + 1]) != type) { + type = UBJ_MIXED; + break; + } + } + } + } + #endif + + ubjw_begin_object(dst, type, count); + for (size_t i = 0; i < object->l; ++i) { + + // we expect keys to be short strings + object_t* key = object->children + i * 2; + assert(key->type == type_str); + ubjw_write_key(dst, key->str); + + write_object(dst, object->children + i * 2 + 1); + } + ubjw_end(dst); + } break; + + default: + return false; + } + + return true; +} + +bool run_test(uint32_t* hash_out) { + buffer_t buffer; + buffer_init(&buffer); + ubjw_context_t* dst = ubjw_open_callback(&buffer, buffer_ubj_write, NULL, error_fn); + + if (!write_object(dst, root_object) || error_occurred) { + buffer_destroy(&buffer); + return false; + } + + *hash_out = hash_str(*hash_out, buffer.data, buffer.count); + buffer_destroy(&buffer); + ubjw_close_context(dst); + return true; +} + +bool setup_test(size_t object_size) { + root_object = benchmark_object_create(object_size); + return true; +} + +void teardown_test(void) { + object_destroy(root_object); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return BENCHMARK_UBJ_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const 
char* test_format(void) { + return "UBJSON"; +} + +const char* test_filename(void) { + return __FILE__; +} diff --git a/src/udp-json/json-builder.c b/src/udp-json/json-builder.c new file mode 100644 index 0000000..aebd2c2 --- /dev/null +++ b/src/udp-json/json-builder.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "json-builder.h" + +static object_t* root_object; + +static json_value* create_value(object_t* object) { + switch (object->type) { + case type_bool: return json_boolean_new(object->b ? 1 : 0); + case type_nil: return json_null_new(); + case type_int: return json_integer_new(object->i); + case type_double: + // note: doubles are what causes json-builder to not match the hash + // output value of the other JSON encoders. it uses sprintf %g to + // print doubles which changes the output precision. 
+ return json_double_new(object->d); + + case type_uint: + // Note: The generator limits unsigned int to INT64_MAX, so we won't + // ever hit this error, but we check anyway (because you would need + // to check in your own code if you used u64.) json-builder doesn't + // allow integers outside the range of signed int64. + if (object->u > (uint64_t)INT64_MAX) + return false; + return json_integer_new((int64_t)object->u); + + case type_str: + // we can't use nocopy because it takes ownership of the string. + return json_string_new_length(object->l, object->str); + + case type_array: { + json_value* parent = json_array_new(object->l); + if (!parent) + return NULL; + for (size_t i = 0; i < object->l; ++i) { + json_value* child = create_value(object->children + i); + if (!child || !json_array_push(parent, child)) { + json_builder_free(parent); + return NULL; + } + } + return parent; + } + + case type_map: { + json_value* parent = json_object_new(object->l); + if (!parent) + return NULL; + for (size_t i = 0; i < object->l; ++i) { + // we expect keys to be short strings. we can't use nocopy + // because it takes ownership of the string. 
+ object_t* key = object->children + i * 2; + json_value* child = create_value(object->children + i * 2 + 1); + if (!child || !json_object_push_length(parent, key->l, key->str, child)) { + json_builder_free(parent); + return NULL; + } + } + return parent; + } + + default: + break; + } + return false; +} + +bool run_test(uint32_t* hash_out) { + json_value* value = create_value(root_object); + if (!value) + return false; + + json_serialize_opts opts = {json_serialize_mode_packed, 0, 0}; + size_t len = json_measure_ex(value, opts); + json_char* buf = (json_char*)malloc(len); + if (!buf) { + json_builder_free(value); + return false; + } + json_serialize_ex(buf, value, opts); + + *hash_out = hash_str(*hash_out, (const char*)buf, len); + free(buf); + json_builder_free(value); + return true; +} + +bool setup_test(size_t object_size) { + root_object = benchmark_object_create(object_size); + return true; +} + +void teardown_test(void) { + object_destroy(root_object); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return BENCHMARK_JSON_BUILDER_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} + diff --git a/src/udp-json/json-parser.c b/src/udp-json/json-parser.c new file mode 100644 index 0000000..b801ebb --- /dev/null +++ b/src/udp-json/json-parser.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "json.h" + +static char* file_data; +static size_t file_size; + +static bool hash_json(json_value* value, uint32_t* hash) { + switch (value->type) { + case json_null: *hash = hash_nil(*hash); return true; + case json_boolean: *hash = hash_bool(*hash, value->u.boolean ? true : false); return true; + case json_double: *hash = hash_double(*hash, value->u.dbl); return true; + case json_string: *hash = hash_str(*hash, value->u.string.ptr, value->u.string.length); return true; + + case json_integer: + // json-parser does not support JSON big integers at all. + // Judging from the code, it looks like it just overflows and + // gives garbage with no error if an integer is outside the + // range of int64_t. 
+ *hash = hash_i64(*hash, value->u.integer); + return true; + + case json_array: { + for (unsigned int i = 0; i < value->u.array.length; ++i) + if (!hash_json(value->u.array.values[i], hash)) + return false; + *hash = hash_u32(*hash, value->u.array.length); + return true; + } + + case json_object: { + for (unsigned int i = 0; i < value->u.object.length; ++i) { + json_object_entry* entry = &value->u.object.values[i]; + *hash = hash_str(*hash, entry->name, entry->name_length); + if (!hash_json(entry->value, hash)) + return false; + } + *hash = hash_u32(*hash, value->u.object.length); + return true; + } + + default: + break; + } + + return false; +} + +bool run_test(uint32_t* hash_out) { + char* data = benchmark_in_situ_copy(file_data, file_size); + if (!data) + return false; + + json_value* root = json_parse(data, file_size); + if (!root) { + benchmark_in_situ_free(data); + return false; + } + + bool ok = hash_json(root, hash_out); + json_value_free(root); + benchmark_in_situ_free(data); + return ok; +} + +bool setup_test(size_t object_size) { + file_data = load_data_file(BENCHMARK_FORMAT_JSON, object_size, &file_size); + if (!file_data) + return false; + return true; +} + +void teardown_test(void) { + free(file_data); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + return BENCHMARK_JSON_PARSER_VERSION; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} + diff --git a/src/yajl/yajl-gen.c b/src/yajl/yajl-gen.c new file mode 100644 index 0000000..9f76c2c --- /dev/null +++ b/src/yajl/yajl-gen.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the 
rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "yajl/yajl_gen.h" +#include "yajl/yajl_version.h" + +static object_t* root_object; + +static bool gen_object(yajl_gen gen, object_t* object) { + switch (object->type) { + case type_bool: return yajl_gen_bool(gen, object->b) == yajl_gen_status_ok; + case type_nil: return yajl_gen_null(gen) == yajl_gen_status_ok; + case type_int: return yajl_gen_integer(gen, object->i) == yajl_gen_status_ok; + case type_double: return yajl_gen_double(gen, object->d) == yajl_gen_status_ok; + + case type_uint: + // Note: The generator limits unsigned int to INT64_MAX. To print a number + // in the range (INT64_MAX, UINT64_MAX] with YAJL, you would need to convert + // it to string yourself (and parse all numbers from strings yourself as well.) 
+ return yajl_gen_integer(gen, (int64_t)object->u) == yajl_gen_status_ok; + + case type_str: + return yajl_gen_string(gen, (const unsigned char*)object->str, object->l) == yajl_gen_status_ok; + + case type_array: + if (yajl_gen_array_open(gen) != yajl_gen_status_ok) + return false; + for (size_t i = 0; i < object->l; ++i) + if (!gen_object(gen, object->children + i)) + return false; + return yajl_gen_array_close(gen) == yajl_gen_status_ok; + + case type_map: + if (yajl_gen_map_open(gen) != yajl_gen_status_ok) + return false; + for (size_t i = 0; i < object->l; ++i) { + + // we expect keys to be short strings + object_t* key = object->children + i * 2; + if (yajl_gen_string(gen, (const unsigned char*)key->str, key->l) != yajl_gen_status_ok) + return false; + + if (!gen_object(gen, object->children + i * 2 + 1)) + return false; + } + return yajl_gen_map_close(gen) == yajl_gen_status_ok; + + default: + break; + } + return false; +} + +bool run_test(uint32_t* hash_out) { + yajl_gen gen = yajl_gen_alloc(NULL); + + if (!gen_object(gen, root_object)) + return false; + + const unsigned char* buf; + size_t len; + yajl_gen_get_buf(gen, &buf, &len); + *hash_out = hash_str(*hash_out, (const char*)buf, len); + + yajl_gen_free(gen); + return true; +} + +bool setup_test(size_t object_size) { + root_object = benchmark_object_create(object_size); + return true; +} + +void teardown_test(void) { + object_destroy(root_object); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + static char buf[16]; + snprintf(buf, sizeof(buf), "%u.%u.%u", YAJL_MAJOR, YAJL_MINOR, YAJL_MICRO); + return buf; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} + diff --git a/src/yajl/yajl-parse.c b/src/yajl/yajl-parse.c new file mode 100644 index 0000000..7217860 --- /dev/null +++ b/src/yajl/yajl-parse.c @@ -0,0 +1,163 @@ +/* + * 
Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "benchmark.h" +#include "yajl/yajl_parse.h" +#include "yajl/yajl_version.h" + +static char* file_data; +static size_t file_size; + +typedef struct parser_t { + uint32_t hash; + int32_t depth; + uint32_t children[32]; +} parser_t; + +static void parser_init(parser_t* parser, uint32_t initial_value) { + parser->hash = initial_value; + parser->depth = -1; +} + +static int parse_null(void* ctx) { + parser_t* parser = (parser_t*)ctx; + ++parser->children[parser->depth]; + parser->hash = hash_nil(parser->hash); + return 1; +} + +static int parse_boolean(void* ctx, int boolean) { + parser_t* parser = (parser_t*)ctx; + ++parser->children[parser->depth]; + parser->hash = hash_bool(parser->hash, boolean); + return 1; +} + +static int parse_integer(void* ctx, long long val) { + parser_t* parser = (parser_t*)ctx; + ++parser->children[parser->depth]; + parser->hash = hash_i64(parser->hash, val); + return 1; +} + +static int parse_double(void* ctx, double val) { + parser_t* parser = (parser_t*)ctx; + ++parser->children[parser->depth]; + parser->hash = hash_double(parser->hash, val); + return 1; +} + +static int parse_string(void* ctx, const unsigned char* str, size_t len) { + parser_t* parser = (parser_t*)ctx; + ++parser->children[parser->depth]; + parser->hash = hash_str(parser->hash, (const char*)str, len); + return 1; +} + +static int parse_map_key(void* ctx, const unsigned char* str, size_t len) { + parser_t* parser = (parser_t*)ctx; + parser->hash = hash_str(parser->hash, (const char*)str, len); + return 1; +} + +static int parse_start_compound(void* ctx) { + parser_t* parser = (parser_t*)ctx; + ++parser->children[parser->depth]; + if (parser->depth + 1 >= sizeof(parser->children) / sizeof(*parser->children)) + return 0; + ++parser->depth; + parser->children[parser->depth] = 0; + return 1; +} + +static int parse_end_compound(void* ctx) { + parser_t* parser = (parser_t*)ctx; + parser->hash = hash_u32(parser->hash, parser->children[parser->depth]); + 
--parser->depth; + return 1; +} + +static yajl_callbacks callbacks = { + parse_null, + parse_boolean, + parse_integer, + parse_double, + NULL, + parse_string, + parse_start_compound, + parse_map_key, + parse_end_compound, + parse_start_compound, + parse_end_compound +}; + +bool run_test(uint32_t* hash_out) { + char* data = benchmark_in_situ_copy(file_data, file_size); + if (!data) + return false; + + parser_t parser; + parser_init(&parser, *hash_out); + + yajl_handle handle = yajl_alloc(&callbacks, NULL, &parser); + yajl_status status = yajl_parse(handle, (const unsigned char*)file_data, file_size); + if (status == yajl_status_ok) + status = yajl_complete_parse(handle); + yajl_free(handle); + + *hash_out = parser.hash; + benchmark_in_situ_free(data); + return status == yajl_status_ok; +} + +bool setup_test(size_t object_size) { + file_data = load_data_file(BENCHMARK_FORMAT_JSON, object_size, &file_size); + if (!file_data) + return false; + return true; +} + +void teardown_test(void) { + free(file_data); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + static char buf[16]; + snprintf(buf, sizeof(buf), "%u.%u.%u", YAJL_MAJOR, YAJL_MINOR, YAJL_MICRO); + return buf; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} + diff --git a/src/yajl/yajl-tree.c b/src/yajl/yajl-tree.c new file mode 100644 index 0000000..c6adf27 --- /dev/null +++ b/src/yajl/yajl-tree.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2016 Nicholas Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom 
the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "benchmark.h" +#include "yajl/yajl_tree.h" +#include "yajl/yajl_version.h" + +static char* file_data; +static size_t file_size; + +static bool hash_node(yajl_val node, uint32_t* hash) { + switch (node->type) { + case yajl_t_null: *hash = hash_nil(*hash); return true; + case yajl_t_true: *hash = hash_bool(*hash, true); return true; + case yajl_t_false: *hash = hash_bool(*hash, false); return true; + + case yajl_t_number: + if (YAJL_IS_INTEGER(node)) { + *hash = hash_i64(*hash, YAJL_GET_INTEGER(node)); + return true; + } + if (YAJL_IS_DOUBLE(node)) { + *hash = hash_double(*hash, YAJL_GET_DOUBLE(node)); + return true; + } + // We don't handle big integers. (The benchmark data won't contain + // any, but we have to safely handle it anyway.) 
+ return false; + + case yajl_t_string: { + // It seems YAJL tree strings are only null-terminated; the length is + // not provided anywhere in the node, so we need to strlen it :( + const char* str = YAJL_GET_STRING(node); + *hash = hash_str(*hash, str, strlen(str)); + return true; + } + + case yajl_t_array: { + size_t count = YAJL_GET_ARRAY(node)->len; + for (size_t i = 0; i < count; ++i) + if (!hash_node(YAJL_GET_ARRAY(node)->values[i], hash)) + return false; + *hash = hash_u32(*hash, count); + return true; + } + + case yajl_t_object: { + size_t count = YAJL_GET_OBJECT(node)->len; + for (uint32_t i = 0; i < count; ++i) { + + // we expect keys to be short strings (also null-terminated) + const char* key = YAJL_GET_OBJECT(node)->keys[i]; + *hash = hash_str(*hash, key, strlen(key)); + + if (!hash_node(YAJL_GET_OBJECT(node)->values[i], hash)) + return false; + } + *hash = hash_u32(*hash, count); + return true; + } + + default: + break; + } + + return false; +} + +bool run_test(uint32_t* hash_out) { + char* data = benchmark_in_situ_copy(file_data, file_size); + if (!data) + return false; + + char errbuf[1024]; + yajl_val node = yajl_tree_parse(data, errbuf, sizeof(errbuf)); + if (node == NULL) { + benchmark_in_situ_free(data); + return false; + } + + bool ok = hash_node(node, hash_out); + yajl_tree_free(node); + benchmark_in_situ_free(data); + return ok; +} + +bool setup_test(size_t object_size) { + file_data = load_data_file(BENCHMARK_FORMAT_JSON, object_size, &file_size); + if (!file_data) + return false; + return true; +} + +void teardown_test(void) { + free(file_data); +} + +bool is_benchmark(void) { + return true; +} + +const char* test_version(void) { + static char buf[16]; + snprintf(buf, sizeof(buf), "%u.%u.%u", YAJL_MAJOR, YAJL_MINOR, YAJL_MICRO); + return buf; +} + +const char* test_language(void) { + return BENCHMARK_LANGUAGE_C; +} + +const char* test_format(void) { + return "JSON"; +} + +const char* test_filename(void) { + return __FILE__; +} + diff --git 
a/tools/results.py b/tools/results.py new file mode 100755 index 0000000..50f035b --- /dev/null +++ b/tools/results.py @@ -0,0 +1,356 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2015 Nicholas Fraser +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
"""Render the benchmark results in ``results.csv`` as Markdown tables.

Usage::

    results.py [MODE] [extended]

If MODE is ``size`` only rows whose SIZE_OPTIMIZED column is 1 are used and
every table is sorted by net code size; with any other value (or none) only
rows whose SIZE_OPTIMIZED column is 0 are used and tables are sorted by time.
Passing ``extended`` as the second argument adds the overhead, hash and
standard deviation output plus the additional library configurations.
"""

import csv
import sys
from functools import reduce
from math import sqrt

benchmarks_url = 'https://github.com/ludocode/schemaless-benchmarks/blob/master/'

# Markdown link reference -> library home page.
urlrefs = {
    "mpack": "https://github.com/ludocode/mpack",
    "cmp": "https://github.com/camgunz/cmp",
    "msgpack": "https://github.com/msgpack/msgpack-c",
    "rapidjson": "http://rapidjson.org/",
    "yajl": "http://lloyd.github.io/yajl/",
    "libbson": "https://github.com/mongodb/libbson",
    "binn": "https://github.com/liteserver/binn",
    "jansson": "http://www.digip.org/jansson/",
    "json-parser-lib": "https://github.com/udp/json-parser",
    "json-builder-lib": "https://github.com/udp/json-builder",
    "ubj": "https://github.com/Steve132/ubj",
    "mongo-cxx": "https://github.com/mongodb/mongo-cxx-driver",
}

# Benchmark name prefix -> [fullname, urlref, config].  addrow() uses the
# longest prefix that matches the benchmark name.
info = {
    "mpack-": ["MPack", "mpack", ""],
    "mpack-tracking-": ["MPack", "mpack", " \\[tracking]"],
    "mpack-utf8-": ["MPack", "mpack", " \\[UTF-8]"],
    "cmp-": ["CMP", "cmp", ""],
    "msgpack-c-": ["msgpack C", "msgpack", ""],
    "msgpack-cpp-": ["msgpack C++", "msgpack", ""],
    "rapidjson-": ["RapidJSON", "rapidjson", ""],
    "rapidjson-insitu-": ["RapidJSON", "rapidjson", " \\[in-situ]"],
    "yajl-": ["YAJL", "yajl", ""],
    "libbson-": ["libbson", "libbson", ""],
    "binn-": ["Binn", "binn", ""],
    "jansson-": ["Jansson", "jansson", ""],
    "jansson-ordered-": ["Jansson", "jansson", " \\[ordered]"],
    "json-parser": ["json-parser", "json-parser-lib", ""],
    "json-builder": ["json-builder", "json-builder-lib", ""],
    "ubj-": ["ubj", "ubj", ""],
    "ubj-opt-": ["ubj", "ubj", " \\[optimized]"],
    "mongo-cxx-": ["MongoDB Legacy", "mongo-cxx", ""],
}

# Command-line switches (see the module docstring).
size_results = len(sys.argv) > 1 and sys.argv[1] == "size"
extended = len(sys.argv) > 2 and sys.argv[2] == "extended"

show_overhead = extended
show_language = extended
show_hash = extended
show_stdev = extended
show_benchmark = not extended

if show_overhead:
    hash_footnote = """
_The Time column shows the total time taken to hash the expected output of the library in the test (the expected objects for Tree and Incremental tests, or a chunk of bytes roughly the size of encoded data for the Write tests.) The Code Size column shows the total code size of the benchmark harness, including object generation code (which is included in all tests.)_
"""
    write_footnote = """
_The Time and Code Size columns show the net result after subtracting the hash-data time and size. The Time Overhead column shows the total time of the benchmark divided by the total time of hash-data. In all three columns, lower is better._
"""
    read_footnote = """
_The Time and Code Size columns show the net result after subtracting the hash-object time and size. The Time Overhead column shows the total time of the benchmark divided by the total time of hash-object. In all three columns, lower is better._
"""
else:
    hash_footnote = """
_The Time column shows the total time taken to hash the expected output of the library in the test (the expected objects for Tree and Incremental tests, or a chunk of bytes roughly the size of encoded data for the Write tests.) The Code Size column shows the total code size of the benchmark harness, including hashing and object generation code (which is included in all tests.)_
"""
    write_footnote = """
_The Time and Code Size columns show the net result after subtracting the hash-data time and size. In both columns, lower is better._
"""
    read_footnote = """
_The Time and Code Size columns show the net result after subtracting the hash-object time and size. In both columns, lower is better._
"""

csvname = 'results.csv'

# Column indices of a CSV row.
NAME, LANGUAGE, VERSION, FILE, FORMAT, OBJECT_SIZE, TIME, BINARY_SIZE, SIZE_OPTIMIZED, HASH = range(10)

# Section heading for each object size present in the CSV.
SIZE_HEADINGS = {
    1: "## Smallest Data",
    2: "## Small Data",
    3: "## Medium Data",
    4: "## Large Data",
    5: "## Largest Data",
}

# One entry per result table:
# (title, footnote, write test?, benchmarks, extended-only benchmarks)
TABLES = [
    ('### Write Test', write_footnote, True,
     ['mpack-write', 'cmp-write', 'msgpack-cpp-pack', 'msgpack-c-pack',
      'rapidjson-write', 'yajl-gen', 'jansson-dump', 'libbson-append',
      'mongo-cxx-builder'],
     ['binn-write', 'json-builder', 'ubj-write', 'ubj-opt-write',
      'mpack-tracking-write', 'jansson-ordered-dump']),
    ('### Tree Test', read_footnote, False,
     ['mpack-node', 'msgpack-c-unpack', 'msgpack-cpp-unpack',
      'rapidjson-insitu-dom', 'yajl-tree', 'jansson-load', 'libbson-iter',
      'mongo-cxx-obj'],
     ['binn-load', 'json-parser', 'mpack-utf8-node', 'rapidjson-dom',
      'jansson-ordered-load', 'ubj-read', 'ubj-opt-read']),
    ('### Incremental Parse Test', read_footnote, False,
     ['mpack-read', 'cmp-read', 'rapidjson-insitu-sax', 'yajl-parse',
      'libbson-iter', 'mongo-cxx-obj'],
     ['binn-load', 'mpack-utf8-read', 'mpack-tracking-read',
      'rapidjson-sax']),
]


def _loaddata(path=csvname, optimized=None):
    """Read the results CSV into ``data[object_size][benchmark_name]``.

    Each stored value is the first CSV row seen for that benchmark, with its
    TIME column replaced by a list holding the time of every matching row.
    Only rows whose SIZE_OPTIMIZED column equals ``optimized`` are kept
    (defaults to the ``size_results`` switch).

    Raises Exception if two rows of one benchmark disagree on BINARY_SIZE.
    """
    if optimized is None:
        optimized = size_results

    data = {size: {} for size in range(1, 6)}

    # newline='' is what the csv module asks for when it is given a file
    with open(path, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields [] for blank lines; there is nothing to index
            if not row:
                continue
            if int(row[SIZE_OPTIMIZED]) != optimized:
                continue

            sizedata = data[int(row[OBJECT_SIZE])]
            name = row[NAME]

            # add the row once, but replace the time with a list containing
            # all times found
            if name in sizedata:
                if sizedata[name][BINARY_SIZE] != row[BINARY_SIZE]:
                    raise Exception("row code size does not match! did you 'make clean'?\nnew row: " +
                                    str(row) + "\nexisting row: " + str(sizedata[name]))
                sizedata[name][TIME].append(float(row[TIME]))
            else:
                row[TIME] = [float(row[TIME])]
                sizedata[name] = row

    return data


def printheader(test):
    """Print the title ``test`` followed by the Markdown table header.

    The optional columns follow the ``show_*`` switches, and a " ▲" marks the
    column the rows are sorted by (code size in size mode, time otherwise).
    """
    print()
    print(test)
    print()
    header = '| Library |'
    divider = '|----|'
    if show_benchmark:
        header += ' Benchmark |'
        divider += '----|'

    # <br> keeps the two-line caption inside a single table cell
    marks = ("", " ▲") if size_results else (" ▲", "")
    header += ' Format | Time<br>(μs)%s | Code Size<br>(bytes)%s |' % marks
    divider += '----|---:|---:|'

    if show_overhead:
        header += ' Time<br>Overhead |'
        divider += '---:|'
    if show_hash:
        header += ' Hash |'
        divider += '---:|'

    print(header)
    print(divider)


def rowtime(row):
    """Return ``(mean, stdev)`` of the times collected in ``row[TIME]``.

    When more than six samples are present the single highest and single
    lowest are dropped first.  ``row`` itself is not modified.
    """
    # work on a copy so trimming does not alter the stored samples
    times = list(row[TIME])

    # drop highest and lowest
    if len(times) > 6:
        times.remove(max(times))
        times.remove(min(times))

    # calculate mean and (sample) stdev
    count = len(times)
    mean = sum(times) / count
    if count > 1:
        sumsqr = reduce(lambda x, y: x + pow(y - mean, 2), times, 0)
        stdev = sqrt(sumsqr / (count - 1))
    else:
        # no spread can be computed from one sample; the mean is reported as
        # the deviation (presumably to flag the figure as unreliable)
        stdev = mean

    return mean, stdev


def rowstring(net, stdev):
    """Format ``net`` with two decimals, adding "± stdev" if show_stdev."""
    if show_stdev:
        return '%.2f ± %.2f' % (net, stdev)
    return '%.2f' % net


def rowtimestr(row, sub):
    """Format the mean time of ``row`` minus the mean time of ``sub``.

    The two standard deviations are combined in quadrature.
    """
    time, timedev = rowtime(row)
    subtime, subdev = rowtime(sub)
    net = time - subtime
    stdev = sqrt(pow(timedev, 2) + pow(subdev, 2))
    return rowstring(net, stdev)


def rowoverhead(row, sub):
    """Format the mean time of ``row`` divided by the mean time of ``sub``.

    The relative deviations of both are combined in quadrature and scaled by
    the ratio.
    """
    time, timedev = rowtime(row)
    subtime, subdev = rowtime(sub)
    overhead = time / subtime
    stdev = overhead * sqrt(pow(timedev / time, 2) + pow(subdev / subtime, 2))
    return rowstring(overhead, stdev)


def addrow(rows, sizedata, name, write):
    """Append ``[sort_key, markdown_row]`` for benchmark ``name`` to ``rows``.

    Does nothing when ``name`` has no results in ``sizedata``.  Time and code
    size are shown net of the baseline benchmark: "hash-data" when ``write``
    is true, "hash-object" otherwise.  The sort key is the net code size in
    size mode and the mean (gross) time otherwise.
    """
    if name not in sizedata:
        return
    row = sizedata[name]
    sub = sizedata["hash-data" if write else "hash-object"]

    size = int(row[BINARY_SIZE]) - int(sub[BINARY_SIZE])
    meantime = rowtime(row)[0]
    timestr = rowtimestr(row, sub)

    filename = row[FILE].split('/')[-1]
    version = row[VERSION]

    # the longest matching prefix in info wins; unknown benchmarks keep
    # their raw name and get an empty link reference
    fullname = row[NAME]
    urlref = ""
    config = ""
    match = 0
    for key, value in info.items():
        if row[NAME].startswith(key) and len(key) > match:
            match = len(key)
            fullname, urlref, config = value

    language = row[LANGUAGE] if show_language else ""

    if show_benchmark:
        p = '| [%s][%s] (%s)%s | [%s][%s]%s |' % (
            fullname, urlref, version, config, filename, name,
            (" " + language) if show_language else "")
    else:
        p = '| [%s][%s] (%s)%s [(%s)][%s] |' % (
            fullname, urlref, version, config, language, name)

    p += ' %s | %s | %i |' % (row[FORMAT], timestr, size)
    if show_overhead:
        # only computed when the column is actually printed
        p += ' %s |' % rowoverhead(row, sub)
    if show_hash:
        p += ' %s |' % row[HASH]

    # A conditional expression is required here: "size_results and size or
    # time" would fall through to the time whenever the net size is 0.
    rows.append([size if size_results else meantime, p])


def printrows(rows):
    """Print the table rows collected by addrow() in ascending key order."""
    for row in sorted(rows):
        print(row[1])
    print()


def printhashrow(sizedata, name, desc, purpose):
    """Print the table row for the baseline benchmark ``name``.

    ``desc`` is accepted for the callers' benefit but is not printed.
    """
    row = sizedata[name]
    time = rowstring(*rowtime(row))
    size = int(row[BINARY_SIZE])
    filename = row[FILE].split('/')[-1]
    print('| [%s][%s] | %s | %i | %s |' % (filename, name, time, size, purpose))


def _printtable(sizedata, title, footnote, write, names, extended_names):
    """Print one result table and its footnote; nothing if it has no rows."""
    rows = []
    for name in names:
        addrow(rows, sizedata, name, write)
    if extended:
        for name in extended_names:
            addrow(rows, sizedata, name, write)
    if rows:
        printheader(title)
        printrows(rows)
        print()
        print(footnote)
        print()


def main():
    """Load the CSV and print the complete Markdown report to stdout."""
    data = _loaddata()

    # print link refs
    print()
    for name, url in urlrefs.items():
        print("[%s]: %s" % (name, url))
    print()
    printed = set()
    for size in range(1, 6):
        for name, row in data[size].items():
            # a benchmark that ran at several sizes gets a single reference
            if name in printed:
                continue
            printed.add(name)
            print("[%s]: %s" % (name, benchmarks_url + row[FILE]))
    print()

    for size in range(1, 6):
        sizedata = data[size]
        if not sizedata:
            continue

        print(SIZE_HEADINGS[size])
        print()

        print()
        print("### Hash Comparisons")
        print()
        print('| Benchmark | Time<br>(μs) | Code Size<br>(bytes) | Comparison |')
        print('|----|---:|---:|----|')
        printhashrow(sizedata, 'hash-data', 'Data Hash', 'subtracted from Write tests')
        printhashrow(sizedata, 'hash-object', 'Object Hash', 'subtracted from Tree and Incremental tests')
        print()
        print(hash_footnote)
        print()

        for title, footnote, write, names, extended_names in TABLES:
            _printtable(sizedata, title, footnote, write, names, extended_names)


if __name__ == "__main__":
    main()