Skip to content

Commit b223f55

Browse files
committed
Merge: Json benchmark
Added a JSON parser benchmark between different languages and Nit using 3 variants: * Nit/NitCC: The old parser relying on NitCC, which is slow and memory-consuming (more than 6 GiB of RAM for the 100 MiB escaping-intensive file) * Nit/Ad-hoc UTF-8 no ropes: The new parser working exclusively on `FlatString` * Nit/Ad-hoc UTF-8 with ropes: The new parser with a mix of `Concat` and `FlatString` ![vr5fa](https://cloud.githubusercontent.com/assets/1444825/11787549/4375a4e6-a25a-11e5-87b3-ac4346dee3bd.jpg) I hear you all clamouring; well, here are the results (after #1885 and #1887, naturally): ![output](https://cloud.githubusercontent.com/assets/1444825/11787622/b24c0c98-a25a-11e5-8cff-0e0afe03c9d8.png) So yeah, I guess we could do better when it comes to escaping, since the biggest difference in runtime is in the `large_escaped` benchmark, which coincidentally contains mostly `\uXXXX` characters. Other than that, we do as well as Go and better than Ruby (also worse than Python, but this does not count), which is nice. About the inputs: * large_escaped is an unusual file since it contains large strings with lots of Unicode escape sequences, which should highlight the handling of String-to-Int conversions and Unicode-escape-sequences-to-UTF-8-characters, and it is big, as in very big (94.7 MiB) * magic, a normally-formatted 54 MiB JSON file with quite a bunch of Unicode characters * gov_data, a 6.9 MiB JSON file with ASCII characters only * twitter, a 64 KiB JSON file with a lot of Japanese characters I might add some more files later to better represent the variety of inputs, but right now is a good time to push the benchmark suite, enjoy! Note: Since the ad-hoc JSON parser is benched, #1886 will need to be merged prior to this one if the bench is to work on your machines. Pull-Request: #1895 Reviewed-by: Jean Privat <jean@pryen.org>
2 parents caeeea7 + 958bf20 commit b223f55

File tree

14 files changed

+314
-0
lines changed

14 files changed

+314
-0
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "ujson4c"]
2+
path = benchmarks/json/thirdparty/ujson4c
3+
url = https://github.com/esnme/ujson4c

benchmarks/json/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
all:
2+
./bench_json.sh

benchmarks/json/bench_json.sh

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
#!/bin/bash
2+
# This file is part of NIT ( http://www.nitlanguage.org ).
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# Shell script to bench json parsers over different documents
17+
18+
source ../bench_common.sh
19+
source ../bench_plot.sh
20+
21+
## CONFIGURATION OPTIONS ##
22+
23+
# Default number of times a command must be run with bench_command
24+
# Can be overridden with the option '-n'
25+
count=5
26+
27+
## HANDLE OPTIONS ##
28+
29+
# Downloads the URL $2 to the path $1.
#
# The data is first written to "$1.part" and only renamed once wget succeeds,
# so an interrupted or failed download never leaves behind a truncated file
# that a later run would mistake for a ready input.
# Returns 1, after printing a message on stderr, if the download failed.
function fetch_input()
{
	local dest="$1"
	local url="$2"
	if wget -O "$dest.part" "$url"; then
		mv "$dest.part" "$dest"
	else
		rm -f "$dest.part"
		echo "Error: could not download $url" >&2
		return 1
	fi
}

# Prepares everything the benchmark needs: initializes the git submodules,
# creates the inputs/ directory and fetches each of the four input files that
# is not already present. The twitter and gov_data files are expanded by the
# multiply_*.sh scripts of inputs/ and the downloaded originals are removed.
#
# Returns 0 if every missing input could be prepared, 1 otherwise. A failure
# on one input does not prevent the others from being prepared.
function init_repo()
{
	local status=0

	echo "Preparing submodules"
	git submodule update --init
	echo "Submodules ready"
	mkdir -p inputs
	echo "Preparing data for benchmarks"

	if [ ! -e inputs/large_escaped.json ]; then
		echo "Downloading file 1/4"
		# Quoted: the URL contains '?', which the shell would otherwise treat as a glob.
		fetch_input inputs/large_escaped.json 'https://github.com/seductiveapps/largeJSON/blob/master/100mb.json?raw=true' || status=1
	fi
	echo "File 1/4 ready"

	if [ ! -e inputs/magic.json ]; then
		echo "Downloading file 2/4"
		fetch_input inputs/magic.json 'http://mtgjson.com/json/AllSets-x.json' || status=1
	fi
	echo "File 2/4 ready"

	if [ ! -e inputs/big_twitter.json ]; then
		echo "Downloading file 3/4"
		if fetch_input inputs/twitter.json 'https://github.com/miloyip/nativejson-benchmark/raw/master/data/twitter.json'; then
			# Subshell: the caller's working directory is untouched even if cd fails.
			(cd inputs && ./multiply_twitter.sh) || status=1
			rm -f inputs/twitter.json
		else
			status=1
		fi
	fi
	echo "File 3/4 ready"

	if [ ! -e inputs/big_gov_data.json ]; then
		echo "Downloading file 4/4"
		if fetch_input inputs/gov_data.json 'https://edg.epa.gov/data.json'; then
			(cd inputs && ./multiply_gov.sh) || status=1
			rm -f inputs/gov_data.json
		else
			status=1
		fi
	fi
	echo "File 4/4 ready"

	return $status
}
65+
66+
# Prints the command-line help of the script on stdout, one option per line.
# The default advertised for -n is the current value of $count.
function usage()
{
	printf '%s\n' \
		"run_bench: ./bench_json.sh [options]" \
		" -v: verbose mode" \
		" -n count: number of execution for each bar (default: $count)" \
		" -h: this help"
}
73+
74+
# Consume the leading command-line options. Parsing ends at the first
# argument that is not a known option, or when no argument is left.
stop=false
until [ "$stop" = true ]; do
	case "$1" in
		-h) usage; exit;;
		-v) verbose=true; shift;;
		# Two separate shifts: the first one still succeeds when -n is the
		# last argument, so the loop always makes progress.
		-n) count="$2"; shift; shift;;
		*) stop=true;;
	esac
done
83+
84+
init_repo
85+
86+
mkdir -p out
87+
88+
echo "Compiling engines"
89+
90+
echo "C JSON Parser"
91+
92+
gcc -O2 -I thirdparty/ujson4c/src -I thirdparty/ujson4c/3rdparty/ thirdparty/ujson4c/3rdparty/ultrajsondec.c scripts/c_parser.c -o scripts/c_parser -lm
93+
94+
echo "Go JSON Parser"
95+
96+
go build -o scripts/json_parse scripts/json_parse.go
97+
98+
echo "Nit/NitCC Parser"
99+
100+
nitc --semi-global scripts/nitcc_parser.nit -o scripts/nitcc_parser
101+
102+
echo "Nit/Ad-Hoc UTF-8 Parser, No Ropes"
103+
104+
nitc --semi-global scripts/nit_adhoc_utf_noropes.nit -o scripts/nit_adhoc_utf_noropes
105+
106+
echo "Nit/Ad-Hoc UTF-8 Parser, With Ropes"
107+
108+
nitc --semi-global scripts/nit_adhoc_utf_ropes.nit -o scripts/nit_adhoc_utf_ropes
109+
110+
# Display names of the benchmarked parsers and, at the same index, the command
# line that runs each of them. The input file is appended as last argument.
declare -a script_names=('C' 'Python 3' 'Python 2' 'Go' 'Nit Ad-hoc UTF-8 No Ropes' 'Nit Ad-hoc UTF-8 + Ropes' 'Ruby ext')
declare -a script_cmds=('./scripts/c_parser' 'python3 scripts/python.py' 'python2 scripts/python.py' './scripts/json_parse' './scripts/nit_adhoc_utf_noropes' './scripts/nit_adhoc_utf_ropes' 'ruby scripts/json_ext.rb')

# One result file per parser, one measurement per input file.
for parser_idx in "${!script_cmds[@]}"; do
	parser_name="${script_names[$parser_idx]}"
	parser_cmd="${script_cmds[$parser_idx]}"
	echo "Preparing res for $parser_name"
	prepare_res "./out/$parser_name.dat" "$parser_name" "$parser_name"
	for file in inputs/*.json; do
		# Without any match the glob is left as a literal pattern: skip it.
		[ -e "$file" ] || continue
		# $parser_cmd is deliberately unquoted: entries such as
		# 'python3 scripts/python.py' must split into command and argument.
		bench_command "$file" "Benching file $file using $parser_cmd parser" $parser_cmd "$file"
	done
done
121+
122+
rm scripts/nitcc_parser
123+
rm scripts/json_parse
124+
rm scripts/c_parser
125+
rm scripts/nit_adhoc_utf_noropes
126+
rm scripts/nit_adhoc_utf_ropes
127+
128+
plot out/bench_json.gnu
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
2+
# This file is part of NIT ( http://www.nitlanguage.org ).
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# Builds big_gov_data.json: a JSON array holding 10 copies of gov_data.json.
# Both files are located in the current directory.

# Without the input, the loop would only emit brackets and commas, leaving an
# invalid document that would then be picked up as a benchmark input.
if [ ! -r gov_data.json ]; then
	echo "Error: gov_data.json is missing or not readable" >&2
	exit 1
fi

# One redirection for the whole group: the output file is opened a single
# time instead of being reopened for every appended chunk.
{
	echo "["
	for i in $(seq 10); do
		# Array elements are comma-separated: no comma before the first one.
		test "$i" != "1" && echo ","
		cat gov_data.json
	done
	echo "]"
} > big_gov_data.json
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
2+
# This file is part of NIT ( http://www.nitlanguage.org ).
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# Builds big_twitter.json: a JSON array holding 100 copies of twitter.json.
# Both files are located in the current directory.

# Without the input, the loop would only emit brackets and commas, leaving an
# invalid document that would then be picked up as a benchmark input.
if [ ! -r twitter.json ]; then
	echo "Error: twitter.json is missing or not readable" >&2
	exit 1
fi

# One redirection for the whole group: the output file is opened a single
# time instead of being reopened for every appended chunk.
{
	echo "["
	for i in $(seq 100); do
		# Array elements are comma-separated: no comma before the first one.
		test "$i" != "1" && echo ","
		cat twitter.json
	done
	echo "]"
} > big_twitter.json

benchmarks/json/scripts/c_parser.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#include "ujdecode.c"
2+
#include <stdlib.h>
3+
#include <stdio.h>
4+
#include <sys/stat.h>
5+
#include <sys/types.h>
6+
#include <fcntl.h>
7+
#include <unistd.h>
8+
#include <assert.h>
9+
10+
// Gets the byte size of the file at `path`.
//
// Returns the size in bytes, or -1 when the file cannot be opened, when its
// metadata cannot be read, or when its size does not fit in an `int`.
int file_byte_size(char* path)
{
	int fd = open(path, O_RDONLY);
	if(fd == -1)
		return -1;

	struct stat s;
	int size = -1;
	// The round-trip through `int` rejects sizes the return type cannot hold,
	// instead of handing back a silently truncated value.
	if(fstat(fd, &s) == 0 && (off_t)(int)s.st_size == s.st_size)
		size = (int)s.st_size;

	close(fd);
	return size;
}
23+
24+
int main(int argc, char** argv)
25+
{
26+
if(argc == 1) {
27+
printf("Usage: ./c_parser file\n");
28+
exit(1);
29+
}
30+
31+
int fl_sz = file_byte_size(argv[1]);
32+
char* input = malloc(fl_sz);
33+
34+
FILE* ifl = fopen(argv[1], "r");
35+
if(ifl == NULL) {
36+
printf("Error: cannot read file %s, are you sure permissions are set correctly ?\n", argv[1]);
37+
exit(2);
38+
}
39+
int rd = fread(input, 1, fl_sz, ifl);
40+
assert(rd == fl_sz);
41+
42+
void *state;
43+
44+
UJObject obj = UJDecode(input, fl_sz, NULL, &state);
45+
46+
free(input);
47+
UJFree(state);
48+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
require 'json/ext'
2+
3+
# Read the whole file named by the first command-line argument, then parse
# its content as JSON.
path = ARGV.first
parsed = JSON.parse(IO.read(path))
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package main
2+
3+
import "io/ioutil"
4+
import "encoding/json"
5+
import "os"
6+
import "fmt"
7+
8+
// main decodes the JSON document stored in the file named by the first
// command-line argument into a generic value and discards the result.
//
// Exit status: 0 on success, 1 on bad usage or when the file cannot be read
// or decoded. Diagnostics are written to standard error.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "Usage ./json_parse file")
		os.Exit(1)
	}
	path := os.Args[1]

	dat, err := ioutil.ReadFile(path)
	if err != nil {
		// The *PathError returned by ReadFile already names the file.
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	var obj interface{}
	if err := json.Unmarshal(dat, &obj); err != nil {
		fmt.Fprintf(os.Stderr, "%s: %v\n", path, err)
		os.Exit(1)
	}
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
require 'json/pure'
2+
3+
# Read the whole file named by the first command-line argument, then parse
# its content as JSON.
path = ARGV.first
parsed = JSON.parse(IO.read(path))
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# This file is part of NIT ( http://www.nitlanguage.org ).
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import json::string_parser
16+
17+
# Read all the bytes of the file named by the first argument, convert them to
# a string, then parse that string as JSON.
var path = args.first.to_path
var json = path.read_all_bytes.to_s.parse_json

0 commit comments

Comments
 (0)