Skip to content

Commit 4fab6c1

Browse files
committed
Add RE2 and TCL to the benchmark harness.
This also adds a new utility, regex-run-one, to the benchmark suite. This utility is a CLI tool that lets one count the number of regex matches for any of the regex engines in the benchmark harness. For example, `regex-run-one tcl '\w{5}z\w{5}' my-file` will count the number of times the regex '\w{5}z\w{5}' matches in my-file. Supported engines are: pcre1, pcre2, onig, re2, rust, rust-bytes and tcl.
1 parent 4332c9c commit 4fab6c1

28 files changed

+2131
-563
lines changed

.travis.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ script:
2020
- if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
2121
travis_wait ./run-bench rust;
2222
travis_wait ./run-bench rust-bytes --no-run;
23-
travis_wait ./run-bench rust-plugin --no-run;
24-
travis_wait ./run-bench pcre --no-run;
23+
travis_wait ./run-bench pcre1 --no-run;
2524
travis_wait ./run-bench onig --no-run;
2625
travis_wait cargo test --verbose --manifest-path=regex_macros/Cargo.toml;
2726
fi

benches/Cargo.toml

Lines changed: 23 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -8,74 +8,58 @@ repository = "https://github.com/rust-lang/regex"
88
documentation = "http://doc.rust-lang.org/regex/regex_syntax/index.html"
99
homepage = "https://github.com/rust-lang/regex"
1010
description = "Regex benchmarks for Rust's and other engines."
11-
1211
build = "build.rs"
1312

1413
[dependencies]
14+
docopt = "0.6"
1515
lazy_static = "0.1"
1616
libc = "0.2"
1717
onig = { version = "0.4", optional = true }
1818
libpcre-sys = { version = "0.2", optional = true }
19+
memmap = "0.2"
1920
regex = { version = "0.1", path = ".." }
2021
regex_macros = { version = "0.1", path = "../regex_macros", optional = true }
2122
regex-syntax = { version = "0.3", path = "../regex-syntax" }
23+
rustc-serialize = "0.3"
2224

2325
[build-dependencies]
26+
gcc = "0.3"
2427
pkg-config = "0.3"
2528

29+
[[bin]]
30+
name = "regex-run-one"
31+
path = "src/main.rs"
32+
bench = false
33+
2634
# Use features to conditionally compile benchmarked regexes, since not every
2735
# regex works on every engine. Additionally, it is useful to be able to build
2836
# each benchmark individually, so that not all dependencies are required to
2937
# run only one benchmark.
38+
#
39+
# Note that when running benchmarks, only ONE feature should be set at a time.
40+
# Doing anything else will probably result in weird "duplicate definition"
41+
# compiler errors.
42+
#
43+
# Tip: use the run-bench script in the root of this repository to run
44+
# benchmarks.
3045
[features]
31-
re-pcre = ["libpcre-sys"]
46+
re-pcre1 = ["libpcre-sys"]
3247
re-pcre2 = []
3348
re-onig = ["onig"]
49+
re-re2 = []
3450
re-rust = []
3551
re-rust-bytes = []
3652
re-rust-plugin = ["regex_macros"]
53+
re-tcl = []
3754

38-
# Run the benchmarks on the default behavior of Regex::new.
39-
[[bench]]
40-
name = "rust"
41-
path = "src/bench_rust.rs"
42-
test = false
43-
bench = true
44-
45-
# Run the benchmarks on the default behavior of bytes::Regex::new.
46-
[[bench]]
47-
name = "rust-bytes"
48-
path = "src/bench_rust_bytes.rs"
49-
test = false
50-
bench = true
51-
52-
# Run the benchmarks on the default behavior of the `regex!` compiler plugin.
53-
[[bench]]
54-
name = "rust-plugin"
55-
path = "src/bench_rust_plugin.rs"
56-
test = false
57-
bench = true
58-
59-
# Run the benchmarks on PCRE.
6055
[[bench]]
61-
name = "pcre"
62-
path = "src/bench_pcre.rs"
56+
name = "bench"
57+
path = "src/bench.rs"
6358
test = false
6459
bench = true
6560

66-
# Run the benchmarks on PCRE2.
67-
[[bench]]
68-
name = "pcre2"
69-
path = "src/bench_pcre2.rs"
70-
test = false
71-
bench = true
72-
73-
# Run the benchmarks on Oniguruma.
74-
[[bench]]
75-
name = "onig"
76-
path = "src/bench_onig.rs"
77-
test = false
78-
bench = true
61+
[profile.release]
62+
debug = true
7963

8064
[profile.bench]
8165
debug = true

benches/build.rs

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11+
extern crate gcc;
1112
extern crate pkg_config;
1213

1314
use std::env;
1415
use std::process;
1516

16-
use pkg_config::Config;
17-
1817
macro_rules! we {
1918
($($tt:tt)*) => {{
2019
use std::io::Write;
@@ -23,11 +22,37 @@ macro_rules! we {
2322
}
2423

2524
fn main() {
25+
// We only need to look for PCRE2 and RE2 because we roll the FFI bindings
26+
// for those libraries ourselves from scratch. For PCRE1 and Oniguruma, we
27+
// rely on other crates that do something similar to the dance below for
28+
// us.
29+
2630
let wants_pcre2 = env::var("CARGO_FEATURE_RE_PCRE2").is_ok();
27-
let has_pcre2 =
28-
Config::new().atleast_version("10.21").find("libpcre2-8").is_ok();
31+
let has_pcre2 = pkg_config::Config::new().find("libpcre2-8").is_ok();
2932
if wants_pcre2 && !has_pcre2 {
3033
we!("pcre2 cannot be found by pkg-config");
3134
process::exit(1);
3235
}
36+
37+
let wants_re2 = env::var("CARGO_FEATURE_RE_RE2").is_ok();
38+
let has_re2 = pkg_config::Config::new().find("re2").is_ok();
39+
if wants_re2 {
40+
if !has_re2 {
41+
we!("re2 cannot be found by pkg-config");
42+
process::exit(1);
43+
}
44+
gcc::Config::new()
45+
.cpp(true)
46+
.flag("-std=c++11")
47+
.file("src/ffi/re2.cpp")
48+
.compile("libcre2.a");
49+
println!("cargo:rustc-link-lib=re2");
50+
}
51+
52+
let wants_tcl = env::var("CARGO_FEATURE_RE_TCL").is_ok();
53+
let has_tcl = pkg_config::Config::new().find("tcl").is_ok();
54+
if wants_tcl && !has_tcl {
55+
we!("tcl cannot be found by pkg-config");
56+
process::exit(1);
57+
}
3358
}

benches/compile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/sh
2+
3+
exec cargo build \
4+
--release \
5+
--features 're-onig re-pcre1 re-pcre2 re-re2 re-rust re-rust-bytes re-tcl' \
6+
"$@"

benches/log/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
tmp

benches/run

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/bin/bash
2+
3+
usage() {
4+
echo "Usage: $(basename $0) [rust | rust-bytes | rust-plugin | pcre1 | pcre2 | re2 | onig | tcl ]" >&2
5+
exit 1
6+
}
7+
8+
if [ $# = 0 ] || [ $1 = '-h' ] || [ $1 = '--help' ]; then
9+
usage
10+
fi
11+
12+
which="$1"
13+
shift
14+
case $which in
15+
rust)
16+
exec cargo bench --bench bench --features re-rust "$@"
17+
;;
18+
rust-bytes)
19+
exec cargo bench --bench bench --features re-rust-bytes "$@"
20+
;;
21+
rust-plugin)
22+
exec cargo bench --bench bench --features re-rust-plugin "$@"
23+
;;
24+
re2)
25+
exec cargo bench --bench bench --features re-re2 "$@"
26+
;;
27+
pcre1)
28+
exec cargo bench --bench bench --features re-pcre1 "$@"
29+
;;
30+
pcre2)
31+
exec cargo bench --bench bench --features re-pcre2 "$@"
32+
;;
33+
onig)
34+
exec cargo bench --bench bench --features re-onig "$@"
35+
;;
36+
tcl)
37+
exec cargo bench --bench bench --features re-tcl "$@"
38+
;;
39+
*)
40+
usage
41+
;;
42+
esac

0 commit comments

Comments
 (0)