Skip to content

Commit 351d788

Browse files
author
Martin Buchholz
committed
8259074: regex benchmarks and tests
Reviewed-by: redestad
1 parent d6d5d9b commit 351d788

File tree

5 files changed

+476
-10
lines changed

5 files changed

+476
-10
lines changed

test/jdk/java/util/regex/TestCases.txt

+43-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
// A test case consists of three lines:
2626
// The first line is a pattern used in the test
2727
// The second line is the input to search for the pattern in
28-
// The third line is a concatentation of the match, the number of groups,
28+
// The third line is a concatenation of the match, the number of groups,
2929
// and the contents of the first four subexpressions.
3030
// Empty lines and lines beginning with comment slashes are ignored.
3131
//
@@ -1231,3 +1231,45 @@ true 1
12311231
(|f){0,1}+
12321232
foo
12331233
true 1
1234+
1235+
//----------------------------------------------------------------
1236+
// Unary numeral primality testing
1237+
//----------------------------------------------------------------
1238+
1239+
// Input is 7 (a prime), in unary; reluctant quantifier
1240+
^(11+?)\1+$
1241+
1111111
1242+
false 1
1243+
1244+
^(1{2,}?)\1+$
1245+
1111111
1246+
false 1
1247+
1248+
// Input is 8 (a power of two), in unary; reluctant quantifier
1249+
// group is shortest possible (2)
1250+
^(11+?)\1+$
1251+
11111111
1252+
true 11111111 1 11
1253+
1254+
^(1{2,}?)\1+$
1255+
11111111
1256+
true 11111111 1 11
1257+
1258+
// Input is 7 (a prime), in unary; greedy quantifier
1259+
^(11+)\1+$
1260+
1111111
1261+
false 1
1262+
1263+
^(1{2,})\1+$
1264+
1111111
1265+
false 1
1266+
1267+
// Input is 8 (a power of two), in unary; greedy quantifier
1268+
// group is longest possible (4)
1269+
^(11+)\1+$
1270+
11111111
1271+
true 11111111 1 1111
1272+
1273+
^(1{2,})\1+$
1274+
11111111
1275+
true 11111111 1 1111

test/micro/org/openjdk/bench/java/lang/ArrayFiddle.java

+5-9
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,7 @@
2222
*/
2323
package org.openjdk.bench.java.lang;
2424

25-
import org.openjdk.jmh.annotations.Benchmark;
26-
import org.openjdk.jmh.annotations.BenchmarkMode;
27-
import org.openjdk.jmh.annotations.Mode;
28-
import org.openjdk.jmh.annotations.OutputTimeUnit;
29-
import org.openjdk.jmh.annotations.Param;
30-
import org.openjdk.jmh.annotations.Scope;
31-
import org.openjdk.jmh.annotations.Setup;
32-
import org.openjdk.jmh.annotations.State;
25+
import org.openjdk.jmh.annotations.*;
3326

3427
import java.util.Arrays;
3528
import java.util.concurrent.ThreadLocalRandom;
@@ -60,9 +53,12 @@
6053
* This benchmark is great for measuring cache effects, e.g. size=10^6 has 5x
6154
* the per-element cost of size=10^3 (See "The Myth of RAM".)
6255
*
63-
* (cd $(hg root) && for size in 3 16 999 999999; do make test TEST="micro:java.lang.ArrayFiddle" MICRO="FORK=2;WARMUP_ITER=4;ITER=4;OPTIONS=-opi $size -p size=$size" |& perl -ne 'print if /^Benchmark/ .. /^Finished running test/'; done)
56+
* (cd $(git rev-parse --show-toplevel) && for size in 3 16 999 999999; do make test TEST='micro:java.lang.ArrayFiddle' MICRO="FORK=2;WARMUP_ITER=4;ITER=4;OPTIONS=-opi $size -p size=$size" |& perl -ne 'print if /^Benchmark/ .. /^Finished running test/'; done)
6457
*/
6558
@BenchmarkMode(Mode.AverageTime)
59+
@Fork(2)
60+
@Warmup(iterations = 1)
61+
@Measurement(iterations = 4)
6662
@OutputTimeUnit(TimeUnit.NANOSECONDS)
6763
@State(Scope.Benchmark)
6864
public class ArrayFiddle {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/*
2+
* Copyright 2020 Google Inc. All Rights Reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
package org.openjdk.bench.java.util.regex;
24+
25+
import org.openjdk.jmh.annotations.*;
26+
27+
import java.util.concurrent.TimeUnit;
28+
import java.util.regex.Matcher;
29+
import java.util.regex.Pattern;
30+
31+
/**
32+
* Benchmarks of Patterns that exhibit O(2^N) performance due to catastrophic
33+
* backtracking, **when implemented naively**.
34+
*
35+
* See: test/jdk/java/util/regex/RegExTest.java#expoBacktracking
36+
* commit b45ea8903ec290ab194d9ebe040bc43edd5dd0a3
37+
* Author: Xueming Shen <sherman@openjdk.org>
38+
* Date: Tue May 10 21:19:25 2016 -0700
39+
*
40+
* Here's a way to compare the per-char cost:
41+
*
42+
* (cd $(git rev-parse --show-toplevel) && for size in 16 128 1024; do make test TEST='micro:java.util.regex.Exponential' MICRO="FORK=1;WARMUP_ITER=1;ITER=4;OPTIONS=-opi $size -p size=$size" |& perl -ne 'print if /^Benchmark/ .. /^Finished running test/'; done)
43+
*
44+
*/
45+
@BenchmarkMode(Mode.AverageTime)
46+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
47+
@Fork(1)
48+
@Warmup(iterations = 1)
49+
@Measurement(iterations = 4)
50+
@State(Scope.Benchmark)
51+
public class Exponential {
52+
/** Number of consecutive 'X' chars in {@link #justXs}; see {@link #setup()}. */
53+
@Param({"16", "128", "1024"})
54+
// 2048+ runs into StackOverflowError; see JDK-8260866
55+
int size;
56+
57+
/** Input consisting of {@code size} copies of 'X'; assigned in {@link #setup()}. */
public String justXs;
58+
/** {@link #justXs} followed by a single '!'; assigned in {@link #setup()}. */
public String notJustXs;
59+
60+
// Patterns that match justXs but not notJustXs
61+
public Pattern pat1;
62+
public Pattern pat2;
63+
public Pattern pat3;
64+
public Pattern pat4;
65+
66+
/**
 * Compiles {@code regex} and sanity-checks it against the benchmark inputs.
 *
 * Reads {@link #justXs} and {@link #notJustXs}, so both must already be
 * assigned when this is called ({@link #setup()} assigns them first).
 *
 * @param regex the regular expression to compile
 * @return the compiled pattern
 * @throws AssertionError if the pattern fails to match all of justXs,
 *         or matches all of notJustXs
 */
Pattern compile(String regex) {
67+
Pattern pat = Pattern.compile(regex);
68+
// ad hoc correctness checking
69+
if (! pat.matcher(justXs).matches()
70+
|| pat.matcher(notJustXs).matches()) {
71+
throw new AssertionError("unexpected matching: " + regex);
72+
}
73+
return pat;
74+
}
75+
76+
/**
 * Builds the two input strings for the current {@code size} and compiles
 * the four patterns. The inputs are assigned before any call to
 * {@link #compile(String)} because that method checks against them.
 */
@Setup(Level.Trial)
77+
public void setup() {
78+
justXs = "X".repeat(size);
79+
notJustXs = justXs + "!";
80+
81+
// Will (or should) the engine optimize (?:X|X) to X ?
82+
pat1 = compile("(?:X|X)*");
83+
84+
// Tougher to optimize than pat1
85+
pat2 = compile("(?:[XY]|[XZ])*");
86+
87+
// Nested quantifiers: a repeated group that itself repeats
pat3 = compile("(X+)+");
88+
89+
// Same as pat3 but with explicit anchors; the benchmarks call
// matches(), which already requires the whole input to match
pat4 = compile("^(X+)+$");
90+
}
91+
92+
/** O(N) */
93+
@Benchmark
94+
public boolean pat1_justXs() {
95+
return pat1.matcher(justXs).matches();
96+
}
97+
98+
/** O(N) */
99+
@Benchmark
100+
public boolean pat1_notJustXs() {
101+
return pat1.matcher(notJustXs).matches();
102+
}
103+
104+
/** O(N) */
105+
@Benchmark
106+
public boolean pat2_justXs() {
107+
return pat2.matcher(justXs).matches();
108+
}
109+
110+
/** O(N) */
111+
@Benchmark
112+
public boolean pat2_notJustXs() {
113+
return pat2.matcher(notJustXs).matches();
114+
}
115+
116+
/** O(1) - very surprising! */
117+
@Benchmark
118+
public boolean pat3_justXs() {
119+
return pat3.matcher(justXs).matches();
120+
}
121+
122+
/** O(N^2) - surprising! O(N) seems very achievable. */
123+
@Benchmark
124+
public boolean pat3_notJustXs() {
125+
return pat3.matcher(notJustXs).matches();
126+
}
127+
128+
/** O(1) - very surprising! */
129+
@Benchmark
130+
public boolean pat4_justXs() {
131+
return pat4.matcher(justXs).matches();
132+
}
133+
134+
/** O(N^2) - surprising! O(N) seems very achievable. */
135+
@Benchmark
136+
public boolean pat4_notJustXs() {
137+
return pat4.matcher(notJustXs).matches();
138+
}
139+
140+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* Copyright 2020 Google Inc. All Rights Reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
package org.openjdk.bench.java.util.regex;
24+
25+
import org.openjdk.jmh.annotations.*;
26+
27+
import java.util.concurrent.TimeUnit;
28+
import java.util.regex.Pattern;
29+
30+
/**
31+
* Abusing regexes for fun primality testing.
32+
* Famous among regex enthusiasts.
33+
* https://stackoverflow.com/q/3296050/625403
34+
*
35+
* Prime numbers exhibit O(N^2) performance with all variants, due to exhaustive
36+
* backtracking.
37+
*
38+
* Powers of two exhibit O(N) performance with all variants, with reluctant
39+
* quantifiers doing somewhat better.
40+
*
41+
* Here's a way to compare the per-input-char cost:
42+
*
43+
* (cd $(git rev-parse --show-toplevel) && for n in 16 17 256 257 4096 4099; do make test TEST='micro:java.util.regex.Primality' MICRO="FORK=1;WARMUP_ITER=1;ITER=4;OPTIONS=-opi $n -p n=$n" |& perl -ne 'print if /^Benchmark/ .. /^Finished running test/'; done)
44+
*/
45+
@BenchmarkMode(Mode.AverageTime)
46+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
47+
@Fork(1)
48+
@Warmup(iterations = 1)
49+
@Measurement(iterations = 4)
50+
@State(Scope.Benchmark)
51+
public class Primality {
52+
/** Number to be primality tested. */
53+
@Param({"16", "17", "256", "257", "4096", "4099"})
54+
// "64", "67", "1024", "1031", "16384", "16411"})
55+
int n;
56+
57+
/** Unary numeral representation of int n */
58+
public String unary;
59+
60+
// Patterns that match composite numbers represented as unary numerals.
61+
public Pattern reluctant1;
62+
public Pattern reluctant2;
63+
public Pattern greedy1;
64+
public Pattern greedy2;
65+
66+
/**
 * Compiles {@code regex} and cross-checks its verdict on {@link #n}
 * against {@link java.math.BigInteger#isProbablePrime(int)}.
 *
 * Reads {@link #unary}, so it must already be assigned when this is
 * called ({@link #setup()} assigns it first).
 *
 * NOTE(review): a non-match is interpreted as "prime", so for n of 0 or 1
 * (not matched by the patterns used here, and not prime per BigInteger)
 * this check would throw; the configured params are all >= 16.
 *
 * @param regex a regular expression expected to match exactly the
 *        composite unary numerals
 * @return the compiled pattern
 * @throws AssertionError if the regex verdict disagrees with BigInteger
 */
Pattern compile(String regex) {
67+
Pattern pat = Pattern.compile(regex);
68+
// ad hoc correctness checking
69+
// the pattern matches composites, so "no match" means prime
boolean isPrime1 = ! pat.matcher(unary).matches();
70+
boolean isPrime2 = java.math.BigInteger.valueOf(n).isProbablePrime(100);
71+
if (isPrime1 != isPrime2) {
72+
throw new AssertionError("regex=" + regex + ", n=" + n);
73+
}
74+
return pat;
75+
}
76+
77+
/**
 * Builds the unary numeral for the current {@code n} and compiles the
 * four pattern variants. {@link #unary} is assigned before any call to
 * {@link #compile(String)} because that method checks against it.
 */
@Setup(Level.Trial)
78+
public void setup() {
79+
unary = "1".repeat(n);
80+
81+
// Group is a candidate divisor (two or more 1s); the back-reference
// repeats it one or more times to cover the rest of the input.
reluctant1 = compile("^(11+?)\\1+$");
82+
reluctant2 = compile("^(1{2,}?)\\1+$");
83+
greedy1 = compile("^(11+)\\1+$");
84+
greedy2 = compile("^(1{2,})\\1+$");
85+
}
86+
87+
/** Matches {@link #unary} against {@link #reluctant1}; a match means composite. */
@Benchmark
88+
public boolean reluctant1() {
89+
return reluctant1.matcher(unary).matches();
90+
}
91+
92+
/** Matches {@link #unary} against {@link #reluctant2}; a match means composite. */
@Benchmark
93+
public boolean reluctant2() {
94+
return reluctant2.matcher(unary).matches();
95+
}
96+
97+
/** Matches {@link #unary} against {@link #greedy1}; a match means composite. */
@Benchmark
98+
public boolean greedy1() {
99+
return greedy1.matcher(unary).matches();
100+
}
101+
102+
/** Matches {@link #unary} against {@link #greedy2}; a match means composite. */
@Benchmark
103+
public boolean greedy2() {
104+
return greedy2.matcher(unary).matches();
105+
}
106+
}

0 commit comments

Comments
 (0)