Skip to content

Commit 5aaa20f

Browse files
author
Sandhya Viswanathan
committed
8272861: Add a micro benchmark for vector api
Reviewed-by: psandoz
1 parent 7a01ba6 commit 5aaa20f

File tree

1 file changed

+189
-0
lines changed

1 file changed

+189
-0
lines changed
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
/*
2+
* Copyright (c) 2021, Intel Corporation. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
package org.openjdk.bench.jdk.incubator.vector;
26+
27+
import jdk.incubator.vector.FloatVector;
28+
import jdk.incubator.vector.VectorOperators;
29+
import jdk.incubator.vector.VectorSpecies;
30+
import org.openjdk.jmh.annotations.*;
31+
32+
import java.util.Objects;
33+
import java.util.Random;
34+
import java.util.concurrent.TimeUnit;
35+
import java.util.function.IntUnaryOperator;
36+
37+
@BenchmarkMode(Mode.Throughput)
38+
@OutputTimeUnit(TimeUnit.SECONDS)
39+
@State(Scope.Thread)
40+
@Warmup(iterations = 3, time = 5)
41+
@Measurement(iterations = 3, time = 5)
42+
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
43+
public class BlackScholes {
44+
45+
@Param("1024")
46+
int size;
47+
48+
49+
float[] s0; // Stock Price
50+
float[] x; // Strike Price
51+
float[] t; // Maturity
52+
float[] call;
53+
float[] put;
54+
float r; // risk-neutrality
55+
float sig; // volatility
56+
Random rand;
57+
58+
59+
float randFloat(float low, float high) {
60+
float val = rand.nextFloat();
61+
return (1.0f - val) * low + val * high;
62+
}
63+
64+
float[] fillRandom(float low, float high) {
65+
float[] array = new float[size];
66+
for (int i = 0; i < array.length; i++) {
67+
array[i] = randFloat(low, high);
68+
}
69+
return array;
70+
}
71+
72+
@Setup
73+
public void init() {
74+
rand = new Random();
75+
s0 = fillRandom(5.0f, 30.0f);
76+
x = fillRandom(1.0f, 100.0f);
77+
t = fillRandom(0.25f, 10.0f);
78+
r = 0.02f;
79+
sig = 0.30f;
80+
call = new float[size];
81+
put = new float[size];
82+
}
83+
84+
static final float Y = 0.2316419f;
85+
static final float A1 = 0.31938153f;
86+
static final float A2 = -0.356563782f;
87+
static final float A3 = 1.781477937f;
88+
static final float A4 = -1.821255978f;
89+
static final float A5 = 1.330274429f;
90+
static final float PI = (float)Math.PI;
91+
92+
float cdf(float inp) {
93+
float x = inp;
94+
if (inp < 0f) {
95+
x = -inp;
96+
}
97+
98+
float term = 1f / (1f + (Y * x));
99+
float term_pow2 = term * term;
100+
float term_pow3 = term_pow2 * term;
101+
float term_pow4 = term_pow2 * term_pow2;
102+
float term_pow5 = term_pow2 * term_pow3;
103+
104+
float part1 = (1f / (float)Math.sqrt(2f * PI)) * (float)Math.exp((-x * x) * 0.5f);
105+
106+
float part2 = (A1 * term) +
107+
(A2 * term_pow2) +
108+
(A3 * term_pow3) +
109+
(A4 * term_pow4) +
110+
(A5 * term_pow5);
111+
112+
if (inp >= 0f)
113+
return 1f - part1 * part2;
114+
else
115+
return part1 * part2;
116+
117+
}
118+
119+
public void scalar_black_scholes_kernel(int off) {
120+
float sig_sq_by2 = 0.5f * sig * sig;
121+
for (int i = off; i < size; i++ ) {
122+
float log_s0byx = (float)Math.log(s0[i] / x[i]);
123+
float sig_sqrt_t = sig * (float)Math.sqrt(t[i]);
124+
float exp_neg_rt = (float)Math.exp(-r * t[i]);
125+
float d1 = (log_s0byx + (r + sig_sq_by2) * t[i])/(sig_sqrt_t);
126+
float d2 = d1 - sig_sqrt_t;
127+
call[i] = s0[i] * cdf(d1) - exp_neg_rt * x[i] * cdf(d2);
128+
put[i] = call[i] + exp_neg_rt - s0[i];
129+
}
130+
}
131+
132+
@Benchmark
133+
public void scalar_black_scholes() {
134+
scalar_black_scholes_kernel(0);
135+
}
136+
137+
static final VectorSpecies<Float> fsp = FloatVector.SPECIES_PREFERRED;
138+
139+
FloatVector vcdf(FloatVector vinp) {
140+
var vx = vinp.abs();
141+
var vone = FloatVector.broadcast(fsp, 1.0f);
142+
var vtwo = FloatVector.broadcast(fsp, 2.0f);
143+
var vterm = vone.div(vone.add(vx.mul(Y)));
144+
var vterm_pow2 = vterm.mul(vterm);
145+
var vterm_pow3 = vterm_pow2.mul(vterm);
146+
var vterm_pow4 = vterm_pow2.mul(vterm_pow2);
147+
var vterm_pow5 = vterm_pow2.mul(vterm_pow3);
148+
var vpart1 = vone.div(vtwo.mul(PI).lanewise(VectorOperators.SQRT)).mul(vx.mul(vx).neg().lanewise(VectorOperators.EXP).mul(0.5f));
149+
var vpart2 = vterm.mul(A1).add(vterm_pow2.mul(A2)).add(vterm_pow3.mul(A3)).add(vterm_pow4.mul(A4)).add(vterm_pow5.mul(A5));
150+
var vmask = vinp.compare(VectorOperators.GT, 0f);
151+
var vresult1 = vpart1.mul(vpart2);
152+
var vresult2 = vresult1.neg().add(vone);
153+
var vresult = vresult1.blend(vresult2, vmask);
154+
155+
return vresult;
156+
}
157+
158+
public int vector_black_scholes_kernel() {
159+
int i = 0;
160+
var vsig = FloatVector.broadcast(fsp, sig);
161+
var vsig_sq_by2 = vsig.mul(vsig).mul(0.5f);
162+
var vr = FloatVector.broadcast(fsp, r);
163+
var vnegr = FloatVector.broadcast(fsp, -r);
164+
for (; i <= x.length - fsp.length(); i += fsp.length()) {
165+
var vx = FloatVector.fromArray(fsp, x, i);
166+
var vs0 = FloatVector.fromArray(fsp, s0, i);
167+
var vt = FloatVector.fromArray(fsp, t, i);
168+
var vlog_s0byx = vs0.div(vx).lanewise(VectorOperators.LOG);
169+
var vsig_sqrt_t = vt.lanewise(VectorOperators.SQRT).mul(vsig);
170+
var vexp_neg_rt = vt.mul(vnegr).lanewise(VectorOperators.EXP);
171+
var vd1 = vsig_sq_by2.add(vr).mul(vt).add(vlog_s0byx).div(vsig_sqrt_t);
172+
var vd2 = vd1.sub(vsig_sqrt_t);
173+
var vcall = vs0.mul(vcdf(vd1)).sub(vx.mul(vexp_neg_rt).mul(vcdf(vd2)));
174+
var vput = vcall.add(vexp_neg_rt).sub(vs0);
175+
vcall.intoArray(call, i);
176+
vput.intoArray(put, i);
177+
}
178+
return i;
179+
}
180+
181+
@Benchmark
182+
public void vector_black_scholes() {
183+
int processed = vector_black_scholes_kernel();
184+
if (processed < size) {
185+
scalar_black_scholes_kernel(processed);
186+
}
187+
}
188+
}
189+

0 commit comments

Comments
 (0)