Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8272861: Add a micro benchmark for vector api #5234

Closed
wants to merge 5 commits into from
Closed
Changes from 2 commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -0,0 +1,190 @@
/*
* Copyright (c) 2021, Intel Corporation. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

package org.openjdk.bench.jdk.incubator.vector;

import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;
import org.openjdk.jmh.annotations.*;

import java.util.Objects;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.function.IntUnaryOperator;
import org.openjdk.jmh.infra.Blackhole;

@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@State(Scope.Thread)
@Warmup(iterations = 3, time = 5)
@Measurement(iterations = 3, time = 5)
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public class BlackScholes {

@Param("1024")
int size;


float[] s0; // Stock Price
float[] x; // Strike Price
float[] t; // Maturity
float[] call;
float[] put;
float r; // risk-neutrality
float sig; // volatility
Random rand;


float randFloat(float low, float high) {
float val = rand.nextFloat()/Float.MAX_VALUE;

This comment has been minimized.

@PaulSandoz

PaulSandoz Aug 26, 2021
Member Outdated

nextFloat returns a PSR between 0 and 1., so no need to divide by Float.MAX_VALUE ?

return (1.0f - val) * low + val * high;
}

float[] fillRandom(float low, float high) {
float[] array = new float[size];
for (int i = 0; i < array.length; i++) {
array[i] = randFloat(low, high);
}
return array;
}

@Setup
public void init() {
rand = new Random();
s0 = fillRandom(5.0f, 30.0f);
x = fillRandom(1.0f, 100.0f);
t = fillRandom(0.25f, 10.0f);
r = 0.02f;
sig = 0.30f;
call = new float[size];
put = new float[size];
}

static float Y = 0.2316419f;
static float A1 = 0.31938153f;
static float A2 = -0.356563782f;
static float A3 = 1.781477937f;
static float A4 = -1.821255978f;
static float A5 = 1.330274429f;
static float PI = (float)Math.PI;
Comment on lines +85 to +91

This comment has been minimized.

@shipilev

shipilev Aug 24, 2021
Contributor Outdated

Want to make these static final?


float cdf(float inp) {
float x = inp;
if (inp < 0f) {
x = -inp;
}

float term = 1f / (1f + (Y * x));
float term_pow2 = term * term;
float term_pow3 = term_pow2 * term;
float term_pow4 = term_pow2 * term_pow2;
float term_pow5 = term_pow2 * term_pow3;

float part1 = (1f / (float)Math.sqrt(2f * PI)) * (float)Math.exp((-x * x) * 0.5f);

float part2 = (A1 * term) +
(A2 * term_pow2) +
(A3 * term_pow3) +
(A4 * term_pow4) +
(A5 * term_pow5);

if (inp >= 0f)
return 1f - part1 * part2;
else
return part1 * part2;

}

public void scalar_black_scholes_kernel(int off) {
float sig_sq_by2 = 0.5f * sig * sig;
for (int i = off; i < size; i++ ) {
float log_s0byx = (float)Math.log(s0[i] / x[i]);
float sig_sqrt_t = sig * (float)Math.sqrt(t[i]);
float exp_neg_rt = (float)Math.exp(-r * t[i]);
float d1 = (log_s0byx + (r + sig_sq_by2) * t[i])/(sig_sqrt_t);
float d2 = d1 - sig_sqrt_t;
call[i] = s0[i] * cdf(d1) - exp_neg_rt * x[i] * cdf(d2);
put[i] = call[i] + exp_neg_rt - s0[i];
}
}

@Benchmark
public void scalar_black_scholes(Blackhole bh) {

This comment has been minimized.

@shipilev

shipilev Aug 24, 2021
Contributor Outdated

So there is a Blackhole argument here, but what is actually blackholed? AFAICS, stores to call and put are roughly okay to avoid dead-code elimination on these paths.

scalar_black_scholes_kernel(0);
}

static final VectorSpecies<Float> fsp = FloatVector.SPECIES_256;

This comment has been minimized.

@nsjian

nsjian Aug 24, 2021
Outdated

Can we use a more portable way instead of fixed size SPECIES_256?


FloatVector vcdf(FloatVector vinp) {
var vx = vinp.abs();
var vone = FloatVector.broadcast(fsp, 1.0f);
var vtwo = FloatVector.broadcast(fsp, 2.0f);
var vterm = vone.div(vone.add(vx.mul(Y)));
var vterm_pow2 = vterm.mul(vterm);
var vterm_pow3 = vterm_pow2.mul(vterm);
var vterm_pow4 = vterm_pow2.mul(vterm_pow2);
var vterm_pow5 = vterm_pow2.mul(vterm_pow3);
var vpart1 = vone.div(vtwo.mul(PI).lanewise(VectorOperators.SQRT)).mul(vx.mul(vx).neg().lanewise(VectorOperators.EXP).mul(0.5f));
var vpart2 = vterm.mul(A1).add(vterm_pow2.mul(A2)).add(vterm_pow3.mul(A3)).add(vterm_pow4.mul(A4)).add(vterm_pow5.mul(A5));
var vmask = vinp.compare(VectorOperators.GT, 0f);
var vresult1 = vpart1.mul(vpart2);
var vresult2 = vresult1.neg().add(vone);
var vresult = vresult1.blend(vresult2, vmask);

return vresult;
}

public int vector_black_scholes_kernel() {
int i = 0;
var vsig = FloatVector.broadcast(fsp, sig);
var vsig_sq_by2 = vsig.mul(vsig).mul(0.5f);
var vr = FloatVector.broadcast(fsp, r);
var vnegr = FloatVector.broadcast(fsp, -r);
for (; i <= x.length - fsp.length(); i += fsp.length()) {
var vx = FloatVector.fromArray(fsp, x, i);
var vs0 = FloatVector.fromArray(fsp, s0, i);
var vt = FloatVector.fromArray(fsp, t, i);
var vlog_s0byx = vs0.div(vx).lanewise(VectorOperators.LOG);
var vsig_sqrt_t = vt.lanewise(VectorOperators.SQRT).mul(vsig);
var vexp_neg_rt = vt.mul(vnegr).lanewise(VectorOperators.EXP);
var vd1 = vsig_sq_by2.add(vr).mul(vt).add(vlog_s0byx).div(vsig_sqrt_t);
var vd2 = vd1.sub(vsig_sqrt_t);
var vcall = vs0.mul(vcdf(vd1)).sub(vx.mul(vexp_neg_rt).mul(vcdf(vd2)));
var vput = vcall.add(vexp_neg_rt).sub(vs0);
vcall.intoArray(call, i);
vput.intoArray(put, i);
}
return i;
}

@Benchmark
public void vector_black_scholes(Blackhole bh) {
int processed = vector_black_scholes_kernel();
if (processed < size) {
scalar_black_scholes_kernel(processed);
}
}
}