|
| 1 | +/* |
| 2 | + * Copyright (c) 2021, Intel Corporation. All rights reserved. |
| 3 | + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | + * |
| 5 | + * This code is free software; you can redistribute it and/or modify it |
| 6 | + * under the terms of the GNU General Public License version 2 only, as |
| 7 | + * published by the Free Software Foundation. |
| 8 | + * |
| 9 | + * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | + * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | + * accompanied this code). |
| 14 | + * |
| 15 | + * You should have received a copy of the GNU General Public License version |
| 16 | + * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | + * |
| 19 | + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | + * or visit www.oracle.com if you need additional information or have any |
| 21 | + * questions. |
| 22 | + * |
| 23 | + */ |
| 24 | + |
| 25 | +package org.openjdk.bench.jdk.incubator.vector; |
| 26 | + |
| 27 | +import jdk.incubator.vector.FloatVector; |
| 28 | +import jdk.incubator.vector.VectorOperators; |
| 29 | +import jdk.incubator.vector.VectorSpecies; |
| 30 | +import org.openjdk.jmh.annotations.*; |
| 31 | + |
| 32 | +import java.util.Objects; |
| 33 | +import java.util.Random; |
| 34 | +import java.util.concurrent.TimeUnit; |
| 35 | +import java.util.function.IntUnaryOperator; |
| 36 | + |
| 37 | +@BenchmarkMode(Mode.Throughput) |
| 38 | +@OutputTimeUnit(TimeUnit.SECONDS) |
| 39 | +@State(Scope.Thread) |
| 40 | +@Warmup(iterations = 3, time = 5) |
| 41 | +@Measurement(iterations = 3, time = 5) |
| 42 | +@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) |
| 43 | +public class BlackScholes { |
| 44 | + |
| 45 | + @Param("1024") |
| 46 | + int size; |
| 47 | + |
| 48 | + |
| 49 | + float[] s0; // Stock Price |
| 50 | + float[] x; // Strike Price |
| 51 | + float[] t; // Maturity |
| 52 | + float[] call; |
| 53 | + float[] put; |
| 54 | + float r; // risk-neutrality |
| 55 | + float sig; // volatility |
| 56 | + Random rand; |
| 57 | + |
| 58 | + |
| 59 | + float randFloat(float low, float high) { |
| 60 | + float val = rand.nextFloat(); |
| 61 | + return (1.0f - val) * low + val * high; |
| 62 | + } |
| 63 | + |
| 64 | + float[] fillRandom(float low, float high) { |
| 65 | + float[] array = new float[size]; |
| 66 | + for (int i = 0; i < array.length; i++) { |
| 67 | + array[i] = randFloat(low, high); |
| 68 | + } |
| 69 | + return array; |
| 70 | + } |
| 71 | + |
| 72 | + @Setup |
| 73 | + public void init() { |
| 74 | + rand = new Random(); |
| 75 | + s0 = fillRandom(5.0f, 30.0f); |
| 76 | + x = fillRandom(1.0f, 100.0f); |
| 77 | + t = fillRandom(0.25f, 10.0f); |
| 78 | + r = 0.02f; |
| 79 | + sig = 0.30f; |
| 80 | + call = new float[size]; |
| 81 | + put = new float[size]; |
| 82 | + } |
| 83 | + |
| 84 | + static final float Y = 0.2316419f; |
| 85 | + static final float A1 = 0.31938153f; |
| 86 | + static final float A2 = -0.356563782f; |
| 87 | + static final float A3 = 1.781477937f; |
| 88 | + static final float A4 = -1.821255978f; |
| 89 | + static final float A5 = 1.330274429f; |
| 90 | + static final float PI = (float)Math.PI; |
| 91 | + |
| 92 | + float cdf(float inp) { |
| 93 | + float x = inp; |
| 94 | + if (inp < 0f) { |
| 95 | + x = -inp; |
| 96 | + } |
| 97 | + |
| 98 | + float term = 1f / (1f + (Y * x)); |
| 99 | + float term_pow2 = term * term; |
| 100 | + float term_pow3 = term_pow2 * term; |
| 101 | + float term_pow4 = term_pow2 * term_pow2; |
| 102 | + float term_pow5 = term_pow2 * term_pow3; |
| 103 | + |
| 104 | + float part1 = (1f / (float)Math.sqrt(2f * PI)) * (float)Math.exp((-x * x) * 0.5f); |
| 105 | + |
| 106 | + float part2 = (A1 * term) + |
| 107 | + (A2 * term_pow2) + |
| 108 | + (A3 * term_pow3) + |
| 109 | + (A4 * term_pow4) + |
| 110 | + (A5 * term_pow5); |
| 111 | + |
| 112 | + if (inp >= 0f) |
| 113 | + return 1f - part1 * part2; |
| 114 | + else |
| 115 | + return part1 * part2; |
| 116 | + |
| 117 | + } |
| 118 | + |
| 119 | + public void scalar_black_scholes_kernel(int off) { |
| 120 | + float sig_sq_by2 = 0.5f * sig * sig; |
| 121 | + for (int i = off; i < size; i++ ) { |
| 122 | + float log_s0byx = (float)Math.log(s0[i] / x[i]); |
| 123 | + float sig_sqrt_t = sig * (float)Math.sqrt(t[i]); |
| 124 | + float exp_neg_rt = (float)Math.exp(-r * t[i]); |
| 125 | + float d1 = (log_s0byx + (r + sig_sq_by2) * t[i])/(sig_sqrt_t); |
| 126 | + float d2 = d1 - sig_sqrt_t; |
| 127 | + call[i] = s0[i] * cdf(d1) - exp_neg_rt * x[i] * cdf(d2); |
| 128 | + put[i] = call[i] + exp_neg_rt - s0[i]; |
| 129 | + } |
| 130 | + } |
| 131 | + |
| 132 | + @Benchmark |
| 133 | + public void scalar_black_scholes() { |
| 134 | + scalar_black_scholes_kernel(0); |
| 135 | + } |
| 136 | + |
| 137 | + static final VectorSpecies<Float> fsp = FloatVector.SPECIES_PREFERRED; |
| 138 | + |
| 139 | + FloatVector vcdf(FloatVector vinp) { |
| 140 | + var vx = vinp.abs(); |
| 141 | + var vone = FloatVector.broadcast(fsp, 1.0f); |
| 142 | + var vtwo = FloatVector.broadcast(fsp, 2.0f); |
| 143 | + var vterm = vone.div(vone.add(vx.mul(Y))); |
| 144 | + var vterm_pow2 = vterm.mul(vterm); |
| 145 | + var vterm_pow3 = vterm_pow2.mul(vterm); |
| 146 | + var vterm_pow4 = vterm_pow2.mul(vterm_pow2); |
| 147 | + var vterm_pow5 = vterm_pow2.mul(vterm_pow3); |
| 148 | + var vpart1 = vone.div(vtwo.mul(PI).lanewise(VectorOperators.SQRT)).mul(vx.mul(vx).neg().lanewise(VectorOperators.EXP).mul(0.5f)); |
| 149 | + var vpart2 = vterm.mul(A1).add(vterm_pow2.mul(A2)).add(vterm_pow3.mul(A3)).add(vterm_pow4.mul(A4)).add(vterm_pow5.mul(A5)); |
| 150 | + var vmask = vinp.compare(VectorOperators.GT, 0f); |
| 151 | + var vresult1 = vpart1.mul(vpart2); |
| 152 | + var vresult2 = vresult1.neg().add(vone); |
| 153 | + var vresult = vresult1.blend(vresult2, vmask); |
| 154 | + |
| 155 | + return vresult; |
| 156 | + } |
| 157 | + |
| 158 | + public int vector_black_scholes_kernel() { |
| 159 | + int i = 0; |
| 160 | + var vsig = FloatVector.broadcast(fsp, sig); |
| 161 | + var vsig_sq_by2 = vsig.mul(vsig).mul(0.5f); |
| 162 | + var vr = FloatVector.broadcast(fsp, r); |
| 163 | + var vnegr = FloatVector.broadcast(fsp, -r); |
| 164 | + for (; i <= x.length - fsp.length(); i += fsp.length()) { |
| 165 | + var vx = FloatVector.fromArray(fsp, x, i); |
| 166 | + var vs0 = FloatVector.fromArray(fsp, s0, i); |
| 167 | + var vt = FloatVector.fromArray(fsp, t, i); |
| 168 | + var vlog_s0byx = vs0.div(vx).lanewise(VectorOperators.LOG); |
| 169 | + var vsig_sqrt_t = vt.lanewise(VectorOperators.SQRT).mul(vsig); |
| 170 | + var vexp_neg_rt = vt.mul(vnegr).lanewise(VectorOperators.EXP); |
| 171 | + var vd1 = vsig_sq_by2.add(vr).mul(vt).add(vlog_s0byx).div(vsig_sqrt_t); |
| 172 | + var vd2 = vd1.sub(vsig_sqrt_t); |
| 173 | + var vcall = vs0.mul(vcdf(vd1)).sub(vx.mul(vexp_neg_rt).mul(vcdf(vd2))); |
| 174 | + var vput = vcall.add(vexp_neg_rt).sub(vs0); |
| 175 | + vcall.intoArray(call, i); |
| 176 | + vput.intoArray(put, i); |
| 177 | + } |
| 178 | + return i; |
| 179 | + } |
| 180 | + |
| 181 | + @Benchmark |
| 182 | + public void vector_black_scholes() { |
| 183 | + int processed = vector_black_scholes_kernel(); |
| 184 | + if (processed < size) { |
| 185 | + scalar_black_scholes_kernel(processed); |
| 186 | + } |
| 187 | + } |
| 188 | +} |
| 189 | + |
0 commit comments