## Benchmark - Julia vs Python Numba

### Purpose

Verify how Julia compares to Python Numba @jit.  This notebook contains the Julia implementation. The code is intentionally kept the same as the Python version. 

### Source

Black Scholes example from Python Numba:

https://github.com/ContinuumIO/numbapro-examples/blob/master/blackscholes/blackscholes_numba.py

### Test Result

Method | Elapsed Time *per* iteration
-------|--------------------------------
Python Numpy | 783 ms
Python Numba | 404 ms
Python Numba (nogit/parallel) | 396 ms
Julia (single threaded) | 518 ms
Julia (multi-threaded)  | 247 ms
Julia (multi-threaded/inbounds) | 255 ms

In [1]:
Threads.nthreads()

4

In [2]:
using BenchmarkTools

In [3]:
function cnd(d::Float64) 
    A1 = 0.31938153
    A2 = -0.356563782
    A3 = 1.781477937
    A4 = -1.821255978
    A5 = 1.330274429
    RSQRT2PI = 0.39894228040143267793994605993438
    K = 1.0 / (1.0 + 0.2316419 * abs(d))
    ret_val = (RSQRT2PI * exp(-0.5 * d * d) *
              (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5))))))
    d > 0 ? 1.0 - ret_val : ret_val
end

cnd (generic function with 1 method)

In [4]:
function black_scholes(callResult, putResult, stockPrice, optionStrike, optionYears, 
        Riskfree, Volatility)
    S = stockPrice
    X = optionStrike
    T = optionYears
    R = Riskfree
    V = Volatility
    for i in 1:length(S)
        sqrtT = sqrt(T[i])
        d1 = (log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (V * sqrtT)
        d2 = d1 - V * sqrtT
        cndd1 = cnd(d1)
        cndd2 = cnd(d2)

        expRT = exp((-1. * R) * T[i])
        callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2)
        putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1))
    end
end

black_scholes (generic function with 1 method)

In [5]:
randfloat(r, low, high) = @. (1.0 - r) * low + r * high

randfloat (generic function with 1 method)

In [6]:
function testbs(OPT_N, f)
    iterations = 10
    RISKFREE = 0.02
    VOLATILITY = 0.30
    callResult = zeros(OPT_N)
    putResult  = -ones(OPT_N)
    stockPrice    = randfloat(rand(OPT_N), 5.0, 30.0)
    optionStrike  = randfloat(rand(OPT_N), 1.0, 100.0)
    optionYears   = randfloat(rand(OPT_N), 0.25, 10.0)
    for i in 1:iterations
        f(callResult, putResult, stockPrice, optionStrike, optionYears, RISKFREE, VOLATILITY)
    end
end

testbs (generic function with 1 method)

In [7]:
testbs(1, black_scholes)  # warm up
@time testbs(4000000, black_scholes)

  5.851138 seconds (101 allocations: 274.665 MiB, 4.83% gc time)


In [8]:
bmk = @benchmark testbs(4000000, $black_scholes) seconds=20

BenchmarkTools.Trial: 
  memory estimate:  274.66 MiB
  allocs estimate:  18
  --------------
  minimum time:     5.022 s (3.76% GC)
  median time:      5.186 s (3.65% GC)
  mean time:        5.319 s (3.80% GC)
  maximum time:     5.881 s (3.23% GC)
  --------------
  samples:          4
  evals/sample:     1

In [11]:
function black_scholes2(callResult, putResult, stockPrice, optionStrike, optionYears, 
        Riskfree, Volatility)
    S = stockPrice
    X = optionStrike
    T = optionYears
    R = Riskfree
    V = Volatility
    Threads.@threads for i in 1:length(S)
        sqrtT = sqrt(T[i])
        d1 = (log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (V * sqrtT)
        d2 = d1 - V * sqrtT
        cndd1 = cnd(d1)
        cndd2 = cnd(d2)

        expRT = exp((-1. * R) * T[i])
        callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2)
        putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1))
    end
end

black_scholes2 (generic function with 1 method)

In [12]:
bmk2 = @benchmark testbs(4000000, $black_scholes2) seconds=20

BenchmarkTools.Trial: 
  memory estimate:  274.66 MiB
  allocs estimate:  28
  --------------
  minimum time:     2.429 s (10.45% GC)
  median time:      2.466 s (9.34% GC)
  mean time:        2.492 s (9.85% GC)
  maximum time:     2.667 s (8.81% GC)
  --------------
  samples:          9
  evals/sample:     1

In [15]:
function black_scholes3(callResult, putResult, stockPrice, optionStrike, optionYears, 
        Riskfree, Volatility)
    S = stockPrice
    X = optionStrike
    T = optionYears
    R = Riskfree
    V = Volatility
    @inbounds Threads.@threads for i in 1:length(S)
        sqrtT = sqrt(T[i])
        d1 = (log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (V * sqrtT)
        d2 = d1 - V * sqrtT
        cndd1 = cnd(d1)
        cndd2 = cnd(d2)

        expRT = exp((-1. * R) * T[i])
        callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2)
        putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1))
    end
end

black_scholes3 (generic function with 1 method)

In [16]:
bmk3 = @benchmark testbs(4000000, $black_scholes3) seconds=20

BenchmarkTools.Trial: 
  memory estimate:  274.66 MiB
  allocs estimate:  28
  --------------
  minimum time:     2.447 s (11.75% GC)
  median time:      2.549 s (12.62% GC)
  mean time:        2.649 s (13.91% GC)
  maximum time:     3.066 s (21.68% GC)
  --------------
  samples:          8
  evals/sample:     1

In [17]:
versioninfo()

Julia Version 0.6.2
Commit d386e40c17 (2017-12-13 18:08 UTC)
Platform Info:
  OS: macOS (x86_64-apple-darwin14.5.0)
  CPU: Intel(R) Core(TM) i5-4258U CPU @ 2.40GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, haswell)


In [18]:
;python -V

Python 3.6.3 :: Anaconda, Inc.
