# Original Asian version

In [1]:
import math # This is the standard Python math module. Not the numpy one
import random # This is the standard Python random module.  Not the numpy one
from numba import jit,prange

def Asian_original(so,k,r,v,t,m,n):
    """
    I have not identified what the arguments mean since the original MATLAB code didn't either. 
    This doc-string will be updated once I learn what they are!
    """
    dt = t/m
    AsianPayoffSum = 0
    for i in range(1,n+1):
        s = so
        stSum = so
        at = so
        for j in range(1,m+1):
            st = s * math.exp(((r-v**2/2)*dt) + (v*random.normalvariate(0,1)*math.sqrt(dt)))
            stSum = stSum + st 
            at = stSum/(j+1)
            s = st
        AsianPayoff = max(at-k,0);
        AsianPayoffSum = AsianPayoffSum + AsianPayoff;
    AsianCall = math.exp(-r*t)*(AsianPayoffSum/n)
    return(AsianCall)

In [2]:
# Test it
random.seed(10)
Asian_original(100,90,0.15,0.45,1,100,200000)

18.420053075801047

In [3]:
original_time = %timeit -o Asian_original(100,90,0.15,0.45,1,100,200000)

35.7 s ± 2.18 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Cython version

In [4]:
%load_ext Cython

In [5]:
%%cython
import math 
import random 
def Asian_cython(float so, float k, float r, float v, float t, int m, int n):
    """
    I have not identified what the arguments mean since the original MATLAB code didn't either. 
    This doc-string will be updated once I learn what they are!
    """
    cdef int i, j
    cdef float dt, s, stSum, at, st
    cdef float AsianPayoff, AsianPayoffSum, AsianCall
    
    dt = t/m
    AsianPayoffSum = 0
    for i in range(1,n+1):
        s = so
        stSum = so
        at = so
        for j in range(1,m+1):
            st = s * math.exp(((r-v**2/2)*dt) + (v*random.normalvariate(0,1)*math.sqrt(dt)))
            stSum = stSum + st 
            at = stSum/(j+1)
            s = st
        AsianPayoff = max(at-k,0);
        AsianPayoffSum = AsianPayoffSum + AsianPayoff;
    AsianCall = math.exp(-r*t)*(AsianPayoffSum/n)
    return(AsianCall)

In [6]:
# Test it
random.seed(10)
Asian_cython(100,90,0.15,0.45,1,100,200000)

18.420015335083008

In [7]:
cython_time = %timeit -o Asian_cython(100,90,0.15,0.45,1,100,200000)

25.2 s ± 1.21 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Numba JIT version 

In [8]:
@jit
def Asian_jit(so,k,r,v,t,m,n):
    """
    I have not identified what the arguments mean since the original MATLAB code didn't either. 
    This doc-string will be updated once I learn what they are!
    """
    dt = t/m
    AsianPayoffSum = 0
    for i in range(1,n+1):
        s = so
        stSum = so
        at = so
        for j in range(1,m+1):
            st = s * math.exp(((r-v**2/2)*dt) + (v*random.normalvariate(0,1)*math.sqrt(dt)))
            stSum = stSum + st 
            at = stSum/(j+1)
            s = st
        AsianPayoff = max(at-k,0);
        AsianPayoffSum = AsianPayoffSum + AsianPayoff;
    AsianCall = math.exp(-r*t)*(AsianPayoffSum/n)
    return(AsianCall)

In [9]:
# Test it
random.seed(10)
Asian_jit(100,90,0.15,0.45,1,100,200000)

18.513386163621995

In [10]:
jit_time = %timeit -o Asian_jit(100,90,0.15,0.45,1,100,200000)

736 ms ± 9.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Parallel Numba JIT Version

In [11]:
import math # This is the standard Pyton math module. Not the numpy one
import random # This is the standard Python random module.  Not the numpy one
import numpy as np
from numba import jit

@jit(nopython=True)
def inner_loop(m,so,k,r,v,t):
    dt = t/m
    s = so
    stSum = so
    at = so
    for j in range(1,m+1):
        st = s * math.exp(((r-v**2/2)*dt) + (v*random.normalvariate(0,1)*math.sqrt(dt)))
        stSum = stSum + st 
        at = stSum/(j+1)
        s = st
    return(max(at-k,0))

@jit(nopython=True,parallel=True)
def numba_Asian_parallel(so,k,r,v,t,m,n):
    """
    I have not identified what the arguments mean since the original MATLAB code didn't either. 
    This doc-string will be updated once I learn what they are!
    """
    AsianPayoff= np.zeros(n)
    for i in prange(n):
        AsianPayoff[i] = inner_loop(m,so,k,r,v,t)
    AsianPayoffSum = np.sum(AsianPayoff);
    AsianCall = math.exp(-r*t)*(AsianPayoffSum/n)
    return(AsianCall)

In [12]:
parallel_jit_time = %timeit -o numba_Asian_parallel(100,90,0.15,0.45,1,100,200000)

191 ms ± 6.56 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Numpy version with one loop vectorised

In [13]:
#Solution 1 - Removing the first loop by using vectors
import numpy as np

def Asian_numpy1(so,k,r,v,t,m,n):
    dt = t/m
    AsianPayoffSum = 0
    for i in range(n):
        st = np.cumprod(np.hstack((so,np.exp(((r-v**2/2)*dt) + (v*np.random.normal(0,1,m)*np.sqrt(dt))))))
        at = np.mean(st)
        AsianPayoff = max(at-k,0);
        AsianPayoffSum = AsianPayoffSum + AsianPayoff;
    AsianCall = np.exp(-r*t)*(AsianPayoffSum/n)
    return(AsianCall)

In [14]:
numpy1_time = %timeit -o Asian_numpy1(100,90,0.15,0.45,1,100,200000)

8.66 s ± 1.31 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Numpy version with two loops vectorised

In [15]:
# Solution 2 - remove both loops by using matrices
import numpy as np
from numpy import sqrt,exp
from numpy.random import normal

def Asian_numpy2(so,k,r,v,t,m,n):
    dt = t/m
    AsianPayoffSum = 0
    sqrt_dt = sqrt(dt)
    st = np.cumprod( np.hstack((so*np.ones((n,1)),exp( ((r-v**2/2)*dt) + (v*normal(0,1,(n,m)) * sqrt_dt)) )) ,axis=1)
    at = np.mean(st,axis=1)
    AsianPayoff = np.maximum(at-k,0)
    AsianPayoffMean = AsianPayoff.mean()
    AsianCall = np.exp(-r*t)*AsianPayoffMean
    return(AsianCall)

In [16]:
numpy2_time = %timeit -o Asian_numpy2(100,90,0.15,0.45,1,100,200000)

1.29 s ± 58.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
print(f"Original code took {original_time} seconds")
cythonx = original_time.average/cython_time.average
print(f"Cython was  {cythonx:.1f}x faster")
numpy1x = original_time.average/numpy1_time.average
print(f"Numpy with one vectorised loop was  {numpy1x:.1f}x faster")
numpy2x = original_time.average/numpy2_time.average
print(f"Numpy with two vectorised loop was  {numpy2x:.1f}x faster")
parallel_jit_x= original_time.average/parallel_jit_time.average
jit_x = original_time.average/jit_time.average 
print(f"Serial numba was  {jit_x:.1f}x faster")
print(f"Parallel numba was  {parallel_jit_x:.1f}x faster")

Original code took 35.7 s ± 2.18 s per loop (mean ± std. dev. of 7 runs, 1 loop each) seconds
Cython was  1.4x faster
Numpy with one vectorised loop was  4.1x faster
Numpy with two vectorised loop was  27.7x faster
Serial numba was  48.5x faster
Parallel numba was  186.2x faster
