# test if signatures and output args affect speed

In [97]:
import numpy as np
from time import perf_counter

from numba import njit, types as nbtypes, typeof


@njit()
def test0(a, b, c, out):
    """Baseline: sum sin(a) + cos(b) + c over all elements in one fused loop nest.

    ``a``, ``b`` and ``c`` are indexed as 2-D arrays; the loop bounds come
    from ``a.shape``. ``out`` is accepted so that the call signature matches
    the other variants, but it is never read or written here.

    Returns the accumulated sum.
    """
    n_rows, n_cols = a.shape
    total = 0.
    for i in range(n_rows):
        for j in range(n_cols):
            total += np.sin(a[i, j]) + np.cos(b[i, j]) + c[i, j]
    return total

# Swap the decorator below for a bare ``@njit()`` to compare the explicit
# signature against Numba's own type inference.
@njit(nbtypes.float64(nbtypes.float64[:], nbtypes.float32[:], nbtypes.int32[:]))
def kernal(a, b, c):
    """Row kernel: return the sum of sin(a) + cos(b) + c over one 1-D row.

    Declared with an explicit signature (float64, float32 and int32 1-D
    arrays in, float64 out). The loop length is taken from ``a``.
    """
    acc = 0.
    for idx in range(len(a)):
        acc += np.sin(a[idx]) + np.cos(b[idx]) + c[idx]
    return acc

@njit()
def test1(a, b, c, out):
    """Variant 1: call ``kernal`` on each row slice and accumulate in ``out[0]``.

    ``out[0]`` is reset to zero first and is then updated in place once per
    row, so the running total lives in the output array rather than in a
    local variable.

    Returns the final value of ``out[0]``.
    """
    n_rows = a.shape[0]
    out[0] = 0
    for row in range(n_rows):
        row_sum = kernal(a[row, :], b[row, :], c[row, :])
        out[0] += row_sum
    return out[0]

# Swap the decorator below for a bare ``@njit`` to compare the explicit
# signature against Numba's own type inference.
@njit(
    nbtypes.float64(
        nbtypes.float64[:, :],
        nbtypes.float32[:, :],
        nbtypes.int32[:, :],
        typeof(1),
    ),
    inline='never',
)
def kernal2(a, b, c, n):
    """Row kernel taking the full 2-D arrays plus the row index ``n``.

    Returns the sum of sin(a[n, m]) + cos(b[n, m]) + c[n, m] over the
    columns of row ``n``; the column count is taken from ``a.shape[1]``.
    No row slices are created by the caller for this variant.
    """
    n_cols = a.shape[1]
    row_total = 0.
    for col in range(n_cols):
        row_total += np.sin(a[n, col]) + np.cos(b[n, col]) + c[n, col]
    return row_total

@njit
def test2(a, b, c, out):
    """Variant 2: call ``kernal2`` per row index and store the total in ``out[0]``.

    The running total is kept in a local variable and written to ``out[0]``
    once, after the loop. Nothing is returned.
    """
    n_rows = a.shape[0]
    total = 0.
    for row in range(n_rows):
        total += kernal2(a, b, c, row)
    out[0] = total
    

# Problem size: N rows by M columns.
N = 1_000_000
M = 3
shape = (N, M)

# One input array per dtype used in the explicit kernel signatures.
A = np.random.random(shape)
B = np.random.random(shape).astype(np.float32)
# NOTE(review): random() draws from [0, 1), so the int32 cast leaves C all
# zeros -- confirm that is acceptable for this benchmark.
C = np.random.random(shape).astype(np.int32)

# Output buffer shared by all variants; only element 0 is written by them.
out = np.zeros((M,), dtype=np.float64)

# pre-compile
for warm_up in (test0, test1, test2):
    warm_up(A, B, C, out)


In [77]:
%%timeit -n 100
test0(A, B, C,out)

36.5 ms ± 223 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [98]:
%%timeit -n 100
test1(A, B, C,out)

32.5 ms ± 257 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [92]:
%%timeit -n 100
test2(A, B, C,out)

32 ms ± 68.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [113]:
from numba import njit, types as nbtypes, typeof

@njit
def numba_max1(x, out):
    """Store max(x[n, 0], x[n, 1]) in ``out[n]`` using the builtin ``max``.

    Iterates over the rows of ``x`` (``x.shape[0]``) and compares only the
    first two columns. ``out`` is written in place; nothing is returned.
    """
    for n in range(x.shape[0]):
        out[n] = max(x[n, 0], x[n, 1])


@njit
def numba_max2(x, out):
    """Store the larger of x[n, 0] and x[n, 1] in ``out[n]`` via ``np.maximum``.

    Iterates over the rows of ``x`` (``x.shape[0]``) and compares only the
    first two columns. ``out`` is written in place; nothing is returned.
    """
    n_rows = x.shape[0]
    for row in range(n_rows):
        first = x[row, 0]
        second = x[row, 1]
        out[row] = np.maximum(first, second)

@njit
def numba_max3(x, out):
    """Store the larger of x[n, 0] and x[n, 1] in ``out[n]`` with an explicit branch.

    Iterates over the rows of ``x`` (``x.shape[0]``) and compares only the
    first two columns. ``out`` is written in place; nothing is returned.
    """
    for n in range(x.shape[0]):
        if x[n, 0] > x[n, 1]:
            out[n] = x[n, 0]
        else:
            # Without this branch out[n] would keep whatever the caller's
            # buffer already held whenever column 1 is the larger (or equal).
            out[n] = x[n, 1]

# Number of rows in the max-of-two benchmark input.
N = 1000000

x = np.random.random((N, 3))
# NOTE(review): x holds floats but out is int32, so the stored maxima are
# cast to integers on assignment -- confirm this dtype is intended.
out = np.empty((N,), dtype=np.int32)

# Call each variant once before timing, mirroring the pre-compile step used
# for the earlier benchmark.
numba_max1(x, out)
numba_max2(x, out)
numba_max3(x, out)

# fast maximum of two values, for recording the index of the max, for use in largest barycentric coords

In [117]:
%%timeit 
numba_max1(x,out)

1.19 ms ± 12.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [115]:
%%timeit 
numba_max2(x,out)

1.44 ms ± 8.69 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [112]:
%%timeit 
numba_max3(x,out)

4.76 ms ± 10.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
