<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Introduction" data-toc-modified-id="Introduction-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Introduction</a></span><ul class="toc-item"><li><span><a href="#Check-numba-version" data-toc-modified-id="Check-numba-version-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Check <code>numba</code> version</a></span></li><li><span><a href="#Support-overloading" data-toc-modified-id="Support-overloading-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Support overloading</a></span></li><li><span><a href="#Support-class-function" data-toc-modified-id="Support-class-function-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Support class function</a></span></li></ul></li><li><span><a href="#numba-examples" data-toc-modified-id="numba-examples-2"><span class="toc-item-num">2&nbsp;&nbsp;</span><code>numba</code> examples</a></span><ul class="toc-item"><li><span><a href="#Addition-2-arrays" data-toc-modified-id="Addition-2-arrays-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Addition 2 arrays</a></span><ul class="toc-item"><li><span><a href="#Testing" data-toc-modified-id="Testing-2.1.1"><span class="toc-item-num">2.1.1&nbsp;&nbsp;</span>Testing</a></span></li></ul></li><li><span><a href="#Addition-3-arrays" data-toc-modified-id="Addition-3-arrays-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Addition 3 arrays</a></span><ul class="toc-item"><li><span><a href="#Testing" data-toc-modified-id="Testing-2.2.1"><span class="toc-item-num">2.2.1&nbsp;&nbsp;</span>Testing</a></span></li></ul></li><li><span><a href="#Summation-of-an-array-with-given-indices" data-toc-modified-id="Summation-of-an-array-with-given-indices-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Summation of an array with given indices</a></span><ul class="toc-item"><li><span><a href="#Testing" data-toc-modified-id="Testing-2.3.1"><span 
class="toc-item-num">2.3.1&nbsp;&nbsp;</span>Testing</a></span></li></ul></li><li><span><a href="#Trace-of-a-matrix" data-toc-modified-id="Trace-of-a-matrix-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Trace of a matrix</a></span><ul class="toc-item"><li><span><a href="#Testing" data-toc-modified-id="Testing-2.4.1"><span class="toc-item-num">2.4.1&nbsp;&nbsp;</span>Testing</a></span></li></ul></li><li><span><a href="#Distances" data-toc-modified-id="Distances-2.5"><span class="toc-item-num">2.5&nbsp;&nbsp;</span>Distances</a></span><ul class="toc-item"><li><span><a href="#Testing" data-toc-modified-id="Testing-2.5.1"><span class="toc-item-num">2.5.1&nbsp;&nbsp;</span>Testing</a></span></li></ul></li><li><span><a href="#Distances---test-function-as-argument" data-toc-modified-id="Distances---test-function-as-argument-2.6"><span class="toc-item-num">2.6&nbsp;&nbsp;</span>Distances - test function as argument</a></span><ul class="toc-item"><li><span><a href="#Testing" data-toc-modified-id="Testing-2.6.1"><span class="toc-item-num">2.6.1&nbsp;&nbsp;</span>Testing</a></span></li></ul></li></ul></li></ul></div>

# Introduction

## Check `numba` version


In [1]:
import numba as nb
# Report which numba version is installed in this kernel.
print(nb.__version__)

0.46.0


## Support overloading

In [2]:
import numpy as np
import numba as nb

def add_python(a,b):
    """Add ``a`` and ``b`` element by element with an explicit loop.

    The loop runs over the length of ``a``'s first axis. The output buffer is
    created by ``np.empty`` with its default dtype, so the result is float64
    whatever dtype the inputs have.
    """
    length = a.shape[0]
    out = np.empty(length)
    for pos in range(length):
        out[pos] = a[pos] + b[pos]
    return out

def add_numpy(a,b):
    """Return ``a + b``, relying on the operands' own ``+`` (vectorised for arrays)."""
    total = a + b
    return total

# Compile both variants in nopython mode (nb.njit is shorthand for nb.jit(nopython=True)).
add_python_numba = nb.njit(add_python)
add_numpy_numba = nb.njit(add_numpy)

# Call each dispatcher with a float array and then an integer array so that
# it compiles one specialization per argument type.
for sample in (np.array([1.]), np.array([1])):
    add_python_numba(sample, sample)
    add_numpy_numba(sample, sample)

# Show the signatures every dispatcher has compiled so far.
for dispatcher in (add_python_numba, add_numpy_numba):
    print(dispatcher.nopython_signatures)

[(array(float64, 1d, C), array(float64, 1d, C)) -> array(float64, 1d, C), (array(int32, 1d, C), array(int32, 1d, C)) -> array(float64, 1d, C)]
[(array(float64, 1d, C), array(float64, 1d, C)) -> array(float64, 1d, C), (array(int32, 1d, C), array(int32, 1d, C)) -> array(int32, 1d, C)]


## Support class function

A plain function defined in a class body can be jitted and called through the class. This is not a `classmethod`.

In [3]:
import numba as nb
class C:
    """Class used as a namespace for a jitted function that takes no ``self``/``cls``."""

    @nb.njit
    def add(a,b):
        """Return ``a + b``; compiled by numba in nopython mode."""
        result = a + b
        return result
    

In [4]:
# Call the jitted function through the class itself; no instance is created.
print(C.add(1.,1.))

2.0


#  `numba` examples

## Addition 2 arrays

We compute the element-wise sum of two arrays.

In [5]:
import numpy as np
import numba as nb

def add_python(a,b):
    """Element-wise sum of ``a`` and ``b`` written as a plain Python loop.

    Iterates over the length of ``a``'s first axis and stores each sum in a
    buffer made by ``np.empty`` (default dtype, i.e. float64).
    """
    size = a.shape[0]
    result = np.empty(size)
    for k in range(size):
        result[k] = a[k] + b[k]
    return result

def add_numpy(a,b):
    """Element-wise sum of ``a`` and ``b`` using the vectorised ``+`` operator."""
    summed = a + b
    return summed

# nopython-mode compiled versions of both implementations
# (nb.njit is shorthand for nb.jit(nopython=True)).
add_python_numba = nb.njit(add_python)
add_numpy_numba = nb.njit(add_numpy)


### Testing

In [6]:
# Two random float vectors of length 1000 (unseeded, so values differ per run).
a=np.random.randn(1000)
b=np.random.randn(1000)

# Call each jitted function once before timing: numba compiles on the first
# call, and that one-off cost should stay out of the measurements below.
add_python_numba(a,b)
add_numpy_numba(a,b)

# Time all four variants on the same inputs.
print("python:")
%timeit add_python(a,b)
print("numpy:")
%timeit add_numpy(a,b)
print("python numba:")
%timeit add_python_numba(a,b)
print("numpy numba:")
%timeit add_numpy_numba(a,b)

python:
424 µs ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
numpy:
1.12 µs ± 17.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
python numba:
998 ns ± 17.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
numpy numba:
1.01 µs ± 13.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## Addition 3 arrays

We compute the element-wise sum of three arrays.

In [7]:
import numpy as np
import numba as nb

def add_python(a,b,c):
    """Add three arrays element by element with an explicit loop.

    The loop length is the first-axis length of ``a``; the output buffer comes
    from ``np.empty`` with its default dtype (float64).
    """
    length = a.shape[0]
    out = np.empty(length)
    for pos in range(length):
        out[pos] = a[pos] + b[pos] + c[pos]
    return out

def add_numpy(a,b,c):
    """Return ``a + b + c`` using vectorised ``+``, evaluated left to right."""
    first_two = a + b
    return first_two + c

# nopython-mode compiled versions of the three-array implementations
# (nb.njit is shorthand for nb.jit(nopython=True)).
add_python_numba = nb.njit(add_python)
add_numpy_numba = nb.njit(add_numpy)

### Testing

In [8]:
# Three random float vectors of length 1000 (unseeded, so values differ per run).
a=np.random.randn(1000)
b=np.random.randn(1000)
c=np.random.randn(1000)

# Call each jitted function once before timing: numba compiles on the first
# call, and that one-off cost should stay out of the measurements below.
add_python_numba(a,b,c)
add_numpy_numba(a,b,c)

# Time all four variants on the same inputs.
print("python:")
%timeit add_python(a,b,c)
print("numpy:")
%timeit add_numpy(a,b,c)
print("python numba:")
%timeit add_python_numba(a,b,c)
print("numpy numba:")
%timeit add_numpy_numba(a,b,c)


python:
682 µs ± 10.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
numpy:
2.24 µs ± 14.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
python numba:
1.35 µs ± 24.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
numpy numba:
1.39 µs ± 31.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## Summation of an array with given indices

We calculate the sum of the elements of an array at the given indices.

In [9]:
import numpy as np
import numba as nb

def sum_python(v,idx):
    """Sum the entries of ``v`` selected by the index array ``idx``.

    Loop-based counterpart of ``np.sum(v[idx])``. An index that appears more
    than once in ``idx`` contributes each time it appears.

    Parameters
    ----------
    v : array of numbers to pick from.
    idx : 1-D array of integer positions into ``v``.

    Returns
    -------
    The accumulated sum; ``0`` when ``idx`` is empty.
    """
    sm=0
    for i in range(idx.shape[0]):
        # Go through idx to find the position to read. Indexing v with the
        # loop counter would only add up the first len(idx) elements of v.
        sm += v[idx[i]]
    return sm

def sum_numpy(v,idx):
    """Sum the entries of ``v`` at positions ``idx`` via fancy indexing and ``np.sum``."""
    picked = v[idx]
    return np.sum(picked)

# nopython-mode compiled versions of both summation implementations
# (nb.njit is shorthand for nb.jit(nopython=True)).
sum_python_numba = nb.njit(sum_python)
sum_numpy_numba = nb.njit(sum_numpy)

### Testing

In [10]:
# 1000 random values and 100 random integer positions drawn from [0, 1000)
# (unseeded, so values differ per run).
a=np.random.randn(1000)
idx=np.random.randint(1000,size=100)

# Call each jitted function once before timing: numba compiles on the first
# call, and that one-off cost should stay out of the measurements below.
sum_python_numba(a, idx)
sum_numpy_numba(a, idx)

# Time all four variants on the same inputs.
print("python:")
%timeit sum_python(a,idx)
print("numpy:")
%timeit sum_numpy(a,idx)
print("python numba:")
%timeit sum_python_numba(a,idx)
print("numpy numba:")
%timeit sum_numpy_numba(a,idx)

python:
26.5 µs ± 1.23 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
numpy:
5.25 µs ± 71.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
python numba:
505 ns ± 12.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
numpy numba:
737 ns ± 8.93 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


##  Trace of a matrix

In [11]:
import numpy as np
import numba as nb

def trace_python(a):
    """Sum the main-diagonal entries of the 2-D array ``a`` with an explicit loop.

    The loop covers ``min(rows, cols)`` entries, so a rectangular input gives
    the same value as ``np.diag(a).sum()`` instead of indexing past the last
    column when ``a`` has more rows than columns.

    Parameters
    ----------
    a : 2-D array.

    Returns
    -------
    The sum of ``a[i, i]`` over the diagonal; ``0`` for an empty array.
    """
    trace = 0
    # Bound by the shorter axis: a[i, i] only exists while i is valid on both.
    for i in range(min(a.shape[0], a.shape[1])):
        trace += a[i, i]
    return trace

def trace_numpy(a):
    """Sum the main diagonal of ``a``, extracted with ``np.diag``."""
    diagonal = np.diag(a)
    return diagonal.sum()

# nopython-mode compiled versions of both trace implementations
# (nb.njit is shorthand for nb.jit(nopython=True)).
trace_python_numba = nb.njit(trace_python)
trace_numpy_numba = nb.njit(trace_numpy)

### Testing

In [12]:
# Random square matrix of size n x n (unseeded, so values differ per run).
n=1000
a=np.random.rand(n,n)

# Call each jitted function once before timing: numba compiles on the first
# call, and that one-off cost should stay out of the measurements below.
trace_python_numba(a)
trace_numpy_numba(a)

# Time all four variants on the same input.
print("python")
%timeit trace_python(a)
print("numpy:")
%timeit trace_numpy(a)
print("python numba:")
%timeit trace_python_numba(a)
print("numpy numba:")
%timeit trace_numpy_numba(a)

python
258 µs ± 7.96 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
numpy:
9.4 µs ± 411 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
python numba:
2.3 µs ± 115 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
numpy numba:
3.68 µs ± 38.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Distances

We calculate the pairwise Euclidean distances between the row vectors of a matrix.


In [13]:
import numpy as np
import numba as nb

def distance_python(a):
    """Pairwise Euclidean distances between the rows of ``a``, using explicit loops.

    Returns a square array with one row and one column per row of ``a``;
    entry ``[i, j]`` is the distance between row ``i`` and row ``j``.
    """
    n_rows = a.shape[0]
    dist = np.zeros((n_rows, n_rows))
    for row in range(n_rows):
        for other in range(n_rows):
            # Accumulate the squared coordinate differences, then take the root.
            sq_sum = 0.0
            for col in range(a.shape[1]):
                sq_sum += np.square(a[row, col] - a[other, col])
            dist[row, other] = np.sqrt(sq_sum)
    return dist

def distance_numpy(a):
    """Pairwise Euclidean distances between the rows of ``a``, using broadcasting.

    ``a`` has shape (Ndata, Xdim); the result has shape (Ndata, Ndata).
    """
    xdim = a.shape[1]
    # Put a singleton axis in two different places so the subtraction
    # broadcasts every row against every row:
    #   (Ndata, 1, Xdim) - (1, Ndata, Xdim) -> (Ndata, Ndata, Xdim)
    as_column = a.reshape(-1, 1, xdim)
    as_row = a.reshape(1, -1, xdim)
    squared_diff = np.square(as_column - as_row)
    # Collapse the coordinate axis, then take the root.
    return np.sqrt(np.sum(squared_diff, axis=-1))
    

# nopython-mode compiled versions of both distance implementations
# (nb.njit is shorthand for nb.jit(nopython=True)).
distance_python_numba = nb.njit(distance_python)
distance_numpy_numba = nb.njit(distance_numpy)

### Testing

Test correctness

In [14]:
# Small random input: n points with 3 coordinates each.
n=4
a=np.random.rand(n, 3)
# Print the distance matrix from every implementation, in the same order as
# they were defined, so the four results can be compared by eye.
for impl in (distance_python, distance_numpy, distance_python_numba, distance_numpy_numba):
    print(impl(a))


[[0.         1.0862949  0.75142547 0.82100835]
 [1.0862949  0.         0.54109866 0.79213744]
 [0.75142547 0.54109866 0.         0.68058665]
 [0.82100835 0.79213744 0.68058665 0.        ]]
[[0.         1.0862949  0.75142547 0.82100835]
 [1.0862949  0.         0.54109866 0.79213744]
 [0.75142547 0.54109866 0.         0.68058665]
 [0.82100835 0.79213744 0.68058665 0.        ]]
[[0.         1.0862949  0.75142547 0.82100835]
 [1.0862949  0.         0.54109866 0.79213744]
 [0.75142547 0.54109866 0.         0.68058665]
 [0.82100835 0.79213744 0.68058665 0.        ]]
[[0.         1.0862949  0.75142547 0.82100835]
 [1.0862949  0.         0.54109866 0.79213744]
 [0.75142547 0.54109866 0.         0.68058665]
 [0.82100835 0.79213744 0.68058665 0.        ]]


Test performance

In [15]:
# n points with n coordinates each (unseeded, so values differ per run).
n=100
a=np.random.rand(n,n)


# Call each jitted function once before timing: numba compiles on the first
# call, and that one-off cost should stay out of the measurements below.
distance_python_numba(a)
distance_numpy_numba(a)

# Time all four variants on the same input.
print("python")
%timeit distance_python(a)
print("numpy:")
%timeit distance_numpy(a)
print("python numba:")
%timeit distance_python_numba(a)
print("numpy numba:")
%timeit distance_numpy_numba(a)

python
2.1 s ± 62 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
numpy:
6.76 ms ± 253 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
python numba:
1.02 ms ± 8.81 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
numpy numba:
9.79 ms ± 220 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Distances - test function as argument

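We calculate the same pairwise distances between row vectors, this time with the per-pair distance in a separate function that is either called directly or passed in as an argument.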

In [16]:
import numpy as np
import numba as nb

def distance(a,i,j):
    """Euclidean distance between rows ``i`` and ``j`` of the 2-D array ``a``."""
    total = 0.0
    for k in range(a.shape[1]):
        delta = a[i, k] - a[j, k]
        total += np.square(delta)
    return np.sqrt(total)
    
def distance_python2(a):
    """Pairwise distance matrix for the rows of ``a``.

    Each entry ``[i, j]`` is obtained by calling the module-level ``distance``
    function on rows ``i`` and ``j``.
    """
    n_rows = a.shape[0]
    out = np.zeros((n_rows, n_rows))
    for row in range(n_rows):
        for other in range(n_rows):
            out[row, other] = distance(a, row, other)
    return out

def distance_python3(a, dist):
    """Pairwise matrix for the rows of ``a`` using a caller-supplied function.

    ``dist`` is called as ``dist(a, i, j)`` for every pair of row indices and
    its return value is stored at ``[i, j]`` of the result.
    """
    n_rows = a.shape[0]
    out = np.zeros((n_rows, n_rows))
    for row in range(n_rows):
        for other in range(n_rows):
            out[row, other] = dist(a, row, other)
    return out

# Rebind `distance` to its compiled version first; distance_python2 refers to
# it by this global name. nb.njit is shorthand for nb.jit(nopython=True).
distance = nb.njit(distance)
distance_python2_numba = nb.njit(distance_python2)
distance_python3_numba = nb.njit(distance_python3)


### Testing

In [17]:
# n points with n coordinates each (unseeded, so values differ per run).
n=100
a=np.random.rand(n,n)

# Call the two new jitted functions once before timing: numba compiles on the
# first call, and that one-off cost should stay out of the measurements below.
distance_python2_numba(a)
distance_python3_numba(a, distance)

# version1 is distance_python_numba from the earlier "Distances" section
# (inlined loop); version2 calls the global `distance`; version3 receives
# `distance` as an argument.
print("numba version1:")
%timeit distance_python_numba(a)
print("numba version2:")
%timeit distance_python2_numba(a)
print("numba version3:")
%timeit distance_python3_numba(a, distance)

numba version1:
1.03 ms ± 17.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
numba version2:
1.01 ms ± 6.04 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
numba version3:
1.03 ms ± 9.89 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
