In [1]:
%load_ext Cython

In [2]:
import numpy as np

In [5]:
%%cython

cdef int a = 0
for i in range(10):
    a += i
print(a)

In [16]:
from hello import compute
from hello_typed import compute as computed_2
from hello_memview import compute as computed_3

In [17]:
array_1 = np.random.uniform(0, 1000, size=(3000, 2000)).astype(np.intc)
array_2 = np.random.uniform(0, 1000, size=(3000, 2000)).astype(np.intc)
a = 4
b = 3
c = 9

In [18]:
%timeit compute(array_1, array_2, a, b, c)

18.5 s ± 3.13 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [19]:
%timeit computed_2(array_1, array_2, a, b, c)

10.5 s ± 129 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [20]:
%timeit computed_3(array_1, array_2, a, b, c)

59.8 ms ± 1.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [21]:
18500/59.8

309.3645484949833

In [24]:
%%cython

import numpy as np

DTYPE = np.intc


cdef int clip(int a, int min_value, int max_value):
    return min(max(a, min_value), max_value)


def compute_4(int[:, :] array_1, int[:, :] array_2, int a, int b, int c):

    cdef Py_ssize_t x_max = array_1.shape[0]
    cdef Py_ssize_t y_max = array_1.shape[1]

    # array_1.shape is now a C array, no it's not possible
    # to compare it simply by using == without a for-loop.
    # To be able to compare it to array_2.shape easily,
    # we convert them both to Python tuples.
    assert tuple(array_1.shape) == tuple(array_2.shape)

    result = np.zeros((x_max, y_max), dtype=DTYPE)
    cdef int[:, :] result_view = result

    cdef int tmp
    cdef Py_ssize_t x, y

    for x in range(x_max):
        for y in range(y_max):

            tmp = clip(array_1[x, y], 2, 10)
            tmp = tmp * a + array_2[x, y] * b
            result_view[x, y] = tmp + c

    return result

In [26]:
%timeit compute_4(array_1, array_2, a, b, c)

44.1 ms ± 3.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [83]:
%%cython

import numpy as np

DTYPE = np.float64

def hello(double[:] params, double[:] x, double[:] y, double[:] u):

    # array_1.shape is now a C array, no it's not possible
    # to compare it simply by using == without a for-loop.
    # To be able to compare it to array_2.shape easily,
    # we convert them both to Python tuples.
    #assert tuple(array_1.shape) == tuple(array_2.shape)

    f = np.zeros((6,), dtype=DTYPE)

    cdef double p_0 = params[0]
    cdef double p_1 = params[1]
    cdef double p_2 = params[2]
    cdef double p_3 = params[3]
    cdef double p_4 = params[4]

    cdef double x_0 = x[0]
    cdef double x_1 = x[1]
    cdef double x_2 = x[2]
    cdef double x_3 = x[3]
    cdef double x_4 = x[4]

    cdef double y_0 = y[0]
    cdef double y_1 = y[1]
    cdef double y_2 = y[2]
    cdef double y_3 = y[3]
    cdef double y_4 = y[4]
    

    f[0] = p_0*x_0
    f[1] = p_1*x_1
    f[2] = p_2*x_2
    f[3] = p_3*x_3
    f[4] = p_3*x_4

    return f

In [90]:
params = np.random.uniform(0, 1000, size=(6)).astype(np.float64)
x = np.random.uniform(0, 1000, size=(6)).astype(np.float64)
y = np.random.uniform(0, 1000, size=(6)).astype(np.float64)
u = np.random.uniform(0, 1000, size=(6)).astype(np.float64)
f = np.zeros(6,)

In [86]:
%timeit hello(params,x,y,u)

4.07 µs ± 307 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [71]:
np.random.uniform(0, 1000, size=(6))

array([525.33949139, 200.89032309, 733.16024751, 282.15007601,
       463.71367783, 910.84158216])

In [74]:
import numba

In [96]:
@numba.njit(cache=True)
def hello_nb(params,x,y,u,f):

    # array_1.shape is now a C array, no it's not possible
    # to compare it simply by using == without a for-loop.
    # To be able to compare it to array_2.shape easily,
    # we convert them both to Python tuples.
    #assert tuple(array_1.shape) == tuple(array_2.shape)

    #f = np.zeros((6,), dtype=DTYPE)

    p_0 = params[0]
    p_1 = params[1]
    p_2 = params[2]
    p_3 = params[3]
    p_4 = params[4]

    x_0 = x[0]
    x_1 = x[1]
    x_2 = x[2]
    x_3 = x[3]
    x_4 = x[4]

    y_0 = y[0]
    y_1 = y[1]
    y_2 = y[2]
    y_3 = y[3]
    y_4 = y[4]
    

    f[0] = p_0*x_0
    f[1] = p_1*x_1
    f[2] = p_2*x_2
    f[3] = p_3*x_3
    f[4] = p_3*x_4 + y_3

    #return f

In [97]:
%timeit hello_nb(params,x,y,u,f)

969 ns ± 10.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [95]:
f

array([ 37864.76806858,  31093.31153325, 177199.72251311,  75276.28071601,
        88879.80456878,      0.        ])