In [1]:
import numpy as np
from numba import jit
import numexpr as ne

a = np.random.uniform(size=(10,10,10,100))
b = np.random.uniform(size=(10,10,10,100))
print("numpy adding:")
%timeit 2 * a + 3 * b
%timeit sum(2 * a + 3 * b)
print("numexpr adding:")
%timeit ne.evaluate("2 * a + 3 * b")
%timeit ne.evaluate("sum(2 * a + 3 * b)")
print("numba adding:")
from numba import jit
from numpy import arange

# jit decorator tells Numba to compile this function.
# The argument types will be inferred by Numba when function is called.
# nopython=True is requested explicitly so the function is always compiled to
# native code (or fails loudly) regardless of the installed Numba's default.
@jit(nopython=True)
def sum2arr_res(a, b):
    """Return the scalar sum of ``2 * a + 3 * b`` over every element.

    The reduction is written as explicit loops that accumulate into a single
    float, so no intermediate array is materialised.

    Parameters
    ----------
    a, b : 4-dimensional arrays. The loop bounds come from ``a.shape``;
        ``b`` is indexed with the same indices.

    Returns
    -------
    float
        The accumulated sum.
    """
    M, N, O, P = a.shape
    result = 0.0
    for i in range(M):
        for j in range(N):
            for k in range(O):
                for l in range(P):
                    result += 2 * a[i,j,k,l] + 3 * b[i,j,k,l]
    return result

@jit(nopython=True)
def sum2arr(a,b):
    """Return the element-wise combination ``2 * a + 3 * b`` as a new array."""
    # The result must be returned: a bare expression statement is discarded,
    # so the function would yield None and the compiler would be free to drop
    # the computation, making the timing meaningless.
    return 2 * a + 3 * b
%timeit sum2arr(a, b)
%timeit sum2arr_res(a, b)

numpy adding:
2.65 ms ± 444 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.37 ms ± 146 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
numexpr adding:
The slowest run took 8.73 times longer than the fastest. This could mean that an intermediate result is being cached.
1.09 ms ± 671 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
820 µs ± 180 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
numba adding:
933 µs ± 90.8 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
161 µs ± 45.6 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
