In [1]:
import numpy as np
from numba import jit
from numpy.typing import ArrayLike

In [2]:
def without_numba(x: ArrayLike) -> ArrayLike:
    """Add the sum of tanh over the main diagonal of ``x`` to every element.

    Pure-Python/NumPy reference implementation; the explicit loop is kept on
    purpose so it can be compared against the jitted variant.

    Parameters
    ----------
    x : ArrayLike
        Two-dimensional input. It is converted with ``np.asarray`` so nested
        sequences are accepted as well as arrays. It does not have to be
        square: only the diagonal entries that exist in both axes are used.

    Returns
    -------
    ArrayLike
        An array with the shape of ``x`` holding ``x + sum_i tanh(x[i, i])``.
    """
    x = np.asarray(x)
    # Bound the walk by the shorter axis: iterating over x.shape[0] alone
    # raises IndexError as soon as a matrix has more rows than columns.
    n_diag = min(x.shape[:2])
    trace = 0.0
    for i in range(n_diag):
        trace += np.tanh(x[i, i])
    return x + trace

In [3]:
@jit(nopython=True)
def with_numba(x: ArrayLike) -> ArrayLike:
    """Add the sum of tanh over the main diagonal of ``x`` to every element.

    Numba-compiled (nopython mode) counterpart of the plain-Python version.

    Parameters
    ----------
    x : ArrayLike
        Two-dimensional NumPy array (``.shape`` and ``x[i, i]`` are used
        directly). It does not have to be square: only the diagonal entries
        that exist in both axes are used.

    Returns
    -------
    ArrayLike
        An array with the shape of ``x`` holding ``x + sum_i tanh(x[i, i])``.
    """
    # Numba does not bounds-check array indexing unless asked to, so walking
    # x.shape[0] steps on a matrix with more rows than columns would read
    # out-of-range memory instead of raising. Stop at the shorter axis.
    n_diag = min(x.shape[0], x.shape[1])
    trace = 0.0
    for i in range(n_diag):
        trace += np.tanh(x[i, i])
    return x + trace

In [4]:
# Benchmark input: the integers 0..999_999 laid out row by row in a 1000 x 1000 grid.
x = np.arange(1000 * 1000).reshape((1000, 1000))

In [5]:
%%time
# Single-run timing of the plain-Python version; the result is bound to the
# throwaway name `_` rather than kept.
_ = without_numba(x)

CPU times: user 5.11 ms, sys: 12.9 ms, total: 18 ms
Wall time: 83.6 ms


In [6]:
%%timeit
# Repeated-run timing of the plain-Python version, averaged over many loops.
_ = without_numba(x)

4.26 ms ± 1.09 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
%%time
# with_numba is decorated with @jit without an explicit signature, so it is
# compiled when first called. As far as this notebook shows, this is that
# first call, so the time reported here includes compilation, not just the
# computation itself.
_ = with_numba(x)

CPU times: user 692 ms, sys: 189 ms, total: 881 ms
Wall time: 1.69 s


In [8]:
%%timeit
# Repeated-run timing of the jitted version, averaged over many loops.
# NOTE(review): presumably the function is already compiled by the preceding
# %%time cell, so this reflects steady-state speed — confirm cells run in order.
_ = with_numba(x)

1.21 ms ± 258 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
