In [2]:
from numba import jit
import numpy as np
from numpy.typing import ArrayLike


In [3]:
def without_numba(x: ArrayLike) -> ArrayLike:
  """Add the tanh-sum of the main diagonal of ``x`` to every element of ``x``.

  Pure-Python baseline: the diagonal is walked with an interpreted loop.

  Args:
    x: Array to process. Despite the broad ``ArrayLike`` hint, the body reads
      ``x.shape`` and indexes ``x[i, i]``, so pass a NumPy array (normally 2-D).

  Returns:
    ``x + s``, where ``s`` is the sum of ``tanh(x[i, i])`` over the main
    diagonal.

  Raises:
    IndexError: if ``x`` is 0-dimensional, or a non-empty 1-D array.
  """
  n_diag = x.shape[0]
  if x.ndim >= 2:
    # The main diagonal is only as long as the shorter of the first two axes;
    # looping over every row would index past the last column of a tall matrix.
    n_diag = min(n_diag, x.shape[1])

  trace = 0.0
  for i in range(n_diag):
    trace += np.tanh(x[i, i])
  return x + trace

In [4]:
@jit(nopython=True)  # explicit: do not depend on the numba version's default mode
def with_numba(x: ArrayLike) -> ArrayLike:
  """Add the tanh-sum of the main diagonal of ``x`` to every element of ``x``.

  Same computation as the plain-Python loop, compiled by numba.

  Args:
    x: 2-D NumPy array. The body reads ``x.shape`` and indexes ``x[i, i]``,
      so the broad ``ArrayLike`` hint is wider than what is accepted here.

  Returns:
    ``x + s``, where ``s`` is the sum of ``tanh(x[i, i])`` over the main
    diagonal.
  """
  # Walk only the real diagonal. Compiled code is not bounds-checked by
  # default, so looping over every row of a tall matrix would read past the
  # end of a row instead of raising.
  n_diag = min(x.shape[0], x.shape[1])
  trace = 0.0
  for i in range(n_diag):
    trace += np.tanh(x[i, i])
  return x + trace

In [5]:
# Benchmark input: 100 x 100 integer matrix holding 0..9999 in row-major order.
x = np.arange(100 * 100).reshape((100, 100))

In [9]:
%%time
# One-shot timing of the pure-Python version; the result is discarded.
_ = without_numba(x)

CPU times: user 281 μs, sys: 59 μs, total: 340 μs
Wall time: 364 μs


In [11]:
%%timeit
# Repeated timing of the pure-Python version (mean and std. dev. over several runs).
_ = without_numba(x)

120 μs ± 11.7 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [10]:
%%time
# NOTE(review): presumably the first call after definition, so this timing likely
# includes numba's JIT compilation and not just execution — confirm on a fresh kernel.
_ = with_numba(x)

CPU times: user 302 ms, sys: 50.8 ms, total: 353 ms
Wall time: 545 ms


In [12]:
%%time
# Repeat of the same call; with_numba has already been invoked once, so this
# should reflect the already-compiled code path only.
_ = with_numba(x)

CPU times: user 24 μs, sys: 73 μs, total: 97 μs
Wall time: 107 μs


In [23]:
%%timeit -n 100000
# Pure-Python baseline with the loop count pinned to 100000 per run.
_ = without_numba(x)

59.4 μs ± 1.12 μs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [13]:
%%timeit
# Repeated timing of the jitted version (mean and std. dev. over several runs).
_ = with_numba(x)

3.72 μs ± 602 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
