In [1]:
from numba import jit
import numpy as np
from numpy.typing import ArrayLike


In [2]:
def without_numba(x: ArrayLike) -> np.ndarray:
  """Add the sum of ``tanh`` over the main diagonal of ``x`` to every element.

  Plain Python/NumPy implementation (no JIT compilation).

  Args:
    x: Array-like with at least two dimensions. Only the entries ``x[i, i]``
      for ``i`` in ``range(x.shape[0])`` are read, so the input needs at
      least as many columns as rows.

  Returns:
    ``x + sum_i tanh(x[i, i])`` as an array.

  Raises:
    IndexError: if ``x`` has fewer than two dimensions or more rows than
      columns.
  """
  # The signature promises ArrayLike, but `.shape` and `x[i, i]` need a real
  # array. Coerce so nested lists/tuples work too; asanyarray passes ndarrays
  # (and ndarray subclasses) through unchanged.
  x = np.asanyarray(x)
  trace = 0.0
  for i in range(x.shape[0]):
    trace += np.tanh(x[i, i])
  return x + trace

In [3]:
@jit
def with_numba(x: ArrayLike) -> ArrayLike:
  """Numba-jitted counterpart of the plain-Python ``without_numba``.

  Accumulates ``tanh(x[k, k])`` for every row index ``k`` and returns ``x``
  with that accumulated value added to it.
  """
  n_rows = x.shape[0]
  tanh_diag_sum = 0.0
  for k in range(n_rows):
    tanh_diag_sum = tanh_diag_sum + np.tanh(x[k, k])
  return x + tanh_diag_sum

In [6]:
# Benchmark input: the integers 0..9999 laid out row-major as a 100x100 matrix.
x = np.reshape(np.arange(10_000), (100, 100))

In [7]:
%%time
# Single timed call of the plain-Python version; the result is discarded.
_ = without_numba(x)

CPU times: user 323 μs, sys: 31 μs, total: 354 μs
Wall time: 351 μs


In [8]:
%%time
# Another single timed call of the plain-Python version on the same input.
_ = without_numba(x)

CPU times: user 379 μs, sys: 51 μs, total: 430 μs
Wall time: 410 μs


In [9]:
%%time
# Single timed call of the @jit version.
# NOTE(review): presumably the first call of with_numba, in which case the
# measured time would include JIT compilation — confirm on a fresh kernel.
_ = with_numba(x)

CPU times: user 186 ms, sys: 12.1 ms, total: 198 ms
Wall time: 237 ms


In [10]:
%%time
# Another single timed call of the @jit version on the same input.
_ = with_numba(x)

CPU times: user 49 μs, sys: 10 μs, total: 59 μs
Wall time: 62.9 μs


In [23]:
%%timeit -n 100000
# Repeated benchmark of the plain-Python version (-n sets 100,000 loops per run).
_ = without_numba(x)

59.4 μs ± 1.12 μs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [22]:
%%timeit -n 100000 
# Repeated benchmark of the @jit version (-n sets 100,000 loops per run).
# NOTE(review): the recorded output reports 1,000,000 loops and this cell's
# execution count (22) is lower than the preceding cell's (23) — re-run the
# notebook top to bottom to refresh the numbers.
_ = with_numba(x)

1.68 μs ± 34.8 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
