In [2]:
from numba import jit
import numpy as np

In [3]:
# Demo input: the integers 0..99 laid out as a 10x10 matrix.
x = np.arange(10 * 10).reshape((10, 10))

@jit(nopython=True)
def go_fast(a):
    """Add the sum of ``tanh`` over the main diagonal of ``a`` to every element.

    Parameters
    ----------
    a : 2-D array
        Indexed as ``a[i, i]``; it does not have to be square.

    Returns
    -------
    array
        ``a + trace``, where ``trace`` is the scalar diagonal sum. A new
        array is produced; ``a`` itself is not written to.
    """
    trace = 0.0
    # Visit only the diagonal entries that actually exist. Looping over
    # a.shape[0] alone indexes past the last column of a tall matrix, and
    # nopython mode does not bounds-check array indexing by default.
    for i in range(min(a.shape[0], a.shape[1])):
        trace += np.tanh(a[i, i])
    return a + trace

In [4]:
# Call go_fast on the demo matrix x and print the array it returns.
print(go_fast(x))

[[  9.  10.  11.  12.  13.  14.  15.  16.  17.  18.]
 [ 19.  20.  21.  22.  23.  24.  25.  26.  27.  28.]
 [ 29.  30.  31.  32.  33.  34.  35.  36.  37.  38.]
 [ 39.  40.  41.  42.  43.  44.  45.  46.  47.  48.]
 [ 49.  50.  51.  52.  53.  54.  55.  56.  57.  58.]
 [ 59.  60.  61.  62.  63.  64.  65.  66.  67.  68.]
 [ 69.  70.  71.  72.  73.  74.  75.  76.  77.  78.]
 [ 79.  80.  81.  82.  83.  84.  85.  86.  87.  88.]
 [ 89.  90.  91.  92.  93.  94.  95.  96.  97.  98.]
 [ 99. 100. 101. 102. 103. 104. 105. 106. 107. 108.]]


In [5]:
from numba import jit
import numpy as np
import time

# Demo input: the integers 0..99 laid out as a 10x10 matrix.
x = np.arange(10 * 10).reshape((10, 10))

@jit(nopython=True)
def go_fast(a): # Function is compiled and runs in machine code
    """Return ``a`` shifted by the sum of ``tanh`` of its main-diagonal entries.

    Parameters
    ----------
    a : 2-D array
        Indexed as ``a[i, i]``; it does not have to be square.

    Returns
    -------
    array
        ``a + trace``, where ``trace`` is the scalar diagonal sum. A new
        array is produced; ``a`` itself is not written to.
    """
    trace = 0.0
    # Stop at the shorter axis: iterating over a.shape[0] alone runs off the
    # last column of a tall matrix, and nopython mode does not bounds-check
    # array indexing by default.
    for i in range(min(a.shape[0], a.shape[1])):
        trace += np.tanh(a[i, i])
    return a + trace

# Do not report this first measurement: compilation time is included in the
# elapsed time, so it overstates the cost of running go_fast.
start = time.perf_counter()
go_fast(x)
end = time.perf_counter()
print("Elapsed (with compilation) = {}s".format((end - start)))

# The function is compiled now; time the same call again so that only the
# execution of the cached compiled code is measured.
start = time.perf_counter()
go_fast(x)
end = time.perf_counter()
print("Elapsed (after compilation) = {}s".format((end - start)))

Elapsed (with compilation) = 0.10262733399999746s
Elapsed (after compilation) = 4.0999999995960934e-05s


In [11]:
def inner_rows(C, A, B):
    """Store the element-wise sum ``A + B`` into ``C``, walking row by row.

    The outer loop runs over rows (``i``) and the inner loop over columns
    (``j``). ``C`` is written in place and nothing is returned.

    Parameters
    ----------
    C : 2-D array
        Output buffer; every ``[i, j]`` position that exists in ``A`` is
        overwritten.
    A, B : 2-D arrays
        Operands. ``B`` and ``C`` must be at least as large as ``A`` along
        both axes.
    """
    n_rows, n_cols = A.shape
    for i in range(n_rows):
        # The column extent comes from A.shape[1]; len(A) is the row count
        # and only matches it for square matrices.
        for j in range(n_cols):
            C[i,j] = A[i,j] + B[i,j]

def inner_cols(C, A, B):
    """Store the element-wise sum ``A + B`` into ``C``, walking column by column.

    The outer loop runs over columns (``j``) and the inner loop over rows
    (``i``). ``C`` is written in place and nothing is returned.

    Parameters
    ----------
    C : 2-D array
        Output buffer; every ``[i, j]`` position that exists in ``A`` is
        overwritten.
    A, B : 2-D arrays
        Operands. ``B`` and ``C`` must be at least as large as ``A`` along
        both axes.
    """
    n_rows, n_cols = A.shape
    # The column extent comes from A.shape[1]; len(A) is the row count and
    # only matches it for square matrices.
    for j in range(n_cols):
        for i in range(n_rows):
            C[i,j] = A[i,j] + B[i,j]

def inner_alloc(C, A, B):
    """Store ``A + B`` into ``C`` row by row, via a temporary list per element.

    Same traversal as a plain row-major double loop, except that each sum is
    first wrapped in a fresh one-element list and then read back out of it.
    The extra list is kept on purpose (presumably so the benchmark can show
    the cost of a per-iteration allocation). ``C`` is written in place and
    nothing is returned.

    Parameters
    ----------
    C : 2-D array
        Output buffer; every ``[i, j]`` position that exists in ``A`` is
        overwritten.
    A, B : 2-D arrays
        Operands. ``B`` and ``C`` must be at least as large as ``A`` along
        both axes.
    """
    n_rows, n_cols = A.shape
    for i in range(n_rows):
        # The column extent comes from A.shape[1]; len(A) is the row count
        # and only matches it for square matrices.
        for j in range(n_cols):
            val = [A[i,j] + B[i,j]]
            C[i,j] = val[0]


In [7]:
# Three independent 100x100 matrices of random samples, drawn in the order
# A, B, C. The benchmark kernels are called as kernel(C, A, B).
A, B, C = (np.random.rand(100, 100) for _ in range(3))


In [12]:
# Time the three pure-Python loop variants on the same A, B and C arrays.
%timeit inner_rows(C,A,B)
%timeit inner_cols(C,A,B)
%timeit inner_alloc(C,A,B)

3.23 ms ± 67.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.27 ms ± 78.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.71 ms ± 71.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
